From ff15db44c0efc51fca82ad956ebb6c7fb31e2a59 Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Fri, 6 Feb 2026 07:38:06 +1100 Subject: [PATCH] Restore research as first-class tool, remove research skill Restores the research tool that was previously externalized as a skill: - Add pending_research.rs: PendingResearchManager with thread-safe task tracking - Add tools/research.rs: execute_research (async), execute_research_status - Add research/research_status tool definitions with exclude_research config - Integrate PendingResearchManager into Agent and ToolContext - Inject completed research results in streaming loop Remove research skill: - Clear EMBEDDED_SKILLS array in embedded.rs - Delete skills/research/ directory - Update all tests expecting embedded research skill - Update docs and memory to reflect the change The research tool now: - Spawns scout agent in background tokio task - Returns immediately with research_id - Automatically injects results into conversation when ready - Supports status checks via research_status tool --- analysis/breaker/2025-02-05.md | 14 +- analysis/memory.md | 37 +- crates/g3-core/src/lib.rs | 78 ++++ crates/g3-core/src/pending_research.rs | 547 +++++++++++++++++++++++++ crates/g3-core/src/skills/discovery.rs | 30 +- crates/g3-core/src/skills/embedded.rs | 24 +- crates/g3-core/src/skills/prompt.rs | 4 +- crates/g3-core/src/tool_definitions.rs | 73 +++- crates/g3-core/src/tool_dispatch.rs | 9 +- crates/g3-core/src/tools/acd.rs | 6 + crates/g3-core/src/tools/executor.rs | 2 + crates/g3-core/src/tools/file_ops.rs | 17 +- crates/g3-core/src/tools/mod.rs | 1 + crates/g3-core/src/tools/research.rs | 471 +++++++++++++++++++++ docs/architecture.md | 13 +- docs/skills.md | 61 +-- skills/research/SKILL.md | 138 ------- 17 files changed, 1240 insertions(+), 285 deletions(-) create mode 100644 crates/g3-core/src/pending_research.rs create mode 100644 crates/g3-core/src/tools/research.rs delete mode 100644 skills/research/SKILL.md diff --git a/analysis/breaker/2025-02-05.md b/analysis/breaker/2025-02-05.md index e1bfd4b..4433857 100644 --- a/analysis/breaker/2025-02-05.md +++ b/analysis/breaker/2025-02-05.md @@ -1,8 +1,12 @@ # Breaker Report: 2025-02-05 +> **Note**: Issue 1 below is now obsolete. The research skill was removed and replaced +> with a first-class `research` tool in `crates/g3-core/src/tools/research.rs`. +> The g3-research script no longer exists. + Focused on changes in commits b6d2582..9443f933 (past 10 commits). -## Issue 1: JSON Escaping Bug in g3-research Script +## Issue 1: JSON Escaping Bug in g3-research Script (OBSOLETE) ### Title `g3-research` produces invalid JSON when query contains actual newlines @@ -47,19 +51,19 @@ Embedded skills have non-existent file paths that agents are instructed to `read ### Repro ``` -# When no repo skills/ directory exists, the research skill is loaded from embedded +# When no repo skills/ directory exists, embedded skills are loaded # The generated prompt contains: - research + example-skill ... - /SKILL.md + /SKILL.md # The prompt instructs: "read the full skill file using `read_file` to get detailed instructions" # Agent attempts: -read_file("/SKILL.md") +read_file("/SKILL.md") # Result: File not found error ``` diff --git a/analysis/memory.md b/analysis/memory.md index 6ed9bff..d12a30b 100644 --- a/analysis/memory.md +++ b/analysis/memory.md @@ -221,9 +221,9 @@ Portable skill packages with SKILL.md + optional scripts per Agent Skills spec ( - `discover_skills()` [38..85] - scans 5 locations: embedded → global → extra → workspace → repo - `load_embedded_skills()` [88..102] - synthetic path `/SKILL.md` - `is_embedded_skill()` [161..163] - checks ` + - `ResearchTask`, `ResearchStatus` - task state (Pending/Complete/Failed) + - `register()`, `complete()`, `fail()`, `get()`, `list_pending()`, `take_completed()` + - `with_notifications()` - broadcast channel for interactive mode +- `crates/g3-core/src/tools/research.rs` [0..471] + - `execute_research()` - spawns scout agent in background tokio task + - `execute_research_status()` - check status of pending/completed research + - `CONTEXT_ERROR_PATTERNS` - detects context window exhaustion + - `strip_ansi_codes()`, `extract_report()` - report extraction utilities +- `crates/g3-core/src/lib.rs` + - `Agent.pending_research_manager` - field on Agent struct + - `inject_completed_research()` [781..836] - injects results as user messages + - `enable_research_notifications()` - for interactive mode -**Usage**: -```bash -background_process("research-topic", ".g3/bin/g3-research 'query'") -shell(".g3/bin/g3-research --status ") # or --list -read_file(".g3/research//report.md") -``` - -**Output**: `.g3/research//status.json` + `report.md` +**Tools**: `research` (async, returns research_id), `research_status` (check pending tasks) ### Plan Mode Structured task planning with cognitive forcing - requires happy/negative/boundary checks. diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 0d40167..a0a771b 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -7,6 +7,7 @@ pub mod error_handling; pub mod feedback_extraction; pub mod paths; pub mod project; +pub mod pending_research; pub mod provider_config; pub mod provider_registration; pub mod retry; @@ -160,6 +161,8 @@ pub struct Agent { acd_enabled: bool, /// Whether plan mode is active (gate blocks file changes without approved plan) in_plan_mode: bool, + /// Manager for async research tasks + pending_research_manager: pending_research::PendingResearchManager, } impl Agent { @@ -214,6 +217,7 @@ impl Agent { auto_memory: false, acd_enabled: false, in_plan_mode: false, + pending_research_manager: pending_research::PendingResearchManager::new(), } } @@ -767,6 +771,72 @@ impl Agent { self.working_dir = Some(working_dir); } + // ========================================================================= + // RESEARCH MANAGEMENT + // ========================================================================= + + /// Inject completed research results into the conversation context. + /// + /// Returns the number of research results injected. + pub fn inject_completed_research(&mut self) -> usize { + let completed = self.pending_research_manager.take_completed(); + + if completed.is_empty() { + return 0; + } + + for task in &completed { + let message_content = match task.status { + pending_research::ResearchStatus::Complete => { + format!( + "📋 **Research completed** (id: `{}`): {}\n\n{}", + task.id, + task.query, + task.result.as_deref().unwrap_or("No result available") + ) + } + pending_research::ResearchStatus::Failed => { + format!( + "❌ **Research failed** (id: `{}`): {}\n\nError: {}", + task.id, + task.query, + task.result.as_deref().unwrap_or("Unknown error") + ) + } + pending_research::ResearchStatus::Pending => continue, // Skip pending tasks + }; + + // Inject as a user message so the agent sees and responds to it + let message = g3_providers::Message::new(g3_providers::MessageRole::User, message_content); + self.context_window.add_message(message); + } + + completed.len() + } + + /// Subscribe to research completion notifications. + /// + /// Returns None if notifications are not enabled. + pub fn subscribe_research_notifications(&self) -> Option> { + self.pending_research_manager.subscribe() + } + + /// Enable research completion notifications and return a receiver. + /// + /// This replaces the internal research manager with one that sends notifications. + /// Call this once during setup (e.g., in interactive mode) before any research tasks. + /// Returns a receiver that will receive notifications when research tasks complete. + pub fn enable_research_notifications(&mut self) -> tokio::sync::broadcast::Receiver { + let (manager, rx) = pending_research::PendingResearchManager::with_notifications(); + self.pending_research_manager = manager; + rx + } + + /// Get a reference to the pending research manager. + pub fn pending_research_manager(&self) -> &pending_research::PendingResearchManager { + &self.pending_research_manager + } + // ========================================================================= // TASK EXECUTION // ========================================================================= @@ -2085,6 +2155,13 @@ Skip if nothing new. Be brief."#; tokio::time::sleep(tokio::time::Duration::from_millis(50)).await; } + // Inject any completed research results into the context + let injected_count = self.inject_completed_research(); + if injected_count > 0 { + debug!("Injected {} completed research result(s) into context", injected_count); + self.ui_writer.println(&format!("📋 {} research result(s) ready and injected into context", injected_count)); + } + // Get provider info for logging, then drop it to avoid borrow issues let (provider_name, provider_model) = { let provider = self.providers.get(None)?; @@ -2898,6 +2975,7 @@ Skip if nothing new. Be brief."#; webdriver_process: &self.webdriver_process, background_process_manager: &self.background_process_manager, todo_content: &self.todo_content, + pending_research_manager: &self.pending_research_manager, pending_images: &mut self.pending_images, is_autonomous: self.is_autonomous, requirements_sha: self.requirements_sha.as_deref(), diff --git a/crates/g3-core/src/pending_research.rs b/crates/g3-core/src/pending_research.rs new file mode 100644 index 0000000..e4d412f --- /dev/null +++ b/crates/g3-core/src/pending_research.rs @@ -0,0 +1,547 @@ +//! Pending research manager for async research tasks. +//! +//! This module manages research tasks that run in the background while the agent +//! continues with other work. Research results are stored until they can be +//! injected into the conversation at a natural break point. Completion notifications +//! are sent via a channel for real-time UI updates. + +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::sync::{Arc, Mutex}; +use std::time::{Duration, Instant}; +use tracing::debug; + +/// Unique identifier for a research task +pub type ResearchId = String; + +/// Status of a research task +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub enum ResearchStatus { + /// Research is in progress + Pending, + /// Research completed successfully + Complete, + /// Research failed with an error + Failed, +} + +impl std::fmt::Display for ResearchStatus { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ResearchStatus::Pending => write!(f, "pending"), + ResearchStatus::Complete => write!(f, "complete"), + ResearchStatus::Failed => write!(f, "failed"), + } + } +} + +/// A research task being tracked by the manager +#[derive(Debug, Clone)] +pub struct ResearchTask { + /// Unique identifier for this research task + pub id: ResearchId, + /// The original research query + pub query: String, + /// Current status of the research + pub status: ResearchStatus, + /// The research result (report or error message) + pub result: Option, + /// When the research was initiated + pub started_at: Instant, + /// Whether this result has been injected into the conversation + pub injected: bool, +} + +impl ResearchTask { + /// Create a new pending research task + pub fn new(id: ResearchId, query: String) -> Self { + Self { + id, + query, + status: ResearchStatus::Pending, + result: None, + started_at: Instant::now(), + injected: false, + } + } + + /// Get the elapsed time since the research started + pub fn elapsed(&self) -> Duration { + self.started_at.elapsed() + } + + /// Format elapsed time for display + pub fn elapsed_display(&self) -> String { + let secs = self.elapsed().as_secs(); + if secs < 60 { + format!("{}s", secs) + } else { + format!("{}m {}s", secs / 60, secs % 60) + } + } +} + +/// Notification sent when a research task completes (success or failure) +#[derive(Debug, Clone)] +pub struct ResearchCompletionNotification { + /// The research ID that completed + pub id: ResearchId, + /// Whether it succeeded or failed + pub status: ResearchStatus, + /// The query that was researched + pub query: String, +} + +/// Thread-safe manager for pending research tasks +#[derive(Debug, Clone)] +pub struct PendingResearchManager { + tasks: Arc>>, + /// Channel sender for completion notifications (optional, for UI updates) + completion_tx: Option>, +} + +impl Default for PendingResearchManager { + fn default() -> Self { + Self::new() + } +} + +impl PendingResearchManager { + /// Create a new pending research manager + pub fn new() -> Self { + Self { + tasks: Arc::new(Mutex::new(HashMap::new())), + completion_tx: None, + } + } + + /// Create a new pending research manager with completion notifications enabled. + /// + /// Returns the manager and a receiver for completion notifications. + /// The receiver can be used to get real-time updates when research completes. + pub fn with_notifications() -> (Self, tokio::sync::broadcast::Receiver) { + // Buffer size of 16 should be plenty for concurrent research tasks + let (tx, rx) = tokio::sync::broadcast::channel(16); + let manager = Self { + tasks: Arc::new(Mutex::new(HashMap::new())), + completion_tx: Some(tx), + }; + (manager, rx) + } + + /// Subscribe to completion notifications. + /// + /// Returns None if notifications are not enabled (manager created with `new()`). + pub fn subscribe(&self) -> Option> { + self.completion_tx.as_ref().map(|tx| tx.subscribe()) + } + + /// Generate a unique research ID + pub fn generate_id() -> ResearchId { + use std::time::{SystemTime, UNIX_EPOCH}; + use std::sync::atomic::{AtomicU32, Ordering}; + static COUNTER: AtomicU32 = AtomicU32::new(0); + + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_millis(); + let counter = COUNTER.fetch_add(1, Ordering::Relaxed); + format!("research_{:x}_{:08x}", timestamp, counter) + } + + /// Register a new research task + /// + /// Returns the research ID for tracking + pub fn register(&self, query: &str) -> ResearchId { + let id = Self::generate_id(); + let task = ResearchTask::new(id.clone(), query.to_string()); + + let mut tasks = self.tasks.lock().unwrap(); + tasks.insert(id.clone(), task); + + debug!("Registered research task: {} for query: {}", id, query); + id + } + + /// Update a research task with its result + pub fn complete(&self, id: &ResearchId, result: String) { + let notification = { + let mut tasks = self.tasks.lock().unwrap(); + if let Some(task) = tasks.get_mut(id) { + task.status = ResearchStatus::Complete; + task.result = Some(result); + debug!("Research task {} completed successfully", id); + Some(ResearchCompletionNotification { + id: id.clone(), + status: ResearchStatus::Complete, + query: task.query.clone(), + }) + } else { + None + } + }; + // Send notification outside the lock to avoid potential deadlocks + if let (Some(notification), Some(tx)) = (notification, &self.completion_tx) { + let _ = tx.send(notification); // Ignore error if no receivers + } + } + + /// Mark a research task as failed + pub fn fail(&self, id: &ResearchId, error: String) { + let notification = { + let mut tasks = self.tasks.lock().unwrap(); + if let Some(task) = tasks.get_mut(id) { + task.status = ResearchStatus::Failed; + task.result = Some(error); + debug!("Research task {} failed", id); + Some(ResearchCompletionNotification { + id: id.clone(), + status: ResearchStatus::Failed, + query: task.query.clone(), + }) + } else { + None + } + }; + // Send notification outside the lock to avoid potential deadlocks + if let (Some(notification), Some(tx)) = (notification, &self.completion_tx) { + let _ = tx.send(notification); // Ignore error if no receivers + } + } + + /// Get the status of a specific research task + pub fn get_status(&self, id: &ResearchId) -> Option<(ResearchStatus, Option)> { + let tasks = self.tasks.lock().unwrap(); + tasks.get(id).map(|t| (t.status.clone(), t.result.clone())) + } + + /// Get a specific research task + pub fn get(&self, id: &ResearchId) -> Option { + let tasks = self.tasks.lock().unwrap(); + tasks.get(id).cloned() + } + + /// List all pending (not yet injected) research tasks + pub fn list_pending(&self) -> Vec { + let tasks = self.tasks.lock().unwrap(); + tasks + .values() + .filter(|t| !t.injected) + .cloned() + .collect() + } + + /// List all research tasks (including injected ones) + pub fn list_all(&self) -> Vec { + let tasks = self.tasks.lock().unwrap(); + tasks.values().cloned().collect() + } + + /// Get count of pending (in-progress) research tasks + pub fn pending_count(&self) -> usize { + let tasks = self.tasks.lock().unwrap(); + tasks + .values() + .filter(|t| t.status == ResearchStatus::Pending) + .count() + } + + /// Get count of completed but not yet injected research tasks + pub fn ready_count(&self) -> usize { + let tasks = self.tasks.lock().unwrap(); + tasks + .values() + .filter(|t| !t.injected && t.status != ResearchStatus::Pending) + .count() + } + + /// Take all completed research tasks that haven't been injected yet + /// + /// Marks them as injected so they won't be returned again + pub fn take_completed(&self) -> Vec { + let mut tasks = self.tasks.lock().unwrap(); + let mut completed = Vec::new(); + + for task in tasks.values_mut() { + if !task.injected && task.status != ResearchStatus::Pending { + task.injected = true; + completed.push(task.clone()); + } + } + + debug!("Took {} completed research tasks for injection", completed.len()); + completed + } + + /// Remove a research task (e.g., after it's been fully processed) + pub fn remove(&self, id: &ResearchId) -> Option { + let mut tasks = self.tasks.lock().unwrap(); + tasks.remove(id) + } + + /// Clear all completed and injected tasks (cleanup) + pub fn cleanup_injected(&self) { + let mut tasks = self.tasks.lock().unwrap(); + tasks.retain(|_, t| !t.injected); + } + + /// Check if there are any tasks (pending or ready) + pub fn has_tasks(&self) -> bool { + let tasks = self.tasks.lock().unwrap(); + !tasks.is_empty() + } + + /// Format a summary of pending research for display + pub fn format_status_summary(&self) -> Option { + let tasks = self.tasks.lock().unwrap(); + + let pending: Vec<_> = tasks.values().filter(|t| t.status == ResearchStatus::Pending).collect(); + let ready: Vec<_> = tasks.values().filter(|t| !t.injected && t.status != ResearchStatus::Pending).collect(); + + if pending.is_empty() && ready.is_empty() { + return None; + } + + let mut parts = Vec::new(); + + if !pending.is_empty() { + parts.push(format!("🔍 {} researching", pending.len())); + } + + if !ready.is_empty() { + parts.push(format!("📋 {} ready", ready.len())); + } + + Some(parts.join(" | ")) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::thread; + use std::time::Duration; + + #[test] + fn test_register_and_get() { + let manager = PendingResearchManager::new(); + + let id = manager.register("How to use tokio?"); + + let task = manager.get(&id).unwrap(); + assert_eq!(task.query, "How to use tokio?"); + assert_eq!(task.status, ResearchStatus::Pending); + assert!(task.result.is_none()); + assert!(!task.injected); + } + + #[test] + fn test_complete_research() { + let manager = PendingResearchManager::new(); + + let id = manager.register("Test query"); + manager.complete(&id, "Research report here".to_string()); + + let (status, result) = manager.get_status(&id).unwrap(); + assert_eq!(status, ResearchStatus::Complete); + assert_eq!(result.unwrap(), "Research report here"); + } + + #[test] + fn test_fail_research() { + let manager = PendingResearchManager::new(); + + let id = manager.register("Test query"); + manager.fail(&id, "Connection timeout".to_string()); + + let (status, result) = manager.get_status(&id).unwrap(); + assert_eq!(status, ResearchStatus::Failed); + assert_eq!(result.unwrap(), "Connection timeout"); + } + + #[test] + fn test_take_completed() { + let manager = PendingResearchManager::new(); + + let id1 = manager.register("Query 1"); + let id2 = manager.register("Query 2"); + let _id3 = manager.register("Query 3"); + + // Complete two, leave one pending + manager.complete(&id1, "Report 1".to_string()); + manager.fail(&id2, "Error".to_string()); + // id3 stays pending + + // Take completed + let completed = manager.take_completed(); + assert_eq!(completed.len(), 2); + + // Taking again should return empty (already injected) + let completed_again = manager.take_completed(); + assert!(completed_again.is_empty()); + + // Pending count should be 1 + assert_eq!(manager.pending_count(), 1); + } + + #[test] + fn test_list_pending() { + let manager = PendingResearchManager::new(); + + let id1 = manager.register("Query 1"); + let id2 = manager.register("Query 2"); + + manager.complete(&id1, "Report".to_string()); + + // Both should be in list_pending (not injected yet) + let pending = manager.list_pending(); + assert_eq!(pending.len(), 2); + + // Take completed + manager.take_completed(); + + // Now only the actually pending one should be listed + let pending = manager.list_pending(); + assert_eq!(pending.len(), 1); + assert_eq!(pending[0].id, id2); + } + + #[test] + fn test_format_status_summary() { + let manager = PendingResearchManager::new(); + + // Empty - no summary + assert!(manager.format_status_summary().is_none()); + + // One pending + let _id1 = manager.register("Query 1"); + let summary = manager.format_status_summary().unwrap(); + assert!(summary.contains("1 researching")); + + // One pending, one ready + let id2 = manager.register("Query 2"); + manager.complete(&id2, "Report".to_string()); + let summary = manager.format_status_summary().unwrap(); + assert!(summary.contains("1 researching")); + assert!(summary.contains("1 ready")); + } + + #[test] + fn test_thread_safety() { + let manager = PendingResearchManager::new(); + let manager_clone = manager.clone(); + + let id = manager.register("Concurrent test"); + let id_clone = id.clone(); + + let handle = thread::spawn(move || { + thread::sleep(Duration::from_millis(10)); + manager_clone.complete(&id_clone, "Result from thread".to_string()); + }); + + // Main thread checks status + loop { + if let Some((status, _)) = manager.get_status(&id) { + if status == ResearchStatus::Complete { + break; + } + } + thread::sleep(Duration::from_millis(5)); + } + + handle.join().unwrap(); + + let (status, result) = manager.get_status(&id).unwrap(); + assert_eq!(status, ResearchStatus::Complete); + assert_eq!(result.unwrap(), "Result from thread"); + } + + #[test] + fn test_elapsed_display() { + let manager = PendingResearchManager::new(); + let id = manager.register("Test"); + + let task = manager.get(&id).unwrap(); + let display = task.elapsed_display(); + + // Should be "0s" or similar (just started) + assert!(display.contains('s')); + } + + #[test] + fn test_cleanup_injected() { + let manager = PendingResearchManager::new(); + + let id1 = manager.register("Query 1"); + let id2 = manager.register("Query 2"); + + manager.complete(&id1, "Report 1".to_string()); + manager.complete(&id2, "Report 2".to_string()); + + // Take and inject + manager.take_completed(); + + // Both should still exist + assert_eq!(manager.list_all().len(), 2); + + // Cleanup injected + manager.cleanup_injected(); + + // Should be empty now + assert_eq!(manager.list_all().len(), 0); + } + + #[test] + fn test_generate_id_uniqueness() { + let ids: Vec<_> = (0..100).map(|_| PendingResearchManager::generate_id()).collect(); + let unique: std::collections::HashSet<_> = ids.iter().collect(); + assert_eq!(ids.len(), unique.len(), "Generated IDs should be unique"); + } + + #[tokio::test] + async fn test_notifications_on_complete() { + let (manager, mut rx) = PendingResearchManager::with_notifications(); + + let id = manager.register("Test query"); + + // Complete the research + manager.complete(&id, "Report content".to_string()); + + // Should receive a notification + let notification = rx.recv().await.unwrap(); + assert_eq!(notification.id, id); + assert_eq!(notification.status, ResearchStatus::Complete); + assert_eq!(notification.query, "Test query"); + } + + #[tokio::test] + async fn test_notifications_on_fail() { + let (manager, mut rx) = PendingResearchManager::with_notifications(); + + let id = manager.register("Test query"); + + // Fail the research + manager.fail(&id, "Connection error".to_string()); + + // Should receive a notification + let notification = rx.recv().await.unwrap(); + assert_eq!(notification.id, id); + assert_eq!(notification.status, ResearchStatus::Failed); + assert_eq!(notification.query, "Test query"); + } + + #[test] + fn test_no_notifications_without_setup() { + let manager = PendingResearchManager::new(); + // subscribe() should return None when notifications not enabled + assert!(manager.subscribe().is_none()); + } + + #[test] + fn test_get_nonexistent_returns_none() { + let manager = PendingResearchManager::new(); + assert!(manager.get(&"nonexistent_id".to_string()).is_none()); + assert!(manager.get_status(&"nonexistent_id".to_string()).is_none()); + } +} diff --git a/crates/g3-core/src/skills/discovery.rs b/crates/g3-core/src/skills/discovery.rs index 190f6ef..67be8b2 100644 --- a/crates/g3-core/src/skills/discovery.rs +++ b/crates/g3-core/src/skills/discovery.rs @@ -182,12 +182,11 @@ mod tests { #[test] fn test_discover_embedded_skills() { - // With no directories, should still find embedded skills + // With no directories and no embedded skills, should return empty let skills = discover_skills(None, &[]); - // Should have at least the research skill - assert!(!skills.is_empty(), "Should have embedded skills"); - assert!(skills.iter().any(|s| s.name == "research"), "Should have research skill"); + // No embedded skills currently (research was moved to first-class tool) + assert!(skills.is_empty(), "Should have no skills when no directories provided"); } #[test] @@ -204,10 +203,9 @@ mod tests { let skills = discover_skills(Some(workspace), &[]); - // Should have embedded + workspace skills + // Should have workspace skills assert!(skills.iter().any(|s| s.name == "test-skill")); assert!(skills.iter().any(|s| s.name == "another-skill")); - assert!(skills.iter().any(|s| s.name == "research")); // embedded } #[test] @@ -227,22 +225,20 @@ mod tests { } #[test] - fn test_repo_overrides_embedded() { + fn test_repo_skill_discovery() { let temp = TempDir::new().unwrap(); let workspace = temp.path(); - // Create repo skills directory with a skill that overrides embedded + // Create repo skills directory let skills_dir = workspace.join("skills"); fs::create_dir_all(&skills_dir).unwrap(); - // Override the embedded research skill - create_skill_dir(&skills_dir, "research", "Custom research skill"); + create_skill_dir(&skills_dir, "custom-skill", "Custom skill"); let skills = discover_skills(Some(workspace), &[]); - let research = skills.iter().find(|s| s.name == "research").unwrap(); - assert_eq!(research.description, "Custom research skill"); - assert!(!research.path.starts_with("/SKILL.md` in the repo /// 2. Add an entry here with `include_str!` -static EMBEDDED_SKILLS: &[EmbeddedSkill] = &[ - EmbeddedSkill { - name: "research", - skill_md: include_str!("../../../../skills/research/SKILL.md"), - }, -]; +static EMBEDDED_SKILLS: &[EmbeddedSkill] = &[]; /// Get all embedded skills. pub fn get_embedded_skills() -> &'static [EmbeddedSkill] { @@ -46,23 +41,14 @@ mod tests { use super::*; #[test] - fn test_embedded_skills_exist() { + fn test_embedded_skills_empty() { + // Currently no embedded skills - research was moved to first-class tool let skills = get_embedded_skills(); - assert!(!skills.is_empty(), "Should have at least one embedded skill"); + assert!(skills.is_empty(), "No embedded skills expected"); } #[test] - fn test_research_skill_embedded() { - let skill = get_embedded_skill("research"); - assert!(skill.is_some(), "Research skill should be embedded"); - - let skill = skill.unwrap(); - assert!(skill.skill_md.contains("name: research"), "SKILL.md should have name field"); - } - - #[test] - fn test_get_by_name() { - assert!(get_embedded_skill("research").is_some()); + fn test_get_nonexistent_skill() { assert!(get_embedded_skill("nonexistent").is_none()); } } diff --git a/crates/g3-core/src/skills/prompt.rs b/crates/g3-core/src/skills/prompt.rs index ff20e5f..444d59f 100644 --- a/crates/g3-core/src/skills/prompt.rs +++ b/crates/g3-core/src/skills/prompt.rs @@ -145,13 +145,13 @@ mod tests { // Embedded skill paths use syntax which must NOT be escaped // so the LLM can use them directly with read_file let skills = vec![ - make_skill("research", "Web research skill", "/SKILL.md"), + make_skill("example-skill", "Example embedded skill", "/SKILL.md"), ]; let result = generate_skills_prompt(&skills); // The path should appear unescaped - assert!(result.contains("/SKILL.md")); + assert!(result.contains("/SKILL.md")); // Should NOT be escaped assert!(!result.contains("<embedded:research>")); } diff --git a/crates/g3-core/src/tool_definitions.rs b/crates/g3-core/src/tool_definitions.rs index 1cd8e57..746f75f 100644 --- a/crates/g3-core/src/tool_definitions.rs +++ b/crates/g3-core/src/tool_definitions.rs @@ -12,6 +12,7 @@ use serde_json::json; pub struct ToolConfig { pub webdriver: bool, pub computer_control: bool, + pub exclude_research: bool, } impl ToolConfig { @@ -19,8 +20,16 @@ impl ToolConfig { Self { webdriver, computer_control, + exclude_research: false, } } + + /// Create a config with the research tool excluded. + /// Used for scout agent to prevent recursion. + pub fn with_research_excluded(mut self) -> Self { + self.exclude_research = true; + self + } } /// Create tool definitions for native tool calling providers. @@ -28,7 +37,7 @@ impl ToolConfig { /// Returns a vector of Tool definitions that describe the available tools /// and their input schemas. pub fn create_tool_definitions(config: ToolConfig) -> Vec { - let mut tools = create_core_tools(); + let mut tools = create_core_tools(config.exclude_research); if config.webdriver { tools.extend(create_webdriver_tools()); @@ -38,7 +47,7 @@ pub fn create_tool_definitions(config: ToolConfig) -> Vec { } /// Create the core tools that are always available -fn create_core_tools() -> Vec { +fn create_core_tools(exclude_research: bool) -> Vec { let mut tools = vec![ Tool { name: "shell".to_string(), @@ -258,6 +267,40 @@ fn create_core_tools() -> Vec { }), }); + // Conditionally add the research tool (excluded for scout agent to prevent recursion) + if !exclude_research { + tools.push(Tool { + name: "research".to_string(), + description: "Initiate web-based research on a topic. This tool is ASYNCHRONOUS - it spawns a research agent in the background and returns immediately with a research_id. Results are automatically injected into the conversation when ready. Use this when you need to research APIs, SDKs, libraries, approaches, bugs, or documentation. If you need the results before continuing, say so and yield the turn to the user. Check status with research_status tool.".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "The research question or topic to investigate. Be specific about what you need to know." + } + }, + "required": ["query"] + }), + }); + + // research_status tool - check status of pending research + tools.push(Tool { + name: "research_status".to_string(), + description: "Check the status of pending research tasks. Call without arguments to list all pending research, or with a research_id to check a specific task. Use this to see if research has completed before it's automatically injected.".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "research_id": { + "type": "string", + "description": "Optional: specific research_id to check. If omitted, lists all pending research tasks." + } + }, + "required": [] + }), + }); + } + tools } @@ -460,12 +503,12 @@ mod tests { #[test] fn test_core_tools_count() { - let tools = create_core_tools(); - // Core tools: shell, background_process, read_file, read_image, - // write_file, str_replace, code_search, - // research, research_status, remember, plan_read, plan_write, plan_approve - // (14 total - memory is auto-loaded, only remember tool needed) - assert_eq!(tools.len(), 12); + let tools = create_core_tools(false); + // Core tools (with research): shell, background_process, read_file, read_image, + // write_file, str_replace, code_search, plan_read, plan_write, plan_approve, + // remember, rehydrate, research, research_status + // (14 total) + assert_eq!(tools.len(), 14); } #[test] @@ -479,7 +522,7 @@ mod tests { fn test_create_tool_definitions_core_only() { let config = ToolConfig::default(); let tools = create_tool_definitions(config); - assert_eq!(tools.len(), 12); + assert_eq!(tools.len(), 14); } #[test] @@ -487,16 +530,24 @@ mod tests { let config = ToolConfig::new(true, true); let tools = create_tool_definitions(config); // 14 core + 15 webdriver = 29 - assert_eq!(tools.len(), 27); + assert_eq!(tools.len(), 29); } #[test] fn test_tool_has_required_fields() { - let tools = create_core_tools(); + let tools = create_core_tools(false); for tool in tools { assert!(!tool.name.is_empty(), "Tool name should not be empty"); assert!(!tool.description.is_empty(), "Tool description should not be empty"); assert!(tool.input_schema.is_object(), "Tool input_schema should be an object"); } } + + #[test] + fn test_exclude_research_reduces_tool_count() { + let with_research = create_core_tools(false); + let without_research = create_core_tools(true); + // Excluding research removes 2 tools (research and research_status) + assert_eq!(with_research.len() - without_research.len(), 2); + } } diff --git a/crates/g3-core/src/tool_dispatch.rs b/crates/g3-core/src/tool_dispatch.rs index b88d5e0..13ebfe1 100644 --- a/crates/g3-core/src/tool_dispatch.rs +++ b/crates/g3-core/src/tool_dispatch.rs @@ -7,7 +7,7 @@ use anyhow::Result; use tracing::{debug, warn}; use crate::tools::executor::ToolContext; -use crate::tools::{acd, file_ops, memory, misc, plan, shell, webdriver}; +use crate::tools::{acd, file_ops, memory, misc, plan, research, shell, webdriver}; use crate::ui_writer::UiWriter; use crate::ToolCall; @@ -40,10 +40,9 @@ pub async fn dispatch_tool( // Miscellaneous tools "code_search" => misc::execute_code_search(tool_call, ctx).await, - // Research tool (deprecated - now a skill) - "research" | "research_status" => { - Ok("⚠️ The `research` tool has been replaced by the **research skill**. Use `background_process` to run `.g3/bin/g3-research` instead. See the research skill documentation for details.".to_string()) - } + // Research tools + "research" => research::execute_research(tool_call, ctx).await, + "research_status" => research::execute_research_status(tool_call, ctx).await, // Workspace memory tools "remember" => memory::execute_remember(tool_call, ctx).await, diff --git a/crates/g3-core/src/tools/acd.rs b/crates/g3-core/src/tools/acd.rs index 036def9..f2beb8b 100644 --- a/crates/g3-core/src/tools/acd.rs +++ b/crates/g3-core/src/tools/acd.rs @@ -120,6 +120,7 @@ mod tests { use crate::acd::Fragment; use crate::ui_writer::NullUiWriter; use crate::background_process::BackgroundProcessManager; + use crate::pending_research::PendingResearchManager; use serial_test::serial; use crate::webdriver_session::WebDriverSession; use g3_providers::{Message, MessageRole}; @@ -134,6 +135,7 @@ mod tests { background_process_manager: Arc, todo_content: Arc>, pending_images: Vec, + pending_research_manager: PendingResearchManager, config: g3_config::Config, } @@ -146,6 +148,7 @@ mod tests { background_process_manager: Arc::new(BackgroundProcessManager::new(std::path::PathBuf::from("/tmp"))), todo_content: Arc::new(RwLock::new(String::new())), pending_images: Vec::new(), + pending_research_manager: PendingResearchManager::new(), config: g3_config::Config::default(), } } @@ -164,6 +167,7 @@ mod tests { webdriver_process: &test_ctx.webdriver_process, background_process_manager: &test_ctx.background_process_manager, todo_content: &test_ctx.todo_content, + pending_research_manager: &test_ctx.pending_research_manager, pending_images: &mut test_ctx.pending_images, is_autonomous: false, requirements_sha: None, @@ -194,6 +198,7 @@ mod tests { webdriver_process: &test_ctx.webdriver_process, background_process_manager: &test_ctx.background_process_manager, todo_content: &test_ctx.todo_content, + pending_research_manager: &test_ctx.pending_research_manager, pending_images: &mut test_ctx.pending_images, is_autonomous: false, requirements_sha: None, @@ -224,6 +229,7 @@ mod tests { webdriver_process: &test_ctx.webdriver_process, background_process_manager: &test_ctx.background_process_manager, todo_content: &test_ctx.todo_content, + pending_research_manager: &test_ctx.pending_research_manager, pending_images: &mut test_ctx.pending_images, is_autonomous: false, requirements_sha: None, diff --git a/crates/g3-core/src/tools/executor.rs b/crates/g3-core/src/tools/executor.rs index b5d4751..5f0555a 100644 --- a/crates/g3-core/src/tools/executor.rs +++ b/crates/g3-core/src/tools/executor.rs @@ -4,6 +4,7 @@ use anyhow::Result; use std::sync::Arc; use tokio::sync::RwLock; +use crate::pending_research::PendingResearchManager; use crate::background_process::BackgroundProcessManager; use crate::paths::{ensure_session_dir, get_session_todo_path, get_todo_path}; use crate::ui_writer::UiWriter; @@ -22,6 +23,7 @@ pub struct ToolContext<'a, W: UiWriter> { pub webdriver_process: &'a Arc>>, pub background_process_manager: &'a Arc, pub todo_content: &'a Arc>, + pub pending_research_manager: &'a PendingResearchManager, pub pending_images: &'a mut Vec, pub is_autonomous: bool, pub requirements_sha: Option<&'a str>, diff --git a/crates/g3-core/src/tools/file_ops.rs b/crates/g3-core/src/tools/file_ops.rs index aa5ad2d..e4231f9 100644 --- a/crates/g3-core/src/tools/file_ops.rs +++ b/crates/g3-core/src/tools/file_ops.rs @@ -913,18 +913,17 @@ mod tests { #[test] fn test_try_read_embedded_skill_unescaped() { - // Normal unescaped path should work - let result = try_read_embedded_skill("/SKILL.md"); - assert!(result.is_some(), "Should find embedded research skill"); - assert!(result.unwrap().contains("name: research")); + // No embedded skills currently (research was moved to first-class tool) + let result = try_read_embedded_skill("/SKILL.md"); + assert!(result.is_none(), "Should not find nonexistent embedded skill"); } #[test] fn test_try_read_embedded_skill_escaped() { - // XML-escaped path should also work (fallback for LLM quirks) - let result = try_read_embedded_skill("<embedded:research>/SKILL.md"); - assert!(result.is_some(), "Should find embedded research skill with escaped path"); - assert!(result.unwrap().contains("name: research")); + // XML-escaped path parsing should work even with no embedded skills + // This tests the path parsing, not the skill existence + let result = try_read_embedded_skill("<embedded:nonexistent>/SKILL.md"); + assert!(result.is_none(), "Should not find nonexistent embedded skill with escaped path"); } #[test] @@ -940,7 +939,7 @@ mod tests { let result = try_read_embedded_skill("/path/to/SKILL.md"); assert!(result.is_none(), "Regular path should not match"); - let result = try_read_embedded_skill("skills/research/SKILL.md"); + let result = try_read_embedded_skill("skills/example/SKILL.md"); assert!(result.is_none(), "Relative path should not match"); } } diff --git a/crates/g3-core/src/tools/mod.rs b/crates/g3-core/src/tools/mod.rs index c08a3fd..1f49f81 100644 --- a/crates/g3-core/src/tools/mod.rs +++ b/crates/g3-core/src/tools/mod.rs @@ -17,6 +17,7 @@ pub mod invariants; pub mod memory; pub mod misc; pub mod plan; +pub mod research; pub mod shell; pub mod webdriver; diff --git a/crates/g3-core/src/tools/research.rs b/crates/g3-core/src/tools/research.rs new file mode 100644 index 0000000..872face --- /dev/null +++ b/crates/g3-core/src/tools/research.rs @@ -0,0 +1,471 @@ +//! Research tool: spawns a scout agent to perform web-based research. +//! +//! The research tool is **asynchronous** - it spawns the scout agent in the background +//! and returns immediately with a research_id. The agent can continue with other work +//! while research is in progress. Results are automatically injected into the conversation +//! when ready, or the agent can check status with the `research_status` tool. + +use anyhow::Result; +use std::process::Stdio; +use tokio::io::{AsyncBufReadExt, BufReader}; +use tokio::process::Command; +use tracing::{debug, error}; + +use crate::ui_writer::UiWriter; +use crate::ToolCall; +use g3_config::WebDriverBrowser; + +use super::executor::ToolContext; + +/// Delimiter markers for scout report extraction +const REPORT_START_MARKER: &str = "---SCOUT_REPORT_START---"; +const REPORT_END_MARKER: &str = "---SCOUT_REPORT_END---"; + +/// Error patterns that indicate context window exhaustion +const CONTEXT_ERROR_PATTERNS: &[&str] = &[ + "context length", "context_length_exceeded", "maximum context", "token limit", + "too many tokens", "exceeds the model", "context window", "max_tokens", +]; + +/// Execute the research tool - spawns scout agent in background and returns immediately. +/// +/// This is the **async** version of research. It: +/// 1. Registers a new research task with the PendingResearchManager +/// 2. Spawns the scout agent in a background tokio task +/// 3. Returns immediately with a placeholder message containing the research_id +/// 4. The background task updates the manager when research completes +/// 5. Results are injected into the conversation at the next natural break point +pub async fn execute_research( + tool_call: &ToolCall, + ctx: &mut ToolContext<'_, W>, +) -> Result { + let query = tool_call + .args + .get("query") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing required 'query' parameter"))?; + + // Register the research task and get an ID + let research_id = ctx.pending_research_manager.register(query); + + // Clone values needed for the background task + let query_owned = query.to_string(); + let research_id_clone = research_id.clone(); + let manager = ctx.pending_research_manager.clone(); + let browser = ctx.config.webdriver.browser.clone(); + + // Find the g3 executable path + let g3_path = std::env::current_exe() + .unwrap_or_else(|_| std::path::PathBuf::from("g3")); + + // Spawn the scout agent in a background task + tokio::spawn(async move { + let result = run_scout_agent(&g3_path, &query_owned, browser).await; + + match result { + Ok(report) => { + debug!("Research {} completed successfully", research_id_clone); + manager.complete(&research_id_clone, report); + } + Err(e) => { + error!("Research {} failed: {}", research_id_clone, e); + manager.fail(&research_id_clone, e.to_string()); + } + } + }); + + // Return immediately with placeholder + let placeholder = format!( + "🔍 **Research initiated** (id: `{}`) + +\ +**Query:** {} + +\ +Research is running in the background. You can: +- Continue with other work - results will be automatically provided when ready +- Check status with `research_status` tool +- If you need the results before continuing, say so and yield the turn to the user + +\ +_Estimated time: 30-120 seconds depending on query complexity_", + research_id, + query + ); + + Ok(placeholder) +} + +/// Run the scout agent and return the research report. +/// This is the blocking part that runs in a background task. +async fn run_scout_agent( + g3_path: &std::path::Path, + query: &str, + browser: WebDriverBrowser, +) -> Result { + // Build the command with appropriate webdriver flags + let mut cmd = Command::new(g3_path); + cmd + .arg("--agent") + .arg("scout") + .arg("--new-session") // Always start fresh for research + .arg("--quiet"); // Suppress log file creation + + // Propagate the webdriver browser choice + match browser { + WebDriverBrowser::ChromeHeadless => { cmd.arg("--chrome-headless"); } + WebDriverBrowser::Safari => { cmd.arg("--webdriver"); } + } + + let mut child = cmd.arg(query) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .map_err(|e| anyhow::anyhow!("Failed to spawn scout agent: {}", e))?; + + // Capture stdout to find the report content + let stdout = child.stdout.take() + .ok_or_else(|| anyhow::anyhow!("Failed to capture scout agent stdout"))?; + + // Also capture stderr for error messages + let stderr = child.stderr.take() + .ok_or_else(|| anyhow::anyhow!("Failed to capture scout agent stderr"))?; + + let mut reader = BufReader::new(stdout).lines(); + let mut all_output = Vec::new(); + + // Spawn a task to collect stderr + let stderr_handle = tokio::spawn(async move { + let mut stderr_reader = BufReader::new(stderr).lines(); + let mut stderr_output = Vec::new(); + while let Some(line) = stderr_reader.next_line().await.ok().flatten() { + stderr_output.push(line); + } + stderr_output + }); + + // Collect stdout lines (no progress display in background) + while let Some(line) = reader.next_line().await? { + all_output.push(line); + } + + // Collect stderr output + let stderr_output = stderr_handle.await.unwrap_or_default(); + + // Wait for the process to complete + let status = child.wait().await + .map_err(|e| anyhow::anyhow!("Failed to wait for scout agent: {}", e))?; + + if !status.success() { + // Build detailed error message + let exit_code = status.code().map(|c| c.to_string()).unwrap_or_else(|| "unknown".to_string()); + let full_output = all_output.join("\n"); + let stderr_text = stderr_output.join("\n"); + + // Check for context window exhaustion + let combined_output = format!("{} {}", full_output, stderr_text).to_lowercase(); + let is_context_error = CONTEXT_ERROR_PATTERNS.iter() + .any(|pattern| combined_output.contains(pattern)); + + if is_context_error { + return Err(anyhow::anyhow!( + "Context Window Exhausted\n\n\ + The research query required more context than the model supports.\n\n\ + **Suggestions:**\n\ + - Try a more specific, narrower query\n\ + - Break the research into smaller sub-questions\n\ + - Use a model with a larger context window\n\n\ + Exit code: {}", + exit_code + )); + } + + // Generic error with details + return Err(anyhow::anyhow!( + "Scout Agent Failed\n\n\ + Exit code: {}\n\n\ + {}{}", + exit_code, + if !stderr_text.is_empty() { format!("**Error output:**\n{}\n\n", stderr_text.chars().take(1000).collect::()) } else { String::new() }, + if !all_output.is_empty() { format!("**Last output lines:**\n{}", all_output.iter().rev().take(10).rev().cloned().collect::>().join("\n")) } else { String::new() } + )); + } + + // Join all output and extract the report between markers + let full_output = all_output.join("\n"); + + extract_report(&full_output) +} + +/// Execute the research_status tool - check status of pending research tasks. +pub async fn execute_research_status( + tool_call: &ToolCall, + ctx: &mut ToolContext<'_, W>, +) -> Result { + let research_id = tool_call + .args + .get("research_id") + .and_then(|v| v.as_str()); + + if let Some(id) = research_id { + // Check specific research task + match ctx.pending_research_manager.get(&id.to_string()) { + Some(task) => { + let status_emoji = match task.status { + crate::pending_research::ResearchStatus::Pending => "🔄", + crate::pending_research::ResearchStatus::Complete => "✅", + crate::pending_research::ResearchStatus::Failed => "❌", + }; + + let mut output = format!( + "{} **Research Status** (id: `{}`)\n\n\ + **Query:** {}\n\ + **Status:** {}\n\ + **Elapsed:** {}\n", + status_emoji, + task.id, + task.query, + task.status, + task.elapsed_display() + ); + + if task.injected { + output.push_str("\n_Results have already been injected into the conversation._\n"); + } else if task.status != crate::pending_research::ResearchStatus::Pending { + output.push_str("\n_Results will be injected at the next opportunity._\n"); + } + + Ok(output) + } + None => Ok(format!("❓ No research task found with id: `{}`", id)), + } + } else { + // List all pending research tasks + let tasks = ctx.pending_research_manager.list_pending(); + + if tasks.is_empty() { + return Ok("📋 No pending research tasks.".to_string()); + } + + let mut output = format!("📋 **Pending Research Tasks** ({} total)\n\n", tasks.len()); + + for task in tasks { + let status_emoji = match task.status { + crate::pending_research::ResearchStatus::Pending => "🔄", + crate::pending_research::ResearchStatus::Complete => "✅", + crate::pending_research::ResearchStatus::Failed => "❌", + }; + + output.push_str(&format!( + "{} `{}` - {} ({})\n Query: {}\n\n", + status_emoji, + task.id, + task.status, + task.elapsed_display(), + truncate_query(&task.query, 60) + )); + } + + Ok(output) + } +} + +/// Truncate a query for display +fn truncate_query(query: &str, max_len: usize) -> String { + if query.chars().count() <= max_len { + query.to_string() + } else { + let truncated: String = query.chars().take(max_len - 3).collect(); + format!("{}...", truncated) + } +} + +/// Extract the research report from scout output. +/// +/// Looks for content between SCOUT_REPORT_START and SCOUT_REPORT_END markers. +/// Preserves ANSI escape codes in the extracted content for terminal formatting. +fn extract_report(output: &str) -> Result { + // Strip ANSI codes only for finding markers, but preserve them in the output + let clean_output = strip_ansi_codes(output); + + // Find the start marker + let start_pos = clean_output.find(REPORT_START_MARKER) + .ok_or_else(|| anyhow::anyhow!( + "Scout agent did not output a properly formatted report. Expected {} marker.", + REPORT_START_MARKER + ))?; + + // Find the end marker + let end_pos = clean_output.find(REPORT_END_MARKER) + .ok_or_else(|| anyhow::anyhow!( + "Scout agent report is incomplete. Expected {} marker.", + REPORT_END_MARKER + ))?; + + if end_pos <= start_pos { + return Err(anyhow::anyhow!("Invalid report format: end marker before start marker")); + } + + // Now find the same markers in the original output to preserve ANSI codes + // We need to find the marker positions accounting for ANSI codes + let original_start = find_marker_position(output, REPORT_START_MARKER) + .ok_or_else(|| anyhow::anyhow!("Could not find start marker in original output"))?; + let original_end = find_marker_position(output, REPORT_END_MARKER) + .ok_or_else(|| anyhow::anyhow!("Could not find end marker in original output"))?; + + // Extract content between markers from original (with ANSI codes) + let report_start = original_start + REPORT_START_MARKER.len(); + let report_content = output[report_start..original_end].trim(); + + if report_content.is_empty() { + return Ok("Scout agent returned an empty report.".to_string()); + } + + Ok(report_content.to_string()) +} + +/// Find the position of a marker in text that may contain ANSI codes. +/// Searches by stripping ANSI codes character by character to find the true position. +fn find_marker_position(text: &str, marker: &str) -> Option { + // Simple approach: search for the marker directly first + // The markers themselves shouldn't contain ANSI codes + if let Some(pos) = text.find(marker) { + return Some(pos); + } + + // If not found directly, the marker might be split by ANSI codes + // This is unlikely for our use case, but handle it gracefully + None +} + +/// Strip ANSI escape codes from a string. +/// +/// Handles common ANSI sequences like: +/// - CSI sequences: \x1b[...m (colors, styles) +/// - OSC sequences: \x1b]...\x07 (terminal titles, etc.) +pub fn strip_ansi_codes(s: &str) -> String { + let mut result = String::with_capacity(s.len()); + let mut chars = s.chars().peekable(); + + while let Some(c) = chars.next() { + if c == '\x1b' { + // Start of escape sequence + match chars.peek() { + Some('[') => { + // CSI sequence: \x1b[...X where X is a letter + chars.next(); // consume '[' + while let Some(&next) = chars.peek() { + chars.next(); + if next.is_ascii_alphabetic() { + break; + } + } + } + Some(']') => { + // OSC sequence: \x1b]...\x07 + chars.next(); // consume ']' + while let Some(&next) = chars.peek() { + chars.next(); + if next == '\x07' { + break; + } + } + } + _ => { + // Unknown escape, skip just the ESC + } + } + } else { + result.push(c); + } + } + + result +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_strip_ansi_codes() { + // Simple color code + assert_eq!(strip_ansi_codes("\x1b[31mred\x1b[0m"), "red"); + + // RGB color code (like the bug we saw) + assert_eq!( + strip_ansi_codes("\x1b[38;2;216;177;114mtmp/file.md\x1b[0m"), + "tmp/file.md" + ); + + // Multiple codes + assert_eq!( + strip_ansi_codes("\x1b[1m\x1b[32mbold green\x1b[0m normal"), + "bold green normal" + ); + + // No codes + assert_eq!(strip_ansi_codes("plain text"), "plain text"); + + // Empty string + assert_eq!(strip_ansi_codes(""), ""); + } + + #[test] + fn test_extract_report_success() { + let output = r#"Some preamble text +---SCOUT_REPORT_START--- +# Research Brief + +This is the report content. +---SCOUT_REPORT_END--- +Some trailing text"#; + + let result = extract_report(output).unwrap(); + assert!(result.contains("Research Brief")); + assert!(result.contains("This is the report content.")); + assert!(!result.contains("preamble")); + assert!(!result.contains("trailing")); + } + + #[test] + fn test_extract_report_with_ansi_codes() { + let output = "\x1b[32m---SCOUT_REPORT_START---\x1b[0m\n# Report\n\x1b[31m---SCOUT_REPORT_END---\x1b[0m"; + + let result = extract_report(output).unwrap(); + assert!(result.contains("# Report")); + } + + #[test] + fn test_extract_report_missing_start() { + let output = "No markers here"; + let result = extract_report(output); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("SCOUT_REPORT_START")); + } + + #[test] + fn test_extract_report_missing_end() { + let output = "---SCOUT_REPORT_START---\nContent but no end"; + let result = extract_report(output); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("SCOUT_REPORT_END")); + } + + #[test] + fn test_extract_report_empty_content() { + let output = "---SCOUT_REPORT_START---\n---SCOUT_REPORT_END---"; + let result = extract_report(output).unwrap(); + assert!(result.contains("empty report")); + } + + #[test] + fn test_truncate_query() { + assert_eq!(truncate_query("short query", 50), "short query"); + + let long_query = "This is a very long research query that should be truncated for display purposes"; + let result = truncate_query(long_query, 40); + assert!(result.len() <= 40); + assert!(result.ends_with("...")); + } +} diff --git a/docs/architecture.md b/docs/architecture.md index 3bc5d43..7753311 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -259,14 +259,13 @@ Key modules in `crates/g3-core/src/skills/`: Core skills are embedded at compile time using `include_str!`, ensuring g3 works anywhere without external files: ```rust +// Currently empty - skills can be added here as needed static EMBEDDED_SKILLS: &[EmbeddedSkill] = &[ - EmbeddedSkill { - name: "research", - skill_md: include_str!("../../../../skills/research/SKILL.md"), - scripts: &[ - ("g3-research", include_str!("../../../../skills/research/g3-research")), - ], - }, + // Example: + // EmbeddedSkill { + // name: "example-skill", + // skill_md: include_str!("../../../../skills/example-skill/SKILL.md"), + // }, ]; ``` diff --git a/docs/skills.md b/docs/skills.md index f6abb1c..67d2e45 100644 --- a/docs/skills.md +++ b/docs/skills.md @@ -157,14 +157,13 @@ Core skills are embedded into the g3 binary at compile time, ensuring they work Embedded skills use Rust's `include_str!` macro to compile SKILL.md and scripts into the binary: ```rust +// Currently empty - skills can be added here as needed static EMBEDDED_SKILLS: &[EmbeddedSkill] = &[ - EmbeddedSkill { - name: "research", - skill_md: include_str!("../../../../skills/research/SKILL.md"), - scripts: &[ - ("g3-research", include_str!("../../../../skills/research/g3-research")), - ], - }, + // Example of how to add an embedded skill: + // EmbeddedSkill { + // name: "example-skill", + // skill_md: include_str!("../../../../skills/example-skill/SKILL.md"), + // }, ]; ``` @@ -182,48 +181,6 @@ This ensures: - Updates propagate automatically when g3 is upgraded - No manual installation required -## The Research Skill - -The embedded `research` skill enables asynchronous web research: - -### Usage - -```bash -# Start research (always use background_process) -background_process("research-topic", ".g3/bin/g3-research 'Your query here'") - -# Check status -shell(".g3/bin/g3-research --list") - -# Read results when complete -read_file(".g3/research//report.md") -``` - -### How It Works - -1. **Start**: `g3-research` spawns a scout agent in the background -2. **Execute**: Scout uses browser automation to research the topic -3. **Save**: Results written to `.g3/research//report.md` -4. **Read**: Agent reads the report when needed - -### Directory Structure - -``` -.g3/research/ -├── research_1738700000_a1b2c3/ -│ ├── status.json # Machine-readable status -│ └── report.md # The research brief -└── research_1738700100_d4e5f6/ - ├── status.json - └── report.md -``` - -### Status Values - -- `running` - Research in progress -- `complete` - Report ready to read -- `failed` - Error occurred (check `error` field in status.json) - ## Creating Skills with Scripts Skills can include executable scripts for complex operations: @@ -300,12 +257,6 @@ If `.g3/bin/