feat: Externalize research tool as embedded skill

Replaces the built-in research/research_status tools with a portable
skill-based approach:

- Add embedded skills infrastructure (skills compiled into binary)
- Add repo-local skills/ directory support (highest priority)
- Create research skill with SKILL.md and g3-research shell script
- Script extraction to .g3/bin/ with version tracking
- Filesystem-based handoff via .g3/research/<id>/status.json
- Remove PendingResearchManager and all research tool code
- Update system prompt to reference skill instead of tool

Benefits:
- No special tool infrastructure needed (just shell + read_file)
- Context-efficient (reports stay on disk until needed)
- Crash-resilient (state persisted to filesystem)
- Portable (skill can be overridden per-workspace)

Breaking change: research tool calls now return a deprecation message
pointing to the research skill.
This commit is contained in:
Dhanji R. Prasanna
2026-02-05 13:23:26 +11:00
parent bf9e3dc878
commit 39e586982c
19 changed files with 949 additions and 1638 deletions

View File

@@ -6,7 +6,6 @@ pub mod context_window;
pub mod error_handling;
pub mod feedback_extraction;
pub mod paths;
pub mod pending_research;
pub mod project;
pub mod provider_config;
pub mod provider_registration;
@@ -39,9 +38,6 @@ pub use task_result::TaskResult;
// Re-export context window types
pub use context_window::{ContextWindow, ThinResult, ThinScope};
// Re-export pending research types for notification handling
pub use pending_research::{PendingResearchManager, ResearchCompletionNotification, ResearchStatus};
// Export agent prompt generation for CLI use
pub use prompts::{
get_agent_system_prompt, get_agent_system_prompt_with_skills,
@@ -164,8 +160,6 @@ pub struct Agent<W: UiWriter> {
acd_enabled: bool,
/// Whether plan mode is active (gate blocks file changes without approved plan)
in_plan_mode: bool,
/// Manager for async research tasks
pending_research_manager: pending_research::PendingResearchManager,
}
impl<W: UiWriter> Agent<W> {
@@ -220,7 +214,6 @@ impl<W: UiWriter> Agent<W> {
auto_memory: false,
acd_enabled: false,
in_plan_mode: false,
pending_research_manager: pending_research::PendingResearchManager::new(),
}
}
@@ -951,16 +944,11 @@ impl<W: UiWriter> Agent<W> {
let provider_name = provider.name().to_string();
let _has_native_tool_calling = provider.has_native_tool_calling();
let _supports_cache_control = provider.supports_cache_control();
// Check if we should exclude the research tool (scout agent to prevent recursion)
let exclude_research = self.agent_name.as_deref() == Some("scout");
let tools = if provider.has_native_tool_calling() {
let mut tool_config = tool_definitions::ToolConfig::new(
let tool_config = tool_definitions::ToolConfig::new(
self.config.webdriver.enabled,
self.config.computer_control.enabled,
);
if exclude_research {
tool_config = tool_config.with_research_excluded();
}
Some(tool_definitions::create_tool_definitions(tool_config))
} else {
None
@@ -1099,51 +1087,6 @@ impl<W: UiWriter> Agent<W> {
self.context_window.add_message(message);
}
/// Check for completed research tasks and inject them into the context.
///
/// This should be called at natural break points:
/// - End of each tool iteration (before next LLM call)
/// - Before prompting user in interactive mode
///
/// Returns the number of research results injected.
pub fn inject_completed_research(&mut self) -> usize {
let completed = self.pending_research_manager.take_completed();
if completed.is_empty() {
return 0;
}
for task in &completed {
let message_content = match task.status {
pending_research::ResearchStatus::Complete => {
format!(
"📋 **Research completed** (id: `{}`): {}\n\n{}",
task.id,
task.query,
task.result.as_deref().unwrap_or("No result available")
)
}
pending_research::ResearchStatus::Failed => {
format!(
"❌ **Research failed** (id: `{}`): {}\n\nError: {}",
task.id,
task.query,
task.result.as_deref().unwrap_or("Unknown error")
)
}
pending_research::ResearchStatus::Pending => continue, // Skip pending tasks
};
// Inject as a user message so the agent sees and responds to it
let message = Message::new(MessageRole::User, message_content);
self.context_window.add_message(message);
debug!("Injected research result for task {}", task.id);
}
completed.len()
}
/// Execute a tool call and return the result.
/// This is a public wrapper around execute_tool for use by external callers
/// like the planner's fast-discovery feature.
@@ -1504,30 +1447,6 @@ impl<W: UiWriter> Agent<W> {
&self.config
}
pub fn get_pending_research_manager(&self) -> &pending_research::PendingResearchManager {
&self.pending_research_manager
}
/// Subscribe to research completion notifications.
///
/// Returns a receiver that will receive notifications when research tasks complete.
/// Returns None if the agent was not configured with notifications enabled.
/// Use this in interactive mode to get real-time updates when research finishes.
pub fn subscribe_research_notifications(&self) -> Option<tokio::sync::broadcast::Receiver<pending_research::ResearchCompletionNotification>> {
self.pending_research_manager.subscribe()
}
/// Enable research completion notifications and return a receiver.
///
/// This replaces the internal research manager with one that sends notifications.
/// Call this once during setup (e.g., in interactive mode) before any research tasks.
/// Returns a receiver that will receive notifications when research tasks complete.
pub fn enable_research_notifications(&mut self) -> tokio::sync::broadcast::Receiver<pending_research::ResearchCompletionNotification> {
let (manager, rx) = pending_research::PendingResearchManager::with_notifications();
self.pending_research_manager = manager;
rx
}
pub fn set_requirements_sha(&mut self, sha: String) {
self.requirements_sha = Some(sha);
}
@@ -2150,14 +2069,6 @@ Skip if nothing new. Be brief."#;
tokio::time::sleep(tokio::time::Duration::from_millis(50)).await;
}
// Check for completed research and inject into context
// This happens at the start of each iteration, before the LLM call
let injected_count = self.inject_completed_research();
if injected_count > 0 {
debug!("Injected {} completed research result(s) into context", injected_count);
self.ui_writer.println(&format!("📋 {} research result(s) ready and injected into context", injected_count));
}
// Get provider info for logging, then drop it to avoid borrow issues
let (provider_name, provider_model) = {
let provider = self.providers.get(None)?;
@@ -2564,14 +2475,10 @@ Skip if nothing new. Be brief."#;
// Ensure tools are included for native providers in subsequent iterations
let provider_for_tools = self.providers.get(None)?;
if provider_for_tools.has_native_tool_calling() {
let mut tool_config = tool_definitions::ToolConfig::new(
let tool_config = tool_definitions::ToolConfig::new(
self.config.webdriver.enabled,
self.config.computer_control.enabled,
);
// Exclude research tool for scout agent to prevent recursion
if self.agent_name.as_deref() == Some("scout") {
tool_config = tool_config.with_research_excluded();
}
request.tools =
Some(tool_definitions::create_tool_definitions(tool_config));
}
@@ -2980,7 +2887,6 @@ Skip if nothing new. Be brief."#;
requirements_sha: self.requirements_sha.as_deref(),
context_total_tokens: self.context_window.total_tokens,
context_used_tokens: self.context_window.used_tokens,
pending_research_manager: &self.pending_research_manager,
};
// Dispatch to the appropriate tool handler

View File

@@ -1,540 +0,0 @@
//! Pending research manager for async research tasks.
//!
//! This module manages research tasks that run in the background while the agent
//! continues with other work. Research results are stored until they can be
//! injected into the conversation at a natural break point. Completion notifications
//! are sent via a channel for real-time UI updates.
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
use tracing::debug;
/// Unique identifier for a research task.
///
/// An opaque string produced by `PendingResearchManager::generate_id`
/// (millisecond timestamp plus a process-wide counter).
pub type ResearchId = String;
/// Status of a research task.
///
/// A task starts as `Pending`; the manager moves it to `Complete` or
/// `Failed` when `complete`/`fail` records a result. Lowercase names are
/// produced by the `Display` impl below for user-facing output.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ResearchStatus {
    /// Research is in progress
    Pending,
    /// Research completed successfully
    Complete,
    /// Research failed with an error
    Failed,
}
impl std::fmt::Display for ResearchStatus {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ResearchStatus::Pending => write!(f, "pending"),
ResearchStatus::Complete => write!(f, "complete"),
ResearchStatus::Failed => write!(f, "failed"),
}
}
}
/// A research task being tracked by the manager.
#[derive(Debug, Clone)]
pub struct ResearchTask {
    /// Unique identifier for this research task
    pub id: ResearchId,
    /// The original research query
    pub query: String,
    /// Current status of the research
    pub status: ResearchStatus,
    /// The research result: the report on success, the error message on failure
    pub result: Option<String>,
    /// When the research was initiated (monotonic clock)
    pub started_at: Instant,
    /// Whether this result has been injected into the conversation;
    /// set by `PendingResearchManager::take_completed` so a result is
    /// never delivered twice
    pub injected: bool,
}
impl ResearchTask {
    /// Build a task in its initial state: `Pending`, no result, not injected,
    /// with the start time stamped now.
    pub fn new(id: ResearchId, query: String) -> Self {
        Self {
            status: ResearchStatus::Pending,
            result: None,
            injected: false,
            started_at: Instant::now(),
            id,
            query,
        }
    }
    /// Time elapsed since the research started.
    pub fn elapsed(&self) -> Duration {
        self.started_at.elapsed()
    }
    /// Human-readable elapsed time, e.g. `"42s"` or `"3m 17s"`.
    pub fn elapsed_display(&self) -> String {
        let total = self.elapsed().as_secs();
        if total >= 60 {
            format!("{}m {}s", total / 60, total % 60)
        } else {
            format!("{}s", total)
        }
    }
}
/// Notification sent when a research task completes (success or failure).
///
/// Broadcast when the manager was created via `with_notifications()`.
/// Carries no result payload — fetch that through `get`, `get_status`,
/// or `take_completed` on the manager.
#[derive(Debug, Clone)]
pub struct ResearchCompletionNotification {
    /// The research ID that completed
    pub id: ResearchId,
    /// Terminal status: `Complete` or `Failed` (never `Pending`)
    pub status: ResearchStatus,
    /// The query that was researched
    pub query: String,
}
/// Thread-safe manager for pending research tasks.
///
/// `Clone` is cheap and shares state: all clones point at the same
/// `Arc<Mutex<…>>` task map, so a background worker can hold a clone and
/// report completion back to the original.
#[derive(Debug, Clone)]
pub struct PendingResearchManager {
    /// All known tasks, keyed by research ID
    tasks: Arc<Mutex<HashMap<ResearchId, ResearchTask>>>,
    /// Channel sender for completion notifications (optional, for UI updates)
    completion_tx: Option<tokio::sync::broadcast::Sender<ResearchCompletionNotification>>,
}
impl Default for PendingResearchManager {
    /// Equivalent to [`PendingResearchManager::new`]: empty task map, no
    /// notification channel.
    fn default() -> Self {
        Self::new()
    }
}
impl PendingResearchManager {
/// Create a new pending research manager
pub fn new() -> Self {
Self {
tasks: Arc::new(Mutex::new(HashMap::new())),
completion_tx: None,
}
}
/// Create a new pending research manager with completion notifications enabled.
///
/// Returns the manager and a receiver for completion notifications.
/// The receiver can be used to get real-time updates when research completes.
pub fn with_notifications() -> (Self, tokio::sync::broadcast::Receiver<ResearchCompletionNotification>) {
// Buffer size of 16 should be plenty for concurrent research tasks
let (tx, rx) = tokio::sync::broadcast::channel(16);
let manager = Self {
tasks: Arc::new(Mutex::new(HashMap::new())),
completion_tx: Some(tx),
};
(manager, rx)
}
/// Subscribe to completion notifications.
///
/// Returns None if notifications are not enabled (manager created with `new()`).
pub fn subscribe(&self) -> Option<tokio::sync::broadcast::Receiver<ResearchCompletionNotification>> {
self.completion_tx.as_ref().map(|tx| tx.subscribe())
}
/// Generate a unique research ID
pub fn generate_id() -> ResearchId {
use std::time::{SystemTime, UNIX_EPOCH};
use std::sync::atomic::{AtomicU32, Ordering};
static COUNTER: AtomicU32 = AtomicU32::new(0);
let timestamp = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_millis();
let counter = COUNTER.fetch_add(1, Ordering::Relaxed);
format!("research_{:x}_{:08x}", timestamp, counter)
}
/// Register a new research task
///
/// Returns the research ID for tracking
pub fn register(&self, query: &str) -> ResearchId {
let id = Self::generate_id();
let task = ResearchTask::new(id.clone(), query.to_string());
let mut tasks = self.tasks.lock().unwrap();
tasks.insert(id.clone(), task);
debug!("Registered research task: {} for query: {}", id, query);
id
}
/// Update a research task with its result
pub fn complete(&self, id: &ResearchId, result: String) {
let notification = {
let mut tasks = self.tasks.lock().unwrap();
if let Some(task) = tasks.get_mut(id) {
task.status = ResearchStatus::Complete;
task.result = Some(result);
debug!("Research task {} completed successfully", id);
Some(ResearchCompletionNotification {
id: id.clone(),
status: ResearchStatus::Complete,
query: task.query.clone(),
})
} else {
None
}
};
// Send notification outside the lock to avoid potential deadlocks
if let (Some(notification), Some(tx)) = (notification, &self.completion_tx) {
let _ = tx.send(notification); // Ignore error if no receivers
}
}
/// Mark a research task as failed
pub fn fail(&self, id: &ResearchId, error: String) {
let notification = {
let mut tasks = self.tasks.lock().unwrap();
if let Some(task) = tasks.get_mut(id) {
task.status = ResearchStatus::Failed;
task.result = Some(error);
debug!("Research task {} failed", id);
Some(ResearchCompletionNotification {
id: id.clone(),
status: ResearchStatus::Failed,
query: task.query.clone(),
})
} else {
None
}
};
// Send notification outside the lock to avoid potential deadlocks
if let (Some(notification), Some(tx)) = (notification, &self.completion_tx) {
let _ = tx.send(notification); // Ignore error if no receivers
}
}
/// Get the status of a specific research task
pub fn get_status(&self, id: &ResearchId) -> Option<(ResearchStatus, Option<String>)> {
let tasks = self.tasks.lock().unwrap();
tasks.get(id).map(|t| (t.status.clone(), t.result.clone()))
}
/// Get a specific research task
pub fn get(&self, id: &ResearchId) -> Option<ResearchTask> {
let tasks = self.tasks.lock().unwrap();
tasks.get(id).cloned()
}
/// List all pending (not yet injected) research tasks
pub fn list_pending(&self) -> Vec<ResearchTask> {
let tasks = self.tasks.lock().unwrap();
tasks
.values()
.filter(|t| !t.injected)
.cloned()
.collect()
}
/// List all research tasks (including injected ones)
pub fn list_all(&self) -> Vec<ResearchTask> {
let tasks = self.tasks.lock().unwrap();
tasks.values().cloned().collect()
}
/// Get count of pending (in-progress) research tasks
pub fn pending_count(&self) -> usize {
let tasks = self.tasks.lock().unwrap();
tasks
.values()
.filter(|t| t.status == ResearchStatus::Pending)
.count()
}
/// Get count of completed but not yet injected research tasks
pub fn ready_count(&self) -> usize {
let tasks = self.tasks.lock().unwrap();
tasks
.values()
.filter(|t| !t.injected && t.status != ResearchStatus::Pending)
.count()
}
/// Take all completed research tasks that haven't been injected yet
///
/// Marks them as injected so they won't be returned again
pub fn take_completed(&self) -> Vec<ResearchTask> {
let mut tasks = self.tasks.lock().unwrap();
let mut completed = Vec::new();
for task in tasks.values_mut() {
if !task.injected && task.status != ResearchStatus::Pending {
task.injected = true;
completed.push(task.clone());
}
}
debug!("Took {} completed research tasks for injection", completed.len());
completed
}
/// Remove a research task (e.g., after it's been fully processed)
pub fn remove(&self, id: &ResearchId) -> Option<ResearchTask> {
let mut tasks = self.tasks.lock().unwrap();
tasks.remove(id)
}
/// Clear all completed and injected tasks (cleanup)
pub fn cleanup_injected(&self) {
let mut tasks = self.tasks.lock().unwrap();
tasks.retain(|_, t| !t.injected);
}
/// Check if there are any tasks (pending or ready)
pub fn has_tasks(&self) -> bool {
let tasks = self.tasks.lock().unwrap();
!tasks.is_empty()
}
/// Format a summary of pending research for display
pub fn format_status_summary(&self) -> Option<String> {
let tasks = self.tasks.lock().unwrap();
let pending: Vec<_> = tasks.values().filter(|t| t.status == ResearchStatus::Pending).collect();
let ready: Vec<_> = tasks.values().filter(|t| !t.injected && t.status != ResearchStatus::Pending).collect();
if pending.is_empty() && ready.is_empty() {
return None;
}
let mut parts = Vec::new();
if !pending.is_empty() {
parts.push(format!("🔍 {} researching", pending.len()));
}
if !ready.is_empty() {
parts.push(format!("📋 {} ready", ready.len()));
}
Some(parts.join(" | "))
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::thread;
    use std::time::Duration;
    // Freshly registered tasks start Pending with no result and not injected.
    #[test]
    fn test_register_and_get() {
        let manager = PendingResearchManager::new();
        let id = manager.register("How to use tokio?");
        let task = manager.get(&id).unwrap();
        assert_eq!(task.query, "How to use tokio?");
        assert_eq!(task.status, ResearchStatus::Pending);
        assert!(task.result.is_none());
        assert!(!task.injected);
    }
    #[test]
    fn test_complete_research() {
        let manager = PendingResearchManager::new();
        let id = manager.register("Test query");
        manager.complete(&id, "Research report here".to_string());
        let (status, result) = manager.get_status(&id).unwrap();
        assert_eq!(status, ResearchStatus::Complete);
        assert_eq!(result.unwrap(), "Research report here");
    }
    #[test]
    fn test_fail_research() {
        let manager = PendingResearchManager::new();
        let id = manager.register("Test query");
        manager.fail(&id, "Connection timeout".to_string());
        let (status, result) = manager.get_status(&id).unwrap();
        assert_eq!(status, ResearchStatus::Failed);
        assert_eq!(result.unwrap(), "Connection timeout");
    }
    // take_completed drains both Complete and Failed tasks exactly once.
    #[test]
    fn test_take_completed() {
        let manager = PendingResearchManager::new();
        let id1 = manager.register("Query 1");
        let id2 = manager.register("Query 2");
        let _id3 = manager.register("Query 3");
        // Complete two, leave one pending
        manager.complete(&id1, "Report 1".to_string());
        manager.fail(&id2, "Error".to_string());
        // id3 stays pending
        // Take completed
        let completed = manager.take_completed();
        assert_eq!(completed.len(), 2);
        // Taking again should return empty (already injected)
        let completed_again = manager.take_completed();
        assert!(completed_again.is_empty());
        // Pending count should be 1
        assert_eq!(manager.pending_count(), 1);
    }
    // list_pending means "not yet injected", not "status == Pending".
    #[test]
    fn test_list_pending() {
        let manager = PendingResearchManager::new();
        let id1 = manager.register("Query 1");
        let id2 = manager.register("Query 2");
        manager.complete(&id1, "Report".to_string());
        // Both should be in list_pending (not injected yet)
        let pending = manager.list_pending();
        assert_eq!(pending.len(), 2);
        // Take completed
        manager.take_completed();
        // Now only the actually pending one should be listed
        let pending = manager.list_pending();
        assert_eq!(pending.len(), 1);
        assert_eq!(pending[0].id, id2);
    }
    #[test]
    fn test_format_status_summary() {
        let manager = PendingResearchManager::new();
        // Empty - no summary
        assert!(manager.format_status_summary().is_none());
        // One pending
        let _id1 = manager.register("Query 1");
        let summary = manager.format_status_summary().unwrap();
        assert!(summary.contains("1 researching"));
        // One pending, one ready
        let id2 = manager.register("Query 2");
        manager.complete(&id2, "Report".to_string());
        let summary = manager.format_status_summary().unwrap();
        assert!(summary.contains("1 researching"));
        assert!(summary.contains("1 ready"));
    }
    // A cloned manager shares the same task map across threads.
    #[test]
    fn test_thread_safety() {
        let manager = PendingResearchManager::new();
        let manager_clone = manager.clone();
        let id = manager.register("Concurrent test");
        let id_clone = id.clone();
        let handle = thread::spawn(move || {
            thread::sleep(Duration::from_millis(10));
            manager_clone.complete(&id_clone, "Result from thread".to_string());
        });
        // Main thread checks status
        // (bounded busy-wait: the spawned thread completes after ~10ms)
        loop {
            if let Some((status, _)) = manager.get_status(&id) {
                if status == ResearchStatus::Complete {
                    break;
                }
            }
            thread::sleep(Duration::from_millis(5));
        }
        handle.join().unwrap();
        let (status, result) = manager.get_status(&id).unwrap();
        assert_eq!(status, ResearchStatus::Complete);
        assert_eq!(result.unwrap(), "Result from thread");
    }
    #[test]
    fn test_elapsed_display() {
        let manager = PendingResearchManager::new();
        let id = manager.register("Test");
        let task = manager.get(&id).unwrap();
        let display = task.elapsed_display();
        // Should be "0s" or similar (just started)
        assert!(display.contains('s'));
    }
    // cleanup_injected removes only tasks already handed out via take_completed.
    #[test]
    fn test_cleanup_injected() {
        let manager = PendingResearchManager::new();
        let id1 = manager.register("Query 1");
        let id2 = manager.register("Query 2");
        manager.complete(&id1, "Report 1".to_string());
        manager.complete(&id2, "Report 2".to_string());
        // Take and inject
        manager.take_completed();
        // Both should still exist
        assert_eq!(manager.list_all().len(), 2);
        // Cleanup injected
        manager.cleanup_injected();
        // Should be empty now
        assert_eq!(manager.list_all().len(), 0);
    }
    #[test]
    fn test_generate_id_uniqueness() {
        let ids: Vec<_> = (0..100).map(|_| PendingResearchManager::generate_id()).collect();
        let unique: std::collections::HashSet<_> = ids.iter().collect();
        assert_eq!(ids.len(), unique.len(), "Generated IDs should be unique");
    }
    #[tokio::test]
    async fn test_notifications_on_complete() {
        let (manager, mut rx) = PendingResearchManager::with_notifications();
        let id = manager.register("Test query");
        // Complete the research
        manager.complete(&id, "Report content".to_string());
        // Should receive a notification
        let notification = rx.recv().await.unwrap();
        assert_eq!(notification.id, id);
        assert_eq!(notification.status, ResearchStatus::Complete);
        assert_eq!(notification.query, "Test query");
    }
    #[tokio::test]
    async fn test_notifications_on_fail() {
        let (manager, mut rx) = PendingResearchManager::with_notifications();
        let id = manager.register("Test query");
        // Fail the research
        manager.fail(&id, "Connection error".to_string());
        // Should receive a notification
        let notification = rx.recv().await.unwrap();
        assert_eq!(notification.id, id);
        assert_eq!(notification.status, ResearchStatus::Failed);
        assert_eq!(notification.query, "Test query");
    }
    #[test]
    fn test_no_notifications_without_setup() {
        let manager = PendingResearchManager::new();
        // subscribe() should return None when notifications not enabled
        assert!(manager.subscribe().is_none());
    }
}

View File

@@ -73,11 +73,6 @@ Short description for providers without native calling specs:
- Multiple searches: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"funcs\", \"query\": \"(function_item name: (identifier) @name)\", \"language\": \"rust\"}, {\"name\": \"structs\", \"query\": \"(struct_item name: (type_identifier) @name)\", \"language\": \"rust\"}]}}
- With context lines: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"funcs\", \"query\": \"(function_item name: (identifier) @name)\", \"language\": \"rust\", \"context_lines\": 3}]}}
- **research**: Perform web-based research and return a structured report
- Format: {\"tool\": \"research\", \"args\": {\"query\": \"your research question\"}}
- Example: {\"tool\": \"research\", \"args\": {\"query\": \"Best Rust HTTP client libraries for async/await\"}}
- Use for researching APIs, SDKs, libraries, approaches, bugs, or any topic requiring web research
- **remember**: Save discovered code locations to workspace memory
- Format: {\"tool\": \"remember\", \"args\": {\"notes\": \"markdown notes\"}}
- Example: {\"tool\": \"remember\", \"args\": {\"notes\": \"### Feature Name\\n- `file.rs` [0..100] - `function_name()\"}}

View File

@@ -1,15 +1,17 @@
//! Skill discovery - scans directories for SKILL.md files.
//!
//! Discovers skills from:
//! - Global: ~/.g3/skills/
//! - Workspace: .g3/skills/
//!
//! Workspace skills override global skills with the same name.
//! Discovers skills from (highest to lowest priority):
//! 1. Repo: `skills/` at repo root (checked into git, overrides all)
//! 2. Workspace: `.g3/skills/` (local customizations)
//! 3. Extra paths from config
//! 4. Global: `~/.g3/skills/`
//! 5. Embedded: compiled into binary (always available)
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use tracing::{debug, warn};
use super::embedded::get_embedded_skills;
use super::parser::Skill;
/// Default global skills directory
@@ -18,28 +20,36 @@ const GLOBAL_SKILLS_DIR: &str = "~/.g3/skills";
/// Default workspace skills directory (relative to workspace root)
const WORKSPACE_SKILLS_DIR: &str = ".g3/skills";
/// Repo-local skills directory (relative to workspace root, checked into git)
const REPO_SKILLS_DIR: &str = "skills";
/// Discover all available skills from configured paths.
///
/// Skills are loaded from:
/// 1. Global directory (~/.g3/skills/)
/// 2. Workspace directory (.g3/skills/)
/// Skills are loaded in priority order (lowest to highest):
/// 1. Embedded skills (compiled into binary)
/// 2. Global directory (~/.g3/skills/)
/// 3. Extra paths from config
/// 4. Workspace directory (.g3/skills/)
/// 5. Repo directory (skills/) - highest priority
///
/// Workspace skills override global skills with the same name.
/// Additional paths can be provided via `extra_paths`.
/// Higher priority skills override lower priority skills with the same name.
pub fn discover_skills(
workspace_dir: Option<&Path>,
extra_paths: &[PathBuf],
) -> Vec<Skill> {
let mut skills_by_name: HashMap<String, Skill> = HashMap::new();
// 1. Load global skills first (lowest priority)
// 1. Load embedded skills first (lowest priority)
load_embedded_skills(&mut skills_by_name);
// 2. Load global skills
let global_dir = expand_tilde(GLOBAL_SKILLS_DIR);
if global_dir.exists() {
debug!("Scanning global skills directory: {}", global_dir.display());
load_skills_from_dir(&global_dir, &mut skills_by_name);
}
// 2. Load from extra paths (medium priority)
// 3. Load from extra paths
for path in extra_paths {
let expanded = if path.starts_with("~") {
expand_tilde(&path.to_string_lossy())
@@ -52,7 +62,7 @@ pub fn discover_skills(
}
}
// 3. Load workspace skills last (highest priority - overrides others)
// 4. Load workspace skills (.g3/skills/)
if let Some(workspace) = workspace_dir {
let workspace_skills = workspace.join(WORKSPACE_SKILLS_DIR);
if workspace_skills.exists() {
@@ -61,6 +71,15 @@ pub fn discover_skills(
}
}
// 5. Load repo skills (skills/) - highest priority
if let Some(workspace) = workspace_dir {
let repo_skills = workspace.join(REPO_SKILLS_DIR);
if repo_skills.exists() {
debug!("Scanning repo skills directory: {}", repo_skills.display());
load_skills_from_dir(&repo_skills, &mut skills_by_name);
}
}
// Convert to sorted vector for deterministic ordering
let mut skills: Vec<Skill> = skills_by_name.into_values().collect();
skills.sort_by(|a, b| a.name.cmp(&b.name));
@@ -69,6 +88,23 @@ pub fn discover_skills(
skills
}
/// Parse all compile-time embedded skills and insert them into `skills`.
///
/// Each skill's `path` is rewritten to the sentinel form
/// `<embedded:NAME>/SKILL.md` so `is_embedded_skill` can recognize it later.
/// A skill that fails to parse is logged and skipped rather than aborting
/// discovery.
fn load_embedded_skills(skills: &mut HashMap<String, Skill>) {
    for embedded in get_embedded_skills() {
        match Skill::parse(embedded.skill_md, Path::new("<embedded>")) {
            Ok(mut skill) => {
                // Mark as embedded in the path (consumed by `is_embedded_skill`)
                skill.path = format!("<embedded:{}>/{}", embedded.name, "SKILL.md");
                debug!("Loaded embedded skill: {}", skill.name);
                skills.insert(skill.name.clone(), skill);
            }
            Err(e) => {
                warn!("Failed to parse embedded skill '{}': {}", embedded.name, e);
            }
        }
    }
}
/// Load skills from a directory into the map.
/// Each subdirectory should contain a SKILL.md file.
fn load_skills_from_dir(dir: &Path, skills: &mut HashMap<String, Skill>) {
@@ -125,6 +161,11 @@ fn expand_tilde(path: &str) -> PathBuf {
PathBuf::from(expanded.as_ref())
}
/// Check if a skill is from an embedded source.
///
/// Embedded skills carry the sentinel path prefix written by
/// `load_embedded_skills`.
pub fn is_embedded_skill(skill: &Skill) -> bool {
    skill.path.strip_prefix("<embedded:").is_some()
}
#[cfg(test)]
mod tests {
use super::*;
@@ -144,6 +185,16 @@ mod tests {
skill_dir
}
#[test]
fn test_discover_embedded_skills() {
// With no directories, should still find embedded skills
let skills = discover_skills(None, &[]);
// Should have at least the research skill
assert!(!skills.is_empty(), "Should have embedded skills");
assert!(skills.iter().any(|s| s.name == "research"), "Should have research skill");
}
#[test]
fn test_discover_from_workspace() {
let temp = TempDir::new().unwrap();
@@ -158,9 +209,66 @@ mod tests {
let skills = discover_skills(Some(workspace), &[]);
assert_eq!(skills.len(), 2);
assert_eq!(skills[0].name, "another-skill"); // Sorted alphabetically
assert_eq!(skills[1].name, "test-skill");
// Should have embedded + workspace skills
assert!(skills.iter().any(|s| s.name == "test-skill"));
assert!(skills.iter().any(|s| s.name == "another-skill"));
assert!(skills.iter().any(|s| s.name == "research")); // embedded
}
#[test]
fn test_discover_from_repo_skills() {
let temp = TempDir::new().unwrap();
let workspace = temp.path();
// Create repo skills directory (skills/)
let skills_dir = workspace.join("skills");
fs::create_dir_all(&skills_dir).unwrap();
create_skill_dir(&skills_dir, "repo-skill", "A repo skill");
let skills = discover_skills(Some(workspace), &[]);
assert!(skills.iter().any(|s| s.name == "repo-skill"));
}
#[test]
fn test_repo_overrides_embedded() {
let temp = TempDir::new().unwrap();
let workspace = temp.path();
// Create repo skills directory with a skill that overrides embedded
let skills_dir = workspace.join("skills");
fs::create_dir_all(&skills_dir).unwrap();
// Override the embedded research skill
create_skill_dir(&skills_dir, "research", "Custom research skill");
let skills = discover_skills(Some(workspace), &[]);
let research = skills.iter().find(|s| s.name == "research").unwrap();
assert_eq!(research.description, "Custom research skill");
assert!(!is_embedded_skill(research), "Should not be marked as embedded");
}
#[test]
fn test_repo_overrides_workspace() {
let temp = TempDir::new().unwrap();
let workspace = temp.path();
// Create workspace skill
let workspace_skills = workspace.join(".g3/skills");
fs::create_dir_all(&workspace_skills).unwrap();
create_skill_dir(&workspace_skills, "shared-skill", "Workspace version");
// Create repo skill with same name (should override)
let repo_skills = workspace.join("skills");
fs::create_dir_all(&repo_skills).unwrap();
create_skill_dir(&repo_skills, "shared-skill", "Repo version");
let skills = discover_skills(Some(workspace), &[]);
let shared = skills.iter().find(|s| s.name == "shared-skill").unwrap();
assert_eq!(shared.description, "Repo version");
}
#[test]
@@ -173,8 +281,7 @@ mod tests {
let skills = discover_skills(None, &[extra_dir]);
assert_eq!(skills.len(), 1);
assert_eq!(skills[0].name, "extra-skill");
assert!(skills.iter().any(|s| s.name == "extra-skill"));
}
#[test]
@@ -194,15 +301,15 @@ mod tests {
let skills = discover_skills(Some(workspace), &[extra_dir]);
assert_eq!(skills.len(), 1);
assert_eq!(skills[0].name, "shared-skill");
assert_eq!(skills[0].description, "Workspace version");
let shared = skills.iter().find(|s| s.name == "shared-skill").unwrap();
assert_eq!(shared.description, "Workspace version");
}
#[test]
fn test_nonexistent_directory() {
let skills = discover_skills(Some(Path::new("/nonexistent/path")), &[]);
assert!(skills.is_empty());
// Should still have embedded skills
assert!(!skills.is_empty());
}
#[test]
@@ -212,7 +319,8 @@ mod tests {
fs::create_dir_all(&skills_dir).unwrap();
let skills = discover_skills(Some(temp.path()), &[]);
assert!(skills.is_empty());
// Should still have embedded skills
assert!(!skills.is_empty());
}
#[test]
@@ -234,9 +342,9 @@ mod tests {
let skills = discover_skills(Some(temp.path()), &[]);
// Only valid skill should be loaded
assert_eq!(skills.len(), 1);
assert_eq!(skills[0].name, "valid-skill");
// Valid skill should be loaded, invalid should be skipped
assert!(skills.iter().any(|s| s.name == "valid-skill"));
assert!(!skills.iter().any(|s| s.name == "invalid-skill"));
}
#[test]
@@ -254,8 +362,7 @@ mod tests {
let skills = discover_skills(Some(temp.path()), &[]);
assert_eq!(skills.len(), 1);
assert_eq!(skills[0].name, "lowercase-skill");
assert!(skills.iter().any(|s| s.name == "lowercase-skill"));
}
#[test]
@@ -266,4 +373,11 @@ mod tests {
let no_tilde = expand_tilde("/absolute/path");
assert_eq!(no_tilde, PathBuf::from("/absolute/path"));
}
#[test]
fn test_is_embedded_skill() {
let skills = discover_skills(None, &[]);
let research = skills.iter().find(|s| s.name == "research").unwrap();
assert!(is_embedded_skill(research));
}
}

View File

@@ -0,0 +1,87 @@
//! Embedded skills - compiled into the binary for portability.
//!
//! Core skills are embedded at compile time using `include_str!`.
//! This ensures g3 works anywhere without needing external skill files.
//!
//! Priority order (highest to lowest):
//! 1. Repo `skills/` directory (on disk, checked into git)
//! 2. Workspace `.g3/skills/` directory
//! 3. Config extra_paths
//! 4. Global `~/.g3/skills/` directory
//! 5. Embedded skills (this module - always available)
use std::collections::HashMap;
/// An embedded skill with its SKILL.md content and optional scripts.
///
/// All fields are `'static` because the data is compiled into the binary
/// via `include_str!` (see the `EMBEDDED_SKILLS` table below).
#[derive(Debug, Clone)]
pub struct EmbeddedSkill {
    /// Skill name (must match the name in SKILL.md frontmatter)
    pub name: &'static str,
    /// Content of SKILL.md
    pub skill_md: &'static str,
    /// Scripts bundled with the skill: (filename, content).
    /// May be empty for documentation-only skills.
    pub scripts: &'static [(&'static str, &'static str)],
}
/// All embedded skills, compiled into the binary.
///
/// To add a new embedded skill:
/// 1. Create `skills/<name>/SKILL.md` in the repo
/// 2. Add an entry here with `include_str!`
///
/// The `include_str!` paths are relative to this source file and point at
/// the repo-root `skills/` directory, so the on-disk skill and the embedded
/// copy come from the same source of truth.
static EMBEDDED_SKILLS: &[EmbeddedSkill] = &[
    EmbeddedSkill {
        name: "research",
        skill_md: include_str!("../../../../skills/research/SKILL.md"),
        scripts: &[
            // Shell script extracted to `.g3/bin/g3-research` on first use.
            ("g3-research", include_str!("../../../../skills/research/g3-research")),
        ],
    },
];
/// Get all embedded skills.
///
/// Returns the static table compiled into the binary; never empty as long
/// as at least one skill is registered in `EMBEDDED_SKILLS`.
pub fn get_embedded_skills() -> &'static [EmbeddedSkill] {
    EMBEDDED_SKILLS
}
/// Look up an embedded skill by its exact name.
///
/// Returns `None` when no embedded skill is registered under `name`.
pub fn get_embedded_skill(name: &str) -> Option<&'static EmbeddedSkill> {
    for skill in EMBEDDED_SKILLS {
        if skill.name == name {
            return Some(skill);
        }
    }
    None
}
/// Build a name → skill lookup table over all embedded skills.
pub fn get_embedded_skills_map() -> HashMap<&'static str, &'static EmbeddedSkill> {
    let mut map = HashMap::new();
    for skill in EMBEDDED_SKILLS {
        map.insert(skill.name, skill);
    }
    map
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The embedded table must never be empty: g3 relies on the research
    /// skill being available even when no skills exist on disk.
    #[test]
    fn test_embedded_skills_exist() {
        let skills = get_embedded_skills();
        assert!(!skills.is_empty(), "Should have at least one embedded skill");
    }

    #[test]
    fn test_research_skill_embedded() {
        let skill = get_embedded_skill("research");
        assert!(skill.is_some(), "Research skill should be embedded");
        let skill = skill.unwrap();
        // The frontmatter name must match the registry name, otherwise
        // discovery would surface the skill under a different identifier.
        assert!(skill.skill_md.contains("name: research"), "SKILL.md should have name field");
        assert!(!skill.scripts.is_empty(), "Research skill should have scripts");
    }

    /// Lookup is exact-match: known name hits, unknown name misses.
    #[test]
    fn test_get_by_name() {
        assert!(get_embedded_skill("research").is_some());
        assert!(get_embedded_skill("nonexistent").is_none());
    }

    #[test]
    fn test_skills_map() {
        let map = get_embedded_skills_map();
        assert!(map.contains_key("research"));
    }
}

View File

@@ -0,0 +1,234 @@
//! Script extraction for embedded skills.
//!
//! Extracts embedded scripts to `.g3/bin/` on first use.
//! Scripts are re-extracted if the embedded version changes.
use anyhow::{Context, Result};
use std::fs;
use std::os::unix::fs::PermissionsExt;
use std::path::{Path, PathBuf};
use tracing::{debug, info};
use super::embedded::get_embedded_skill;
/// Directory where extracted scripts are placed (relative to workspace)
const BIN_DIR: &str = ".g3/bin";
/// Version file to track when scripts need re-extraction
const VERSION_FILE: &str = ".version";
/// Extract a script from an embedded skill to the bin directory.
///
/// The script is written to `<workspace>/.g3/bin/<script_name>` and marked
/// executable on Unix. A sidecar `<script_name>.version` file records a hash
/// of the embedded content so the script is re-extracted whenever the binary
/// ships a newer version.
///
/// # Arguments
/// * `skill_name` - Name of the skill containing the script
/// * `script_name` - Name of the script file to extract
/// * `workspace_dir` - Workspace root directory
///
/// # Returns
/// Path to the extracted script, ready to execute.
pub fn extract_script(
    skill_name: &str,
    script_name: &str,
    workspace_dir: &Path,
) -> Result<PathBuf> {
    let skill = get_embedded_skill(skill_name)
        .with_context(|| format!("Embedded skill '{}' not found", skill_name))?;

    // Locate the requested script within the skill's bundle.
    let mut found = None;
    for (name, content) in skill.scripts {
        if *name == script_name {
            found = Some(*content);
            break;
        }
    }
    let script_content = found
        .with_context(|| format!("Script '{}' not found in skill '{}'", script_name, skill_name))?;

    let bin_dir = workspace_dir.join(BIN_DIR);
    fs::create_dir_all(&bin_dir)
        .with_context(|| format!("Failed to create bin directory: {}", bin_dir.display()))?;

    let script_path = bin_dir.join(script_name);
    let version_path = bin_dir.join(format!("{}{}", script_name, VERSION_FILE));

    // Skip the write entirely when an up-to-date copy is already on disk.
    if script_path.exists() {
        if !needs_update(&version_path, script_content)? {
            debug!("Script {} is up to date", script_path.display());
            return Ok(script_path);
        }
        debug!("Script {} is outdated, re-extracting", script_path.display());
    } else {
        debug!("Script {} does not exist, extracting", script_path.display());
    }

    // Write the script body.
    fs::write(&script_path, script_content)
        .with_context(|| format!("Failed to write script: {}", script_path.display()))?;

    // Make it executable (Unix only).
    #[cfg(unix)]
    {
        let mut perms = fs::metadata(&script_path)?.permissions();
        perms.set_mode(0o755);
        fs::set_permissions(&script_path, perms)?;
    }

    // Record the content hash so future calls can detect staleness.
    let hash = compute_hash(script_content);
    fs::write(&version_path, hash)
        .with_context(|| format!("Failed to write version file: {}", version_path.display()))?;

    info!("Extracted {} to {}", script_name, script_path.display());
    Ok(script_path)
}
/// Extract all scripts from an embedded skill.
///
/// Returns a vector of (script_name, script_path) pairs, in declaration
/// order; stops at the first extraction failure.
pub fn extract_all_scripts(
    skill_name: &str,
    workspace_dir: &Path,
) -> Result<Vec<(String, PathBuf)>> {
    let skill = get_embedded_skill(skill_name)
        .with_context(|| format!("Embedded skill '{}' not found", skill_name))?;
    skill
        .scripts
        .iter()
        .map(|(script_name, _)| {
            extract_script(skill_name, script_name, workspace_dir)
                .map(|path| (script_name.to_string(), path))
        })
        .collect()
}
/// Decide whether an extracted script is stale relative to the embedded
/// content, based on the hash stored in its sidecar version file.
///
/// A missing version file counts as stale; an unreadable one is an error.
fn needs_update(version_path: &Path, current_content: &str) -> Result<bool> {
    if version_path.exists() {
        let stored_hash = fs::read_to_string(version_path)
            .with_context(|| format!("Failed to read version file: {}", version_path.display()))?;
        Ok(stored_hash.trim() != compute_hash(current_content))
    } else {
        Ok(true)
    }
}
/// Compute a short hex digest of `content` for version tracking.
///
/// Uses the standard library's `DefaultHasher` (fast, non-cryptographic).
/// The digest is only ever compared against values written by the same
/// process, so cross-build hash stability is not required.
fn compute_hash(content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};
    let mut state = DefaultHasher::new();
    content.hash(&mut state);
    let digest = state.finish();
    format!("{:016x}", digest)
}
/// Compute the path a script would be extracted to, without extracting it.
pub fn get_script_path(script_name: &str, workspace_dir: &Path) -> PathBuf {
    let mut path = workspace_dir.join(BIN_DIR);
    path.push(script_name);
    path
}
/// Report whether a script has already been extracted into the workspace's
/// bin directory.
pub fn is_script_extracted(script_name: &str, workspace_dir: &Path) -> bool {
    let path = get_script_path(script_name, workspace_dir);
    path.exists()
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    #[test]
    fn test_extract_research_script() {
        let temp = TempDir::new().unwrap();
        let result = extract_script("research", "g3-research", temp.path());
        assert!(result.is_ok(), "Should extract research script: {:?}", result.err());
        let script_path = result.unwrap();
        assert!(script_path.exists(), "Script should exist after extraction");
        // Check it's executable (permission bits are Unix-specific)
        #[cfg(unix)]
        {
            let metadata = fs::metadata(&script_path).unwrap();
            let mode = metadata.permissions().mode();
            assert!(mode & 0o111 != 0, "Script should be executable");
        }
        // Check content round-tripped from the embedded copy
        let content = fs::read_to_string(&script_path).unwrap();
        assert!(content.starts_with("#!/bin/bash"), "Should be a bash script");
    }

    /// Extracting twice must be a no-op the second time: same path, no error.
    #[test]
    fn test_extract_idempotent() {
        let temp = TempDir::new().unwrap();
        // Extract twice
        let path1 = extract_script("research", "g3-research", temp.path()).unwrap();
        let path2 = extract_script("research", "g3-research", temp.path()).unwrap();
        assert_eq!(path1, path2, "Should return same path");
    }

    /// A `<name>.version` sidecar with a non-empty hash is written alongside
    /// the script so staleness can be detected on later runs.
    #[test]
    fn test_version_tracking() {
        let temp = TempDir::new().unwrap();
        // Extract
        extract_script("research", "g3-research", temp.path()).unwrap();
        // Version file should exist
        let version_path = temp.path().join(".g3/bin/g3-research.version");
        assert!(version_path.exists(), "Version file should exist");
        let hash = fs::read_to_string(&version_path).unwrap();
        assert!(!hash.is_empty(), "Version file should contain hash");
    }

    #[test]
    fn test_nonexistent_skill() {
        let temp = TempDir::new().unwrap();
        let result = extract_script("nonexistent", "script", temp.path());
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("not found"));
    }

    #[test]
    fn test_nonexistent_script() {
        let temp = TempDir::new().unwrap();
        let result = extract_script("research", "nonexistent", temp.path());
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("not found"));
    }

    #[test]
    fn test_get_script_path() {
        let workspace = Path::new("/workspace");
        let path = get_script_path("g3-research", workspace);
        assert_eq!(path, PathBuf::from("/workspace/.g3/bin/g3-research"));
    }

    #[test]
    fn test_compute_hash() {
        let hash1 = compute_hash("hello world");
        let hash2 = compute_hash("hello world");
        let hash3 = compute_hash("different content");
        assert_eq!(hash1, hash2, "Same content should produce same hash");
        assert_ne!(hash1, hash3, "Different content should produce different hash");
        assert_eq!(hash1.len(), 16, "Hash should be 16 hex chars");
    }
}

View File

@@ -22,10 +22,12 @@
//!
//! # Discovery
//!
//! Skills are discovered from:
//! 1. Global: `~/.g3/skills/` (lowest priority)
//! 2. Extra paths from config (medium priority)
//! 3. Workspace: `.g3/skills/` (highest priority, overrides others)
//! Skills are discovered from (highest to lowest priority):
//! 1. Repo: `skills/` at repo root (checked into git, overrides all)
//! 2. Workspace: `.g3/skills/` (local customizations)
//! 3. Extra paths from config
//! 4. Global: `~/.g3/skills/`
//! 5. Embedded: compiled into binary (always available)
//!
//! # Usage
//!
@@ -36,7 +38,10 @@
mod parser;
mod discovery;
mod prompt;
mod embedded;
pub mod extraction;
pub use parser::Skill;
pub use discovery::discover_skills;
pub use prompt::generate_skills_prompt;
pub use embedded::{get_embedded_skills, get_embedded_skill, EmbeddedSkill};

View File

@@ -12,7 +12,6 @@ use serde_json::json;
pub struct ToolConfig {
pub webdriver: bool,
pub computer_control: bool,
pub exclude_research: bool,
}
impl ToolConfig {
@@ -20,16 +19,8 @@ impl ToolConfig {
Self {
webdriver,
computer_control,
exclude_research: false,
}
}
/// Create a config with the research tool excluded.
/// Used for scout agent to prevent recursion.
pub fn with_research_excluded(mut self) -> Self {
self.exclude_research = true;
self
}
}
/// Create tool definitions for native tool calling providers.
@@ -37,7 +28,7 @@ impl ToolConfig {
/// Returns a vector of Tool definitions that describe the available tools
/// and their input schemas.
pub fn create_tool_definitions(config: ToolConfig) -> Vec<Tool> {
let mut tools = create_core_tools(config.exclude_research);
let mut tools = create_core_tools();
if config.webdriver {
tools.extend(create_webdriver_tools());
@@ -47,7 +38,7 @@ pub fn create_tool_definitions(config: ToolConfig) -> Vec<Tool> {
}
/// Create the core tools that are always available
fn create_core_tools(exclude_research: bool) -> Vec<Tool> {
fn create_core_tools() -> Vec<Tool> {
let mut tools = vec![
Tool {
name: "shell".to_string(),
@@ -195,40 +186,6 @@ fn create_core_tools(exclude_research: bool) -> Vec<Tool> {
},
];
// Conditionally add the research tool (excluded for scout agent to prevent recursion)
if !exclude_research {
tools.push(Tool {
name: "research".to_string(),
description: "Initiate web-based research on a topic. This tool is ASYNCHRONOUS - it spawns a research agent in the background and returns immediately with a research_id. Results are automatically injected into the conversation when ready. Use this when you need to research APIs, SDKs, libraries, approaches, bugs, or documentation. If you need the results before continuing, say so and yield the turn to the user. Check status with research_status tool.".to_string(),
input_schema: json!({
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The research question or topic to investigate. Be specific about what you need to know."
}
},
"required": ["query"]
}),
});
// research_status tool - check status of pending research
tools.push(Tool {
name: "research_status".to_string(),
description: "Check the status of pending research tasks. Call without arguments to list all pending research, or with a research_id to check a specific task. Use this to see if research has completed before it's automatically injected.".to_string(),
input_schema: json!({
"type": "object",
"properties": {
"research_id": {
"type": "string",
"description": "Optional: specific research_id to check. If omitted, lists all pending research tasks."
}
},
"required": []
}),
});
}
// Plan Mode tools
tools.push(Tool {
name: "plan_read".to_string(),
@@ -499,12 +456,12 @@ mod tests {
#[test]
fn test_core_tools_count() {
let tools = create_core_tools(false);
let tools = create_core_tools();
// Core tools: shell, background_process, read_file, read_image,
// write_file, str_replace, code_search,
// research, research_status, remember, plan_read, plan_write, plan_approve
// (14 total - memory is auto-loaded, only remember tool needed)
assert_eq!(tools.len(), 14);
assert_eq!(tools.len(), 12);
}
#[test]
@@ -518,7 +475,7 @@ mod tests {
fn test_create_tool_definitions_core_only() {
let config = ToolConfig::default();
let tools = create_tool_definitions(config);
assert_eq!(tools.len(), 14);
assert_eq!(tools.len(), 12);
}
#[test]
@@ -526,28 +483,16 @@ mod tests {
let config = ToolConfig::new(true, true);
let tools = create_tool_definitions(config);
// 14 core + 15 webdriver = 29
assert_eq!(tools.len(), 29);
assert_eq!(tools.len(), 27);
}
#[test]
fn test_tool_has_required_fields() {
let tools = create_core_tools(false);
let tools = create_core_tools();
for tool in tools {
assert!(!tool.name.is_empty(), "Tool name should not be empty");
assert!(!tool.description.is_empty(), "Tool description should not be empty");
assert!(tool.input_schema.is_object(), "Tool input_schema should be an object");
}
}
#[test]
fn test_research_tool_excluded() {
let tools_with_research = create_core_tools(false);
let tools_without_research = create_core_tools(true);
assert_eq!(tools_with_research.len(), 14);
assert_eq!(tools_without_research.len(), 12); // research + research_status both excluded
assert!(tools_with_research.iter().any(|t| t.name == "research"));
assert!(!tools_without_research.iter().any(|t| t.name == "research"));
}
}

View File

@@ -7,7 +7,7 @@ use anyhow::Result;
use tracing::{debug, warn};
use crate::tools::executor::ToolContext;
use crate::tools::{acd, file_ops, memory, misc, plan, research, shell, webdriver};
use crate::tools::{acd, file_ops, memory, misc, plan, shell, webdriver};
use crate::ui_writer::UiWriter;
use crate::ToolCall;
@@ -40,10 +40,11 @@ pub async fn dispatch_tool<W: UiWriter>(
// Miscellaneous tools
"code_search" => misc::execute_code_search(tool_call, ctx).await,
// Research tool
"research" => research::execute_research(tool_call, ctx).await,
"research_status" => research::execute_research_status(tool_call, ctx).await,
// Research tool (deprecated - now a skill)
"research" | "research_status" => {
Ok("⚠️ The `research` tool has been replaced by the **research skill**. Use `background_process` to run `.g3/bin/g3-research` instead. See the research skill documentation for details.".to_string())
}
// Workspace memory tools
"remember" => memory::execute_remember(tool_call, ctx).await,

View File

@@ -120,7 +120,6 @@ mod tests {
use crate::acd::Fragment;
use crate::ui_writer::NullUiWriter;
use crate::background_process::BackgroundProcessManager;
use crate::pending_research::PendingResearchManager;
use serial_test::serial;
use crate::webdriver_session::WebDriverSession;
use g3_providers::{Message, MessageRole};
@@ -136,7 +135,6 @@ mod tests {
todo_content: Arc<RwLock<String>>,
pending_images: Vec<g3_providers::ImageContent>,
config: g3_config::Config,
pending_research_manager: PendingResearchManager,
}
impl TestContext {
@@ -149,7 +147,6 @@ mod tests {
todo_content: Arc::new(RwLock::new(String::new())),
pending_images: Vec::new(),
config: g3_config::Config::default(),
pending_research_manager: PendingResearchManager::new(),
}
}
}
@@ -172,7 +169,6 @@ mod tests {
requirements_sha: None,
context_total_tokens: 100000,
context_used_tokens: 10000,
pending_research_manager: &test_ctx.pending_research_manager,
};
let tool_call = ToolCall {
@@ -203,7 +199,6 @@ mod tests {
requirements_sha: None,
context_total_tokens: 100000,
context_used_tokens: 10000,
pending_research_manager: &test_ctx.pending_research_manager,
};
let tool_call = ToolCall {
@@ -234,7 +229,6 @@ mod tests {
requirements_sha: None,
context_total_tokens: 100000,
context_used_tokens: 10000,
pending_research_manager: &test_ctx.pending_research_manager,
};
let tool_call = ToolCall {

View File

@@ -5,7 +5,6 @@ use std::sync::Arc;
use tokio::sync::RwLock;
use crate::background_process::BackgroundProcessManager;
use crate::pending_research::PendingResearchManager;
use crate::paths::{ensure_session_dir, get_session_todo_path, get_todo_path};
use crate::ui_writer::UiWriter;
use crate::webdriver_session::WebDriverSession;
@@ -28,7 +27,6 @@ pub struct ToolContext<'a, W: UiWriter> {
pub requirements_sha: Option<&'a str>,
pub context_total_tokens: u32,
pub context_used_tokens: u32,
pub pending_research_manager: &'a PendingResearchManager,
}
impl<'a, W: UiWriter> ToolContext<'a, W> {

View File

@@ -7,7 +7,6 @@
//! - `plan` - Plan Mode for structured task planning
//! - `webdriver` - Browser automation via WebDriver
//! - `misc` - Other tools (screenshots, code search, etc.)
//! - `research` - Web research via scout agent
//! - `memory` - Workspace memory (remember)
//! - `acd` - Aggressive Context Dehydration (rehydrate)
@@ -18,7 +17,6 @@ pub mod invariants;
pub mod memory;
pub mod misc;
pub mod plan;
pub mod research;
pub mod shell;
pub mod webdriver;

View File

@@ -1,710 +0,0 @@
//! Research tool: spawns a scout agent to perform web-based research.
//!
//! The research tool is **asynchronous** - it spawns the scout agent in the background
//! and returns immediately with a research_id. The agent can continue with other work
//! while research is in progress. Results are automatically injected into the conversation
//! when ready, or the agent can check status with the `research_status` tool.
use anyhow::Result;
use std::process::Stdio;
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command;
use tracing::{debug, error};
use crate::ui_writer::UiWriter;
use crate::ToolCall;
use g3_config::WebDriverBrowser;
use super::executor::ToolContext;
/// Delimiter markers for scout report extraction
const REPORT_START_MARKER: &str = "---SCOUT_REPORT_START---";
const REPORT_END_MARKER: &str = "---SCOUT_REPORT_END---";
/// Translate scout agent output lines into friendly progress messages.
///
/// Parses tool call headers from the scout output and returns human-readable
/// progress messages. Returns None for lines that should be suppressed
/// (non-header lines and unrecognized tools).
#[allow(dead_code)] // Used in tests, may be used for progress display in future
fn translate_progress(line: &str) -> Option<String> {
    // Strip ANSI codes first so pattern matching sees plain text
    let clean_line = strip_ansi_codes(line);
    let trimmed = clean_line.trim();
    // Tool call header pattern: "┌─ tool_name" or "┌─ tool_name | args"
    if !trimmed.starts_with("┌─") {
        return None;
    }
    // Extract tool name and optional args after the box drawing char
    let after_prefix = trimmed.trim_start_matches("┌─").trim();
    // Split on " | " to separate tool name from args
    let (tool_name, args) = if let Some(pipe_pos) = after_prefix.find(" | ") {
        let name = after_prefix[..pipe_pos].trim();
        let arg = after_prefix[pipe_pos + 3..].trim();
        (name, Some(arg))
    } else {
        (after_prefix.trim(), None)
    };
    // Translate tool names to friendly messages
    match tool_name {
        "webdriver_start" => Some("🌐 Launching browser...".to_string()),
        "webdriver_navigate" => {
            if let Some(url) = args {
                // Extract domain from URL for cleaner display
                let display_url = extract_domain(url).unwrap_or(url);
                Some(format!("🔗 Navigating to {}...", display_url))
            } else {
                Some("🔗 Navigating...".to_string())
            }
        }
        "webdriver_get_page_source" => {
            if let Some(arg) = args {
                // arg might be max_length or file path; a slash or known
                // extension means we are saving the page to a file
                if arg.contains('/') || arg.ends_with(".html") || arg.ends_with(".md") {
                    let filename = arg.rsplit('/').next().unwrap_or(arg);
                    Some(format!("📥 Downloading {}...", filename))
                } else {
                    Some("📄 Reading page content...".to_string())
                }
            } else {
                Some("📄 Reading page content...".to_string())
            }
        }
        "webdriver_find_element" | "webdriver_find_elements" => {
            Some("🔍 Searching page...".to_string())
        }
        "webdriver_click" => Some("👆 Clicking element...".to_string()),
        "webdriver_quit" => Some("✅ Closing browser...".to_string()),
        "read_file" => {
            if let Some(path) = args {
                // Check if there's a range specified (format: "filename [start..end]")
                if let Some(bracket_pos) = path.find(" [") {
                    let filename = path[..bracket_pos].rsplit('/').next().unwrap_or(&path[..bracket_pos]);
                    let range = &path[bracket_pos + 1..]; // includes "[start..end]"
                    Some(format!("📖 Reading {} slice {}...", filename, range.trim_end_matches(']').trim_start_matches('[')))
                } else {
                    let filename = path.rsplit('/').next().unwrap_or(path);
                    Some(format!("📖 Reading {}...", filename))
                }
            } else {
                Some("📖 Reading file...".to_string())
            }
        }
        "write_file" => {
            if let Some(path) = args {
                let filename = path.rsplit('/').next().unwrap_or(path);
                Some(format!("💾 Writing {}...", filename))
            } else {
                Some("💾 Writing file...".to_string())
            }
        }
        "shell" => {
            if let Some(cmd) = args {
                // Show a truncated snippet of the command with wider display
                let snippet = truncate_command_snippet(cmd, 60);
                Some(format!(" > `{}` ...", snippet))
            } else {
                Some("⚙️ Running command...".to_string())
            }
        }
        // Suppress unknown tools - don't show raw output
        _ => None,
    }
}
/// Extract just the host portion of a URL for compact display.
///
/// Strips a leading `https://` or `http://` scheme and returns everything
/// up to the first `/`. Input without a scheme is handled the same way.
#[allow(dead_code)] // Used in tests
fn extract_domain(url: &str) -> Option<&str> {
    let mut rest = url;
    for scheme in ["https://", "http://"] {
        if let Some(tail) = url.strip_prefix(scheme) {
            rest = tail;
            break;
        }
    }
    // Everything before the first path separator is the domain
    rest.split('/').next()
}
/// Truncate a command to at most roughly `max_len` characters for display.
///
/// Only the first line of a multi-line command is considered. When the line
/// is too long, it is cut and suffixed with "...".
///
/// Uses `saturating_sub` so that `max_len < 3` yields "..." instead of
/// underflowing `usize` (which panicked in debug builds previously).
#[allow(dead_code)] // Used in tests
fn truncate_command_snippet(cmd: &str, max_len: usize) -> String {
    // Take just the first line if multi-line
    let first_line = cmd.lines().next().unwrap_or(cmd);
    if first_line.chars().count() <= max_len {
        first_line.to_string()
    } else {
        // saturating_sub guards against usize underflow for tiny max_len
        let truncated: String = first_line.chars().take(max_len.saturating_sub(3)).collect();
        format!("{}...", truncated)
    }
}
/// Error patterns that indicate context window exhaustion.
///
/// Matched case-insensitively (the caller lowercases the combined
/// stdout/stderr first) to produce a tailored error message.
const CONTEXT_ERROR_PATTERNS: &[&str] = &[
    "context length", "context_length_exceeded", "maximum context", "token limit",
    "too many tokens", "exceeds the model", "context window", "max_tokens",
];
/// Execute the research tool - spawns scout agent in background and returns immediately.
///
/// This is the **async** version of research. It:
/// 1. Registers a new research task with the PendingResearchManager
/// 2. Spawns the scout agent in a background tokio task
/// 3. Returns immediately with a placeholder message containing the research_id
/// 4. The background task updates the manager when research completes
/// 5. Results are injected into the conversation at the next natural break point
///
/// # Errors
/// Fails only when the required `query` argument is missing; scout failures
/// are reported asynchronously through the manager.
pub async fn execute_research<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &mut ToolContext<'_, W>,
) -> Result<String> {
    let query = tool_call
        .args
        .get("query")
        .and_then(|v| v.as_str())
        .ok_or_else(|| anyhow::anyhow!("Missing required 'query' parameter"))?;
    // Register the research task and get an ID
    let research_id = ctx.pending_research_manager.register(query);
    // Clone values needed for the background task; the spawned future must
    // be 'static, so it cannot borrow from ctx
    let query_owned = query.to_string();
    let research_id_clone = research_id.clone();
    let manager = ctx.pending_research_manager.clone();
    let browser = ctx.config.webdriver.browser.clone();
    // Find the g3 executable path; fall back to a PATH lookup of "g3"
    let g3_path = std::env::current_exe()
        .unwrap_or_else(|_| std::path::PathBuf::from("g3"));
    // Spawn the scout agent in a background task
    tokio::spawn(async move {
        let result = run_scout_agent(&g3_path, &query_owned, browser).await;
        match result {
            Ok(report) => {
                debug!("Research {} completed successfully", research_id_clone);
                manager.complete(&research_id_clone, report);
            }
            Err(e) => {
                error!("Research {} failed: {}", research_id_clone, e);
                manager.fail(&research_id_clone, e.to_string());
            }
        }
    });
    // Return immediately with placeholder
    let placeholder = format!(
        "🔍 **Research initiated** (id: `{}`)
\
**Query:** {}
\
Research is running in the background. You can:
- Continue with other work - results will be automatically provided when ready
- Check status with `research_status` tool
- If you need the results before continuing, say so and yield the turn to the user
\
_Estimated time: 30-120 seconds depending on query complexity_",
        research_id,
        query
    );
    Ok(placeholder)
}
/// Run the scout agent and return the research report.
/// This is the blocking part that runs in a background task.
///
/// Spawns `g3 --agent scout --new-session --quiet <query>`, captures both
/// stdout and stderr, then extracts the report between the SCOUT_REPORT
/// markers. Failures are classified (context-window exhaustion vs generic)
/// to produce actionable error messages.
async fn run_scout_agent(
    g3_path: &std::path::Path,
    query: &str,
    browser: WebDriverBrowser,
) -> Result<String> {
    // Build the command with appropriate webdriver flags
    let mut cmd = Command::new(g3_path);
    cmd
        .arg("--agent")
        .arg("scout")
        .arg("--new-session") // Always start fresh for research
        .arg("--quiet"); // Suppress log file creation
    // Propagate the webdriver browser choice
    match browser {
        WebDriverBrowser::ChromeHeadless => { cmd.arg("--chrome-headless"); }
        WebDriverBrowser::Safari => { cmd.arg("--webdriver"); }
    }
    let mut child = cmd.arg(query)
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .map_err(|e| anyhow::anyhow!("Failed to spawn scout agent: {}", e))?;
    // Capture stdout to find the report content
    let stdout = child.stdout.take()
        .ok_or_else(|| anyhow::anyhow!("Failed to capture scout agent stdout"))?;
    // Also capture stderr for error messages
    let stderr = child.stderr.take()
        .ok_or_else(|| anyhow::anyhow!("Failed to capture scout agent stderr"))?;
    let mut reader = BufReader::new(stdout).lines();
    let mut all_output = Vec::new();
    // Drain stderr concurrently so the child cannot block on a full pipe
    let stderr_handle = tokio::spawn(async move {
        let mut stderr_reader = BufReader::new(stderr).lines();
        let mut stderr_output = Vec::new();
        while let Some(line) = stderr_reader.next_line().await.ok().flatten() {
            stderr_output.push(line);
        }
        stderr_output
    });
    // Collect stdout lines (no progress display in background)
    while let Some(line) = reader.next_line().await? {
        all_output.push(line);
    }
    // Collect stderr output
    let stderr_output = stderr_handle.await.unwrap_or_default();
    // Wait for the process to complete
    let status = child.wait().await
        .map_err(|e| anyhow::anyhow!("Failed to wait for scout agent: {}", e))?;
    if !status.success() {
        // Build detailed error message
        let exit_code = status.code().map(|c| c.to_string()).unwrap_or_else(|| "unknown".to_string());
        let full_output = all_output.join("\n");
        let stderr_text = stderr_output.join("\n");
        // Check for context window exhaustion
        let combined_output = format!("{} {}", full_output, stderr_text).to_lowercase();
        let is_context_error = CONTEXT_ERROR_PATTERNS.iter()
            .any(|pattern| combined_output.contains(pattern));
        if is_context_error {
            return Err(anyhow::anyhow!(
                "Context Window Exhausted\n\n\
                The research query required more context than the model supports.\n\n\
                **Suggestions:**\n\
                - Try a more specific, narrower query\n\
                - Break the research into smaller sub-questions\n\
                - Use a model with a larger context window\n\n\
                Exit code: {}",
                exit_code
            ));
        }
        // Generic error with details; stderr is capped at 1000 chars and
        // only the last 10 stdout lines are included
        return Err(anyhow::anyhow!(
            "Scout Agent Failed\n\n\
            Exit code: {}\n\n\
            {}{}",
            exit_code,
            if !stderr_text.is_empty() { format!("**Error output:**\n{}\n\n", stderr_text.chars().take(1000).collect::<String>()) } else { String::new() },
            if !all_output.is_empty() { format!("**Last output lines:**\n{}", all_output.iter().rev().take(10).rev().cloned().collect::<Vec<_>>().join("\n")) } else { String::new() }
        ));
    }
    // Join all output and extract the report between markers
    let full_output = all_output.join("\n");
    extract_report(&full_output)
}
/// Execute the research_status tool - check status of pending research tasks.
///
/// With a `research_id` argument, reports that single task; without one,
/// lists every pending task. An unknown id is not an error — a friendly
/// "not found" message is returned instead.
pub async fn execute_research_status<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &mut ToolContext<'_, W>,
) -> Result<String> {
    let research_id = tool_call
        .args
        .get("research_id")
        .and_then(|v| v.as_str());
    if let Some(id) = research_id {
        // Check specific research task
        match ctx.pending_research_manager.get(&id.to_string()) {
            Some(task) => {
                let status_emoji = match task.status {
                    crate::pending_research::ResearchStatus::Pending => "🔄",
                    crate::pending_research::ResearchStatus::Complete => "",
                    crate::pending_research::ResearchStatus::Failed => "",
                };
                let mut output = format!(
                    "{} **Research Status** (id: `{}`)\n\n\
                    **Query:** {}\n\
                    **Status:** {}\n\
                    **Elapsed:** {}\n",
                    status_emoji,
                    task.id,
                    task.query,
                    task.status,
                    task.elapsed_display()
                );
                // Tell the caller whether the report has reached the
                // conversation yet, or is queued for injection
                if task.injected {
                    output.push_str("\n_Results have already been injected into the conversation._\n");
                } else if task.status != crate::pending_research::ResearchStatus::Pending {
                    output.push_str("\n_Results will be injected at the next opportunity._\n");
                }
                Ok(output)
            }
            None => Ok(format!("❓ No research task found with id: `{}`", id)),
        }
    } else {
        // List all pending research tasks
        let tasks = ctx.pending_research_manager.list_pending();
        if tasks.is_empty() {
            return Ok("📋 No pending research tasks.".to_string());
        }
        let mut output = format!("📋 **Pending Research Tasks** ({} total)\n\n", tasks.len());
        for task in tasks {
            let status_emoji = match task.status {
                crate::pending_research::ResearchStatus::Pending => "🔄",
                crate::pending_research::ResearchStatus::Complete => "",
                crate::pending_research::ResearchStatus::Failed => "",
            };
            output.push_str(&format!(
                "{} `{}` - {} ({})\n Query: {}\n\n",
                status_emoji,
                task.id,
                task.status,
                task.elapsed_display(),
                truncate_query(&task.query, 60)
            ));
        }
        Ok(output)
    }
}
/// Truncate a query to at most roughly `max_len` characters for display,
/// appending "..." when truncation occurs.
///
/// Uses `saturating_sub` so that `max_len < 3` yields "..." instead of
/// underflowing `usize` (which panicked in debug builds previously).
fn truncate_query(query: &str, max_len: usize) -> String {
    if query.chars().count() <= max_len {
        query.to_string()
    } else {
        // saturating_sub guards against usize underflow for tiny max_len
        let truncated: String = query.chars().take(max_len.saturating_sub(3)).collect();
        format!("{}...", truncated)
    }
}
/// Extract the research report from scout output.
///
/// Looks for content between SCOUT_REPORT_START and SCOUT_REPORT_END markers.
/// Preserves ANSI escape codes in the extracted content for terminal formatting.
///
/// # Errors
/// Fails when either marker is missing, or when the end marker appears
/// before the start marker.
fn extract_report(output: &str) -> Result<String> {
    // Strip ANSI codes only for finding markers, but preserve them in the output
    let clean_output = strip_ansi_codes(output);
    // Find the start marker (positions here are only valid in the clean text)
    let start_pos = clean_output.find(REPORT_START_MARKER)
        .ok_or_else(|| anyhow::anyhow!(
            "Scout agent did not output a properly formatted report. Expected {} marker.",
            REPORT_START_MARKER
        ))?;
    // Find the end marker
    let end_pos = clean_output.find(REPORT_END_MARKER)
        .ok_or_else(|| anyhow::anyhow!(
            "Scout agent report is incomplete. Expected {} marker.",
            REPORT_END_MARKER
        ))?;
    if end_pos <= start_pos {
        return Err(anyhow::anyhow!("Invalid report format: end marker before start marker"));
    }
    // Now find the same markers in the original output to preserve ANSI codes.
    // We need positions in the un-stripped string, which may differ from the
    // clean-text positions because of embedded escape sequences.
    let original_start = find_marker_position(output, REPORT_START_MARKER)
        .ok_or_else(|| anyhow::anyhow!("Could not find start marker in original output"))?;
    let original_end = find_marker_position(output, REPORT_END_MARKER)
        .ok_or_else(|| anyhow::anyhow!("Could not find end marker in original output"))?;
    // Extract content between markers from original (with ANSI codes)
    let report_start = original_start + REPORT_START_MARKER.len();
    let report_content = output[report_start..original_end].trim();
    if report_content.is_empty() {
        return Ok("Scout agent returned an empty report.".to_string());
    }
    Ok(report_content.to_string())
}
/// Find the byte position of a marker in text that may contain ANSI codes.
///
/// The markers themselves carry no ANSI codes, so a direct substring search
/// covers the expected case. A marker interleaved with ANSI escape sequences
/// is not locatable this way; it is reported as absent (`None`) and the
/// caller surfaces an error instead.
fn find_marker_position(text: &str, marker: &str) -> Option<usize> {
    text.find(marker)
}
/// Strip ANSI escape codes from a string.
///
/// Handles common ANSI sequences like:
/// - CSI sequences: \x1b[...m (colors, styles) — consumed through the
///   terminating ASCII letter
/// - OSC sequences: \x1b]...\x07 (terminal titles, etc.) — consumed through
///   the BEL terminator
///
/// An ESC followed by anything else is dropped on its own, leaving the
/// following character in the output.
pub fn strip_ansi_codes(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut it = s.chars();
    while let Some(ch) = it.next() {
        if ch != '\x1b' {
            out.push(ch);
            continue;
        }
        // Look ahead without committing: clone the iterator, and only adopt
        // the advanced clone when a recognized sequence introducer follows.
        let mut lookahead = it.clone();
        match lookahead.next() {
            Some('[') => {
                // CSI: skip everything up to and including the final letter.
                it = lookahead;
                for c in it.by_ref() {
                    if c.is_ascii_alphabetic() {
                        break;
                    }
                }
            }
            Some(']') => {
                // OSC: skip everything up to and including the BEL terminator.
                it = lookahead;
                for c in it.by_ref() {
                    if c == '\x07' {
                        break;
                    }
                }
            }
            _ => {
                // Unrecognized escape: drop only the ESC byte itself.
            }
        }
    }
    out
}
#[cfg(test)]
mod tests {
    //! Unit tests for the scout-output parsing helpers in this module:
    //! ANSI stripping, report extraction between SCOUT_REPORT markers,
    //! progress-line translation, and display truncation.
    use super::*;

    #[test]
    fn test_strip_ansi_codes() {
        // Simple color code
        assert_eq!(strip_ansi_codes("\x1b[31mred\x1b[0m"), "red");
        // RGB color code (like the bug we saw)
        assert_eq!(
            strip_ansi_codes("\x1b[38;2;216;177;114mtmp/file.md\x1b[0m"),
            "tmp/file.md"
        );
        // Multiple codes
        assert_eq!(
            strip_ansi_codes("\x1b[1m\x1b[32mbold green\x1b[0m normal"),
            "bold green normal"
        );
        // No codes
        assert_eq!(strip_ansi_codes("plain text"), "plain text");
        // Empty string
        assert_eq!(strip_ansi_codes(""), "");
    }

    // --- extract_report: markers present, missing, colorized, empty body ---

    #[test]
    fn test_extract_report_success() {
        // Only the span between the markers should survive extraction.
        let output = r#"Some preamble text
---SCOUT_REPORT_START---
# Research Brief
This is the report content.
---SCOUT_REPORT_END---
Some trailing text"#;
        let result = extract_report(output).unwrap();
        assert!(result.contains("Research Brief"));
        assert!(result.contains("This is the report content."));
        assert!(!result.contains("preamble"));
        assert!(!result.contains("trailing"));
    }

    #[test]
    fn test_extract_report_with_ansi_codes() {
        // Markers wrapped in color codes must still be recognized.
        let output = "\x1b[32m---SCOUT_REPORT_START---\x1b[0m\n# Report\n\x1b[31m---SCOUT_REPORT_END---\x1b[0m";
        let result = extract_report(output).unwrap();
        assert!(result.contains("# Report"));
    }

    #[test]
    fn test_extract_report_missing_start() {
        let output = "No markers here";
        let result = extract_report(output);
        assert!(result.is_err());
        // Error message names the missing marker for easier debugging.
        assert!(result.unwrap_err().to_string().contains("SCOUT_REPORT_START"));
    }

    #[test]
    fn test_extract_report_missing_end() {
        let output = "---SCOUT_REPORT_START---\nContent but no end";
        let result = extract_report(output);
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("SCOUT_REPORT_END"));
    }

    #[test]
    fn test_extract_report_empty_content() {
        // An empty body is reported as a friendly message, not an error.
        let output = "---SCOUT_REPORT_START---\n---SCOUT_REPORT_END---";
        let result = extract_report(output).unwrap();
        assert!(result.contains("empty report"));
    }

    #[test]
    fn test_extract_domain() {
        // NOTE(review): extract_domain is defined outside this chunk; these
        // cases pin scheme stripping, path stripping, and bare-host input.
        assert_eq!(extract_domain("https://www.rust-lang.org/"), Some("www.rust-lang.org"));
        assert_eq!(extract_domain("https://python.org/downloads"), Some("python.org"));
        assert_eq!(extract_domain("http://example.com"), Some("example.com"));
        assert_eq!(extract_domain("example.com/path"), Some("example.com"));
    }

    // --- translate_progress: tool-call lines map to user-facing status text ---

    #[test]
    fn test_translate_progress_webdriver_start() {
        let line = "┌─ webdriver_start";
        assert_eq!(translate_progress(line), Some("🌐 Launching browser...".to_string()));
    }

    #[test]
    fn test_translate_progress_webdriver_navigate() {
        let line = "┌─ webdriver_navigate | https://www.rust-lang.org/";
        assert_eq!(translate_progress(line), Some("🔗 Navigating to www.rust-lang.org...".to_string()));
    }

    #[test]
    fn test_translate_progress_webdriver_get_page_source() {
        // With max_length arg (number)
        let line = "┌─ webdriver_get_page_source | 15000";
        assert_eq!(translate_progress(line), Some("📄 Reading page content...".to_string()));
        // With file path
        let line = "┌─ webdriver_get_page_source | tmp/rust_release.html";
        assert_eq!(translate_progress(line), Some("📥 Downloading rust_release.html...".to_string()));
    }

    #[test]
    fn test_translate_progress_webdriver_find_elements() {
        let line = "┌─ webdriver_find_elements | .download-os-source, .download-for-current-os";
        assert_eq!(translate_progress(line), Some("🔍 Searching page...".to_string()));
    }

    #[test]
    fn test_translate_progress_webdriver_quit() {
        let line = "┌─ webdriver_quit";
        assert_eq!(translate_progress(line), Some("✅ Closing browser...".to_string()));
    }

    #[test]
    fn test_translate_progress_read_file() {
        // Without range
        let line = "┌─ read_file | /path/to/file.rs";
        assert_eq!(translate_progress(line), Some("📖 Reading file.rs...".to_string()));
        // With range (file slice)
        let line = "┌─ read_file | /path/to/file.rs [1000..2000]";
        assert_eq!(translate_progress(line), Some("📖 Reading file.rs slice 1000..2000...".to_string()));
    }

    #[test]
    fn test_translate_progress_write_file() {
        let line = "┌─ write_file | output.md";
        assert_eq!(translate_progress(line), Some("💾 Writing output.md...".to_string()));
    }

    #[test]
    fn test_translate_progress_shell() {
        let line = "┌─ shell | ls -la";
        assert_eq!(translate_progress(line), Some(" > `ls -la` ...".to_string()));
    }

    #[test]
    fn test_translate_progress_with_ansi_codes() {
        // Real output from scout agent has ANSI codes
        let line = "\x1b[1;38;5;69m┌─ webdriver_start\x1b[0m";
        assert_eq!(translate_progress(line), Some("🌐 Launching browser...".to_string()));
        let line = "\x1b[1;38;5;69m┌─ webdriver_navigate\x1b[0m\x1b[35m | https://www.python.org/\x1b[0m";
        assert_eq!(translate_progress(line), Some("🔗 Navigating to www.python.org...".to_string()));
    }

    #[test]
    fn test_translate_progress_suppresses_non_tool_lines() {
        // Non-tool-call output (results, timing, noise) maps to None.
        assert_eq!(translate_progress("Some random output"), None);
        assert_eq!(translate_progress("│ Page source (59851 chars)"), None);
        assert_eq!(translate_progress("└─ ⚡️ 1.5s"), None);
        assert_eq!(translate_progress(""), None);
    }

    // --- truncation helpers ---

    #[test]
    fn test_truncate_command_snippet() {
        // Short command - no truncation
        assert_eq!(truncate_command_snippet("ls -la", 40), "ls -la");
        // Long command - truncated
        let long_cmd = "grep -r 'some very long search pattern' --include='*.rs' /path/to/directory";
        let result = truncate_command_snippet(long_cmd, 40);
        assert!(result.len() <= 40);
        assert!(result.ends_with("..."));
        // Multi-line command - only first line
        let multi_line = "echo 'line1'\necho 'line2'";
        assert_eq!(truncate_command_snippet(multi_line, 40), "echo 'line1'");
    }

    #[test]
    fn test_translate_progress_shell_long_command() {
        let line = "┌─ shell | grep -r 'some very long search pattern that exceeds the limit' --include='*.rs'";
        let result = translate_progress(line).unwrap();
        assert!(result.starts_with(" > `grep"));
        assert!(result.contains("..."));
    }

    #[test]
    fn test_truncate_query() {
        assert_eq!(truncate_query("short query", 50), "short query");
        let long_query = "This is a very long research query that should be truncated for display purposes";
        let result = truncate_query(long_query, 40);
        assert!(result.len() <= 40);
        assert!(result.ends_with("..."));
    }
}