feat: Externalize research tool as embedded skill

Replaces the built-in research/research_status tools with a portable
skill-based approach:

- Add embedded skills infrastructure (skills compiled into binary)
- Add repo-local skills/ directory support (highest priority)
- Create research skill with SKILL.md and g3-research shell script
- Script extraction to .g3/bin/ with version tracking
- Filesystem-based handoff via .g3/research/<id>/status.json
- Remove PendingResearchManager and all research tool code
- Update system prompt to reference skill instead of tool

Benefits:
- No special tool infrastructure needed (just shell + read_file)
- Context-efficient (reports stay on disk until needed)
- Crash-resilient (state persisted to filesystem)
- Portable (skill can be overridden per-workspace)

Breaking change: research tool calls now return a deprecation message
pointing to the research skill.
This commit is contained in:
Dhanji R. Prasanna
2026-02-05 13:23:26 +11:00
parent bf9e3dc878
commit 39e586982c
19 changed files with 949 additions and 1638 deletions

View File

@@ -1,15 +1,17 @@
//! Skill discovery - scans directories for SKILL.md files.
//!
//! Discovers skills from:
//! - Global: ~/.g3/skills/
//! - Workspace: .g3/skills/
//!
//! Workspace skills override global skills with the same name.
//! Discovers skills from (highest to lowest priority):
//! 1. Repo: `skills/` at repo root (checked into git, overrides all)
//! 2. Workspace: `.g3/skills/` (local customizations)
//! 3. Extra paths from config
//! 4. Global: `~/.g3/skills/`
//! 5. Embedded: compiled into binary (always available)
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use tracing::{debug, warn};
use super::embedded::get_embedded_skills;
use super::parser::Skill;
/// Default global skills directory
@@ -18,28 +20,36 @@ const GLOBAL_SKILLS_DIR: &str = "~/.g3/skills";
/// Default workspace skills directory (relative to workspace root)
const WORKSPACE_SKILLS_DIR: &str = ".g3/skills";
/// Repo-local skills directory (relative to workspace root, checked into git)
const REPO_SKILLS_DIR: &str = "skills";
/// Discover all available skills from configured paths.
///
/// Skills are loaded from:
/// 1. Global directory (~/.g3/skills/)
/// 2. Workspace directory (.g3/skills/)
/// Skills are loaded in priority order (lowest to highest):
/// 1. Embedded skills (compiled into binary)
/// 2. Global directory (~/.g3/skills/)
/// 3. Extra paths from config
/// 4. Workspace directory (.g3/skills/)
/// 5. Repo directory (skills/) - highest priority
///
/// Workspace skills override global skills with the same name.
/// Additional paths can be provided via `extra_paths`.
/// Higher priority skills override lower priority skills with the same name.
pub fn discover_skills(
workspace_dir: Option<&Path>,
extra_paths: &[PathBuf],
) -> Vec<Skill> {
let mut skills_by_name: HashMap<String, Skill> = HashMap::new();
// 1. Load global skills first (lowest priority)
// 1. Load embedded skills first (lowest priority)
load_embedded_skills(&mut skills_by_name);
// 2. Load global skills
let global_dir = expand_tilde(GLOBAL_SKILLS_DIR);
if global_dir.exists() {
debug!("Scanning global skills directory: {}", global_dir.display());
load_skills_from_dir(&global_dir, &mut skills_by_name);
}
// 2. Load from extra paths (medium priority)
// 3. Load from extra paths
for path in extra_paths {
let expanded = if path.starts_with("~") {
expand_tilde(&path.to_string_lossy())
@@ -52,7 +62,7 @@ pub fn discover_skills(
}
}
// 3. Load workspace skills last (highest priority - overrides others)
// 4. Load workspace skills (.g3/skills/)
if let Some(workspace) = workspace_dir {
let workspace_skills = workspace.join(WORKSPACE_SKILLS_DIR);
if workspace_skills.exists() {
@@ -61,6 +71,15 @@ pub fn discover_skills(
}
}
// 5. Load repo skills (skills/) - highest priority
if let Some(workspace) = workspace_dir {
let repo_skills = workspace.join(REPO_SKILLS_DIR);
if repo_skills.exists() {
debug!("Scanning repo skills directory: {}", repo_skills.display());
load_skills_from_dir(&repo_skills, &mut skills_by_name);
}
}
// Convert to sorted vector for deterministic ordering
let mut skills: Vec<Skill> = skills_by_name.into_values().collect();
skills.sort_by(|a, b| a.name.cmp(&b.name));
@@ -69,6 +88,23 @@ pub fn discover_skills(
skills
}
/// Load embedded skills into the map.
fn load_embedded_skills(skills: &mut HashMap<String, Skill>) {
for embedded in get_embedded_skills() {
match Skill::parse(embedded.skill_md, Path::new("<embedded>")) {
Ok(mut skill) => {
// Mark as embedded in the path
skill.path = format!("<embedded:{}>/{}", embedded.name, "SKILL.md");
debug!("Loaded embedded skill: {}", skill.name);
skills.insert(skill.name.clone(), skill);
}
Err(e) => {
warn!("Failed to parse embedded skill '{}': {}", embedded.name, e);
}
}
}
}
/// Load skills from a directory into the map.
/// Each subdirectory should contain a SKILL.md file.
fn load_skills_from_dir(dir: &Path, skills: &mut HashMap<String, Skill>) {
@@ -125,6 +161,11 @@ fn expand_tilde(path: &str) -> PathBuf {
PathBuf::from(expanded.as_ref())
}
/// Check if a skill is from an embedded source.
pub fn is_embedded_skill(skill: &Skill) -> bool {
skill.path.starts_with("<embedded:")
}
#[cfg(test)]
mod tests {
use super::*;
@@ -144,6 +185,16 @@ mod tests {
skill_dir
}
#[test]
fn test_discover_embedded_skills() {
// With no directories, should still find embedded skills
let skills = discover_skills(None, &[]);
// Should have at least the research skill
assert!(!skills.is_empty(), "Should have embedded skills");
assert!(skills.iter().any(|s| s.name == "research"), "Should have research skill");
}
#[test]
fn test_discover_from_workspace() {
let temp = TempDir::new().unwrap();
@@ -158,9 +209,66 @@ mod tests {
let skills = discover_skills(Some(workspace), &[]);
assert_eq!(skills.len(), 2);
assert_eq!(skills[0].name, "another-skill"); // Sorted alphabetically
assert_eq!(skills[1].name, "test-skill");
// Should have embedded + workspace skills
assert!(skills.iter().any(|s| s.name == "test-skill"));
assert!(skills.iter().any(|s| s.name == "another-skill"));
assert!(skills.iter().any(|s| s.name == "research")); // embedded
}
#[test]
fn test_discover_from_repo_skills() {
let temp = TempDir::new().unwrap();
let workspace = temp.path();
// Create repo skills directory (skills/)
let skills_dir = workspace.join("skills");
fs::create_dir_all(&skills_dir).unwrap();
create_skill_dir(&skills_dir, "repo-skill", "A repo skill");
let skills = discover_skills(Some(workspace), &[]);
assert!(skills.iter().any(|s| s.name == "repo-skill"));
}
#[test]
fn test_repo_overrides_embedded() {
let temp = TempDir::new().unwrap();
let workspace = temp.path();
// Create repo skills directory with a skill that overrides embedded
let skills_dir = workspace.join("skills");
fs::create_dir_all(&skills_dir).unwrap();
// Override the embedded research skill
create_skill_dir(&skills_dir, "research", "Custom research skill");
let skills = discover_skills(Some(workspace), &[]);
let research = skills.iter().find(|s| s.name == "research").unwrap();
assert_eq!(research.description, "Custom research skill");
assert!(!is_embedded_skill(research), "Should not be marked as embedded");
}
#[test]
fn test_repo_overrides_workspace() {
let temp = TempDir::new().unwrap();
let workspace = temp.path();
// Create workspace skill
let workspace_skills = workspace.join(".g3/skills");
fs::create_dir_all(&workspace_skills).unwrap();
create_skill_dir(&workspace_skills, "shared-skill", "Workspace version");
// Create repo skill with same name (should override)
let repo_skills = workspace.join("skills");
fs::create_dir_all(&repo_skills).unwrap();
create_skill_dir(&repo_skills, "shared-skill", "Repo version");
let skills = discover_skills(Some(workspace), &[]);
let shared = skills.iter().find(|s| s.name == "shared-skill").unwrap();
assert_eq!(shared.description, "Repo version");
}
#[test]
@@ -173,8 +281,7 @@ mod tests {
let skills = discover_skills(None, &[extra_dir]);
assert_eq!(skills.len(), 1);
assert_eq!(skills[0].name, "extra-skill");
assert!(skills.iter().any(|s| s.name == "extra-skill"));
}
#[test]
@@ -194,15 +301,15 @@ mod tests {
let skills = discover_skills(Some(workspace), &[extra_dir]);
assert_eq!(skills.len(), 1);
assert_eq!(skills[0].name, "shared-skill");
assert_eq!(skills[0].description, "Workspace version");
let shared = skills.iter().find(|s| s.name == "shared-skill").unwrap();
assert_eq!(shared.description, "Workspace version");
}
#[test]
fn test_nonexistent_directory() {
let skills = discover_skills(Some(Path::new("/nonexistent/path")), &[]);
assert!(skills.is_empty());
// Should still have embedded skills
assert!(!skills.is_empty());
}
#[test]
@@ -212,7 +319,8 @@ mod tests {
fs::create_dir_all(&skills_dir).unwrap();
let skills = discover_skills(Some(temp.path()), &[]);
assert!(skills.is_empty());
// Should still have embedded skills
assert!(!skills.is_empty());
}
#[test]
@@ -234,9 +342,9 @@ mod tests {
let skills = discover_skills(Some(temp.path()), &[]);
// Only valid skill should be loaded
assert_eq!(skills.len(), 1);
assert_eq!(skills[0].name, "valid-skill");
// Valid skill should be loaded, invalid should be skipped
assert!(skills.iter().any(|s| s.name == "valid-skill"));
assert!(!skills.iter().any(|s| s.name == "invalid-skill"));
}
#[test]
@@ -254,8 +362,7 @@ mod tests {
let skills = discover_skills(Some(temp.path()), &[]);
assert_eq!(skills.len(), 1);
assert_eq!(skills[0].name, "lowercase-skill");
assert!(skills.iter().any(|s| s.name == "lowercase-skill"));
}
#[test]
@@ -266,4 +373,11 @@ mod tests {
let no_tilde = expand_tilde("/absolute/path");
assert_eq!(no_tilde, PathBuf::from("/absolute/path"));
}
#[test]
fn test_is_embedded_skill() {
let skills = discover_skills(None, &[]);
let research = skills.iter().find(|s| s.name == "research").unwrap();
assert!(is_embedded_skill(research));
}
}

View File

@@ -0,0 +1,87 @@
//! Embedded skills - compiled into the binary for portability.
//!
//! Core skills are embedded at compile time using `include_str!`.
//! This ensures g3 works anywhere without needing external skill files.
//!
//! Priority order (highest to lowest):
//! 1. Repo `skills/` directory (on disk, checked into git)
//! 2. Workspace `.g3/skills/` directory
//! 3. Config extra_paths
//! 4. Global `~/.g3/skills/` directory
//! 5. Embedded skills (this module - always available)
use std::collections::HashMap;
/// An embedded skill with its SKILL.md content and optional scripts.
#[derive(Debug, Clone)]
pub struct EmbeddedSkill {
/// Skill name (must match the name in SKILL.md frontmatter)
pub name: &'static str,
/// Content of SKILL.md
pub skill_md: &'static str,
/// Scripts bundled with the skill: (filename, content)
pub scripts: &'static [(&'static str, &'static str)],
}
/// All embedded skills, compiled into the binary.
///
/// To add a new embedded skill:
/// 1. Create `skills/<name>/SKILL.md` in the repo
/// 2. Add an entry here with `include_str!`
static EMBEDDED_SKILLS: &[EmbeddedSkill] = &[
EmbeddedSkill {
name: "research",
skill_md: include_str!("../../../../skills/research/SKILL.md"),
scripts: &[
("g3-research", include_str!("../../../../skills/research/g3-research")),
],
},
];
/// Get all embedded skills.
pub fn get_embedded_skills() -> &'static [EmbeddedSkill] {
EMBEDDED_SKILLS
}
/// Get an embedded skill by name.
pub fn get_embedded_skill(name: &str) -> Option<&'static EmbeddedSkill> {
EMBEDDED_SKILLS.iter().find(|s| s.name == name)
}
/// Get embedded skills as a map for easy lookup.
pub fn get_embedded_skills_map() -> HashMap<&'static str, &'static EmbeddedSkill> {
EMBEDDED_SKILLS.iter().map(|s| (s.name, s)).collect()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_embedded_skills_exist() {
let skills = get_embedded_skills();
assert!(!skills.is_empty(), "Should have at least one embedded skill");
}
#[test]
fn test_research_skill_embedded() {
let skill = get_embedded_skill("research");
assert!(skill.is_some(), "Research skill should be embedded");
let skill = skill.unwrap();
assert!(skill.skill_md.contains("name: research"), "SKILL.md should have name field");
assert!(!skill.scripts.is_empty(), "Research skill should have scripts");
}
#[test]
fn test_get_by_name() {
assert!(get_embedded_skill("research").is_some());
assert!(get_embedded_skill("nonexistent").is_none());
}
#[test]
fn test_skills_map() {
let map = get_embedded_skills_map();
assert!(map.contains_key("research"));
}
}

View File

@@ -0,0 +1,234 @@
//! Script extraction for embedded skills.
//!
//! Extracts embedded scripts to `.g3/bin/` on first use.
//! Scripts are re-extracted if the embedded version changes.
use anyhow::{Context, Result};
use std::fs;
use std::os::unix::fs::PermissionsExt;
use std::path::{Path, PathBuf};
use tracing::{debug, info};
use super::embedded::get_embedded_skill;
/// Directory where extracted scripts are placed (relative to workspace)
const BIN_DIR: &str = ".g3/bin";
/// Version file to track when scripts need re-extraction
const VERSION_FILE: &str = ".version";
/// Extract a script from an embedded skill to the bin directory.
///
/// Returns the path to the extracted script.
///
/// # Arguments
/// * `skill_name` - Name of the skill containing the script
/// * `script_name` - Name of the script file to extract
/// * `workspace_dir` - Workspace root directory
///
/// # Returns
/// Path to the extracted script, ready to execute.
pub fn extract_script(
skill_name: &str,
script_name: &str,
workspace_dir: &Path,
) -> Result<PathBuf> {
let skill = get_embedded_skill(skill_name)
.with_context(|| format!("Embedded skill '{}' not found", skill_name))?;
let script_content = skill
.scripts
.iter()
.find(|(name, _)| *name == script_name)
.map(|(_, content)| *content)
.with_context(|| format!("Script '{}' not found in skill '{}'", script_name, skill_name))?;
let bin_dir = workspace_dir.join(BIN_DIR);
fs::create_dir_all(&bin_dir)
.with_context(|| format!("Failed to create bin directory: {}", bin_dir.display()))?;
let script_path = bin_dir.join(script_name);
let version_path = bin_dir.join(format!("{}{}", script_name, VERSION_FILE));
// Check if we need to extract (script missing or version changed)
let needs_extraction = if !script_path.exists() {
debug!("Script {} does not exist, extracting", script_path.display());
true
} else if needs_update(&version_path, script_content)? {
debug!("Script {} is outdated, re-extracting", script_path.display());
true
} else {
debug!("Script {} is up to date", script_path.display());
false
};
if needs_extraction {
// Write the script
fs::write(&script_path, script_content)
.with_context(|| format!("Failed to write script: {}", script_path.display()))?;
// Make it executable (Unix only)
#[cfg(unix)]
{
let mut perms = fs::metadata(&script_path)?.permissions();
perms.set_mode(0o755);
fs::set_permissions(&script_path, perms)?;
}
// Write version file (content hash)
let hash = compute_hash(script_content);
fs::write(&version_path, hash)
.with_context(|| format!("Failed to write version file: {}", version_path.display()))?;
info!("Extracted {} to {}", script_name, script_path.display());
}
Ok(script_path)
}
/// Extract all scripts from an embedded skill.
///
/// Returns a vector of (script_name, script_path) pairs.
pub fn extract_all_scripts(
skill_name: &str,
workspace_dir: &Path,
) -> Result<Vec<(String, PathBuf)>> {
let skill = get_embedded_skill(skill_name)
.with_context(|| format!("Embedded skill '{}' not found", skill_name))?;
let mut extracted = Vec::new();
for (script_name, _) in skill.scripts {
let path = extract_script(skill_name, script_name, workspace_dir)?;
extracted.push((script_name.to_string(), path));
}
Ok(extracted)
}
/// Check if a script needs to be updated based on version file.
fn needs_update(version_path: &Path, current_content: &str) -> Result<bool> {
if !version_path.exists() {
return Ok(true);
}
let stored_hash = fs::read_to_string(version_path)
.with_context(|| format!("Failed to read version file: {}", version_path.display()))?;
let current_hash = compute_hash(current_content);
Ok(stored_hash.trim() != current_hash)
}
/// Compute a simple hash of content for version tracking.
/// Uses a fast non-cryptographic hash.
fn compute_hash(content: &str) -> String {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
content.hash(&mut hasher);
format!("{:016x}", hasher.finish())
}
/// Get the path where a script would be extracted.
/// Does not actually extract the script.
pub fn get_script_path(script_name: &str, workspace_dir: &Path) -> PathBuf {
workspace_dir.join(BIN_DIR).join(script_name)
}
/// Check if a script has been extracted.
pub fn is_script_extracted(script_name: &str, workspace_dir: &Path) -> bool {
get_script_path(script_name, workspace_dir).exists()
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
#[test]
fn test_extract_research_script() {
let temp = TempDir::new().unwrap();
let result = extract_script("research", "g3-research", temp.path());
assert!(result.is_ok(), "Should extract research script: {:?}", result.err());
let script_path = result.unwrap();
assert!(script_path.exists(), "Script should exist after extraction");
// Check it's executable
#[cfg(unix)]
{
let metadata = fs::metadata(&script_path).unwrap();
let mode = metadata.permissions().mode();
assert!(mode & 0o111 != 0, "Script should be executable");
}
// Check content
let content = fs::read_to_string(&script_path).unwrap();
assert!(content.starts_with("#!/bin/bash"), "Should be a bash script");
}
#[test]
fn test_extract_idempotent() {
let temp = TempDir::new().unwrap();
// Extract twice
let path1 = extract_script("research", "g3-research", temp.path()).unwrap();
let path2 = extract_script("research", "g3-research", temp.path()).unwrap();
assert_eq!(path1, path2, "Should return same path");
}
#[test]
fn test_version_tracking() {
let temp = TempDir::new().unwrap();
// Extract
extract_script("research", "g3-research", temp.path()).unwrap();
// Version file should exist
let version_path = temp.path().join(".g3/bin/g3-research.version");
assert!(version_path.exists(), "Version file should exist");
let hash = fs::read_to_string(&version_path).unwrap();
assert!(!hash.is_empty(), "Version file should contain hash");
}
#[test]
fn test_nonexistent_skill() {
let temp = TempDir::new().unwrap();
let result = extract_script("nonexistent", "script", temp.path());
assert!(result.is_err());
assert!(result.unwrap_err().to_string().contains("not found"));
}
#[test]
fn test_nonexistent_script() {
let temp = TempDir::new().unwrap();
let result = extract_script("research", "nonexistent", temp.path());
assert!(result.is_err());
assert!(result.unwrap_err().to_string().contains("not found"));
}
#[test]
fn test_get_script_path() {
let workspace = Path::new("/workspace");
let path = get_script_path("g3-research", workspace);
assert_eq!(path, PathBuf::from("/workspace/.g3/bin/g3-research"));
}
#[test]
fn test_compute_hash() {
let hash1 = compute_hash("hello world");
let hash2 = compute_hash("hello world");
let hash3 = compute_hash("different content");
assert_eq!(hash1, hash2, "Same content should produce same hash");
assert_ne!(hash1, hash3, "Different content should produce different hash");
assert_eq!(hash1.len(), 16, "Hash should be 16 hex chars");
}
}

View File

@@ -22,10 +22,12 @@
//!
//! # Discovery
//!
//! Skills are discovered from:
//! 1. Global: `~/.g3/skills/` (lowest priority)
//! 2. Extra paths from config (medium priority)
//! 3. Workspace: `.g3/skills/` (highest priority, overrides others)
//! Skills are discovered from (highest to lowest priority):
//! 1. Repo: `skills/` at repo root (checked into git, overrides all)
//! 2. Workspace: `.g3/skills/` (local customizations)
//! 3. Extra paths from config
//! 4. Global: `~/.g3/skills/`
//! 5. Embedded: compiled into binary (always available)
//!
//! # Usage
//!
@@ -36,7 +38,10 @@
mod parser;
mod discovery;
mod prompt;
mod embedded;
pub mod extraction;
pub use parser::Skill;
pub use discovery::discover_skills;
pub use prompt::generate_skills_prompt;
pub use embedded::{get_embedded_skills, get_embedded_skill, EmbeddedSkill};