feat: Externalize research tool as embedded skill

Replaces the built-in research/research_status tools with a portable
skill-based approach:

- Add embedded skills infrastructure (skills compiled into binary)
- Add repo-local skills/ directory support (highest priority)
- Create research skill with SKILL.md and g3-research shell script
- Script extraction to .g3/bin/ with version tracking
- Filesystem-based handoff via .g3/research/<id>/status.json
- Remove PendingResearchManager and all research tool code
- Update system prompt to reference skill instead of tool

Benefits:
- No special tool infrastructure needed (just shell + read_file)
- Context-efficient (reports stay on disk until needed)
- Crash-resilient (state persisted to filesystem)
- Portable (skill can be overridden per-workspace)

Breaking change: research tool calls now return a deprecation message
pointing to the research skill.
This commit is contained in:
Dhanji R. Prasanna
2026-02-05 13:23:26 +11:00
parent bf9e3dc878
commit 39e586982c
19 changed files with 949 additions and 1638 deletions

306
skills/research/g3-research Executable file
View File

@@ -0,0 +1,306 @@
#!/bin/bash
#
# g3-research - Perform web research via scout agent with filesystem handoff
#
# Usage:
# g3-research "<query>" Start new research
# g3-research --status <id> Check status of specific research
# g3-research --list List all research tasks
# g3-research --help Show this help
#
# Research results are stored in .g3/research/<id>/
# - status.json: Machine-readable status
# - report.md: The research brief (when complete)
# Strict mode: abort on a failing command, on expansion of an unset
# variable, and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Configuration: directory (relative to the current working directory) that
# holds one sub-directory per research task, and the agent name passed to g3.
RESEARCH_DIR='.g3/research'
SCOUT_AGENT='scout'

# Report delimiters (must match scout agent output)
REPORT_START_MARKER='---SCOUT_REPORT_START---'
REPORT_END_MARKER='---SCOUT_REPORT_END---'
#######################################
# Generate a unique research ID.
# Outputs:
#   Writes "research_<unix-seconds>_<6 hex digits>" to stdout.
#######################################
generate_id() {
  local timestamp
  local random_suffix
  timestamp=$(date +%s)
  # Three random bytes render as exactly six hex digits. od is specified by
  # POSIX, whereas xxd ships with vim and is absent on minimal systems.
  # Whitespace is stripped because od pads and separates the byte values.
  random_suffix=$(od -An -N3 -tx1 /dev/urandom | tr -d '[:space:]')
  printf 'research_%s_%s\n' "$timestamp" "$random_suffix"
}
#######################################
# Print the current time in UTC as an ISO 8601 timestamp,
# e.g. 2026-02-05T02:23:26Z.
#######################################
get_timestamp() {
  local -r iso_8601_format='+%Y-%m-%dT%H:%M:%SZ'
  date -u "$iso_8601_format"
}
#######################################
# Escape a string for use inside a JSON string literal.
# Backslash, double quote, newline, carriage return and tab become their
# two-character JSON escapes; any other ASCII control character is dropped
# because JSON forbids it unescaped.
# Arguments:
#   $1 - raw text
# Outputs:
#   Escaped text on stdout (without surrounding quotes)
#######################################
_json_escape() {
  local text="$1"
  # Backslashes must be doubled first so later escapes are not re-escaped.
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "$text" | LC_ALL=C tr -d '\001-\037'
}

#######################################
# Write status.json file
# Arguments:
#   $1 - research directory
#   $2 - id
#   $3 - query
#   $4 - status (running|complete|failed)
#   $5 - started_at
#   $6 - completed_at (optional; omit or pass "null" while running)
#   $7 - error (optional; omit or pass "null" on success)
# Outputs:
#   Overwrites <research directory>/status.json with a 9-line JSON object.
#######################################
write_status() {
  local dir="$1"
  local id="$2"
  local query="$3"
  local status="$4"
  local started_at="$5"
  # Default the optional arguments so omitting them does not trip `set -u`.
  local completed_at="${6-null}"
  local error="${7-null}"

  # The literal word "null" selects a JSON null; anything else is emitted
  # as a quoted, escaped JSON string.
  local completed_json="null"
  if [[ "$completed_at" != "null" ]]; then
    completed_json="\"$(_json_escape "$completed_at")\""
  fi

  local error_json="null"
  if [[ "$error" != "null" ]]; then
    # Cap the raw message before escaping: truncating afterwards could cut
    # an escape sequence in half and leave the JSON string malformed.
    error_json="\"$(_json_escape "${error:0:1000}")\""
  fi

  cat > "${dir}/status.json" << EOF
{
  "id": "$(_json_escape "$id")",
  "query": "$(_json_escape "$query")",
  "status": "$(_json_escape "$status")",
  "started_at": "$(_json_escape "$started_at")",
  "completed_at": ${completed_json},
  "report_path": "$(_json_escape "${dir}/report.md")",
  "error": ${error_json}
}
EOF
}
#######################################
# Extract report from scout output
# Globals:
#   REPORT_START_MARKER, REPORT_END_MARKER (read)
# Arguments:
#   $1 - scout output file
# Outputs:
#   The lines strictly between the first start-marker line and the next
#   end-marker line. Nothing is printed unless BOTH markers are present, so
#   output that was cut off before the end marker is never mistaken for a
#   finished report.
#######################################
extract_report() {
  local output_file="$1"
  # Buffer the body and only emit it once the closing marker is seen.
  # index() does a plain substring match, so a marker may share its line
  # with other text and is never interpreted as a regular expression.
  awk -v start_marker="$REPORT_START_MARKER" \
      -v end_marker="$REPORT_END_MARKER" '
    !inside && index($0, start_marker) { inside = 1; count = 0; next }
    inside && index($0, end_marker)    { complete = 1; exit }
    inside                             { body[++count] = $0 }
    END {
      if (complete) {
        for (i = 1; i <= count; i++) print body[i]
      }
    }
  ' "$output_file"
}
#######################################
# Run research
# Creates ${RESEARCH_DIR}/<id>/, runs the scout agent on the query, and
# records progress in status.json (via write_status) and the result in
# report.md.
# Globals:
#   RESEARCH_DIR, SCOUT_AGENT (read)
# Arguments:
#   $1 - query
# Outputs:
#   stdout: one JSON line when the task starts and one when it completes.
#   stderr: one JSON line describing the failure, if any.
# Returns:
#   Exits the script with status 1 on any failure.
#######################################
run_research() {
  local query="$1"
  local id
  local research_dir
  local started_at
  local output_file
  local exit_code=0

  # An empty query cannot produce a meaningful report; refuse it before
  # creating a task directory.
  if [[ -z "$query" ]]; then
    echo "{\"error\": \"Missing research query\"}" >&2
    exit 1
  fi

  # Generate unique ID and create directory
  id=$(generate_id)
  research_dir="${RESEARCH_DIR}/${id}"
  mkdir -p "$research_dir"
  started_at=$(get_timestamp)
  output_file="${research_dir}/scout_output.txt"

  # Write initial status
  write_status "$research_dir" "$id" "$query" "running" "$started_at" "null" "null"

  # Output the research ID immediately so caller knows where to look
  printf '{"id": "%s", "status": "running", "path": "%s"}\n' "$id" "$research_dir"

  # Find g3 binary: PATH first, then local release and debug builds.
  local g3_bin
  if command -v g3 &> /dev/null; then
    g3_bin="g3"
  elif [[ -x "./target/release/g3" ]]; then
    g3_bin="./target/release/g3"
  elif [[ -x "./target/debug/g3" ]]; then
    g3_bin="./target/debug/g3"
  else
    write_status "$research_dir" "$id" "$query" "failed" "$started_at" "$(get_timestamp)" "g3 binary not found in PATH or target/"
    printf '{"id": "%s", "status": "failed", "error": "g3 binary not found"}\n' "$id" >&2
    exit 1
  fi

  # Run scout agent and capture output. The exit status is collected with
  # `||` so errexit does not have to be switched off and back on.
  "$g3_bin" --agent "$SCOUT_AGENT" --new-session --quiet "$query" \
    > "$output_file" 2>&1 || exit_code=$?

  local completed_at
  completed_at=$(get_timestamp)

  if [[ $exit_code -ne 0 ]]; then
    # Scout failed: keep the tail of its output as the error detail.
    local error_msg
    error_msg=$(tail -n 20 "$output_file" 2> /dev/null || true)
    # tail succeeds with no output on an empty file, so the fallback has to
    # be applied on emptiness rather than on tail's exit status.
    if [[ -z "$error_msg" ]]; then
      error_msg="Unknown error"
    fi
    write_status "$research_dir" "$id" "$query" "failed" "$started_at" "$completed_at" "$error_msg"
    printf '{"id": "%s", "status": "failed", "error": "Scout agent exited with code %s"}\n' "$id" "$exit_code" >&2
    exit 1
  fi

  # Extract report from output
  local report
  report=$(extract_report "$output_file")
  if [[ -z "$report" ]]; then
    write_status "$research_dir" "$id" "$query" "failed" "$started_at" "$completed_at" "Scout did not produce a valid report (missing markers)"
    printf '{"id": "%s", "status": "failed", "error": "No report markers found in output"}\n' "$id" >&2
    exit 1
  fi

  # Write report to file. printf is used because echo would treat a report
  # such as "-n" or "-e" as an option and write nothing.
  printf '%s\n' "$report" > "${research_dir}/report.md"

  # Update status to complete
  write_status "$research_dir" "$id" "$query" "complete" "$started_at" "$completed_at" "null"

  # Clean up scout output (optional - keep for debugging)
  # rm -f "$output_file"

  printf '{"id": "%s", "status": "complete", "report_path": "%s"}\n' "$id" "${research_dir}/report.md"
}
#######################################
# Check status of a specific research task
# Globals:
#   RESEARCH_DIR (read)
# Arguments:
#   $1 - research ID (a single directory name under RESEARCH_DIR)
# Outputs:
#   stdout: contents of the task's status.json
#   stderr: a JSON error object if the ID is invalid or unknown
# Returns:
#   Exits the script with status 1 if the ID is invalid or unknown.
#######################################
check_status() {
  local id="$1"
  # An ID must be one plain path component. Rejecting slashes and a leading
  # dot keeps lookups inside RESEARCH_DIR (no "../" traversal), and
  # rejecting quotes keeps the JSON error message below well-formed.
  local -r id_pattern='^[A-Za-z0-9_][A-Za-z0-9_.-]*$'
  if [[ ! "$id" =~ $id_pattern ]]; then
    echo "{\"error\": \"Invalid research ID\"}" >&2
    exit 1
  fi

  local status_file="${RESEARCH_DIR}/${id}/status.json"
  if [[ ! -f "$status_file" ]]; then
    echo "{\"error\": \"Research task not found: ${id}\"}" >&2
    exit 1
  fi
  cat "$status_file"
}
#######################################
# List all research tasks
# Globals:
#   RESEARCH_DIR (read)
# Outputs:
#   A JSON array on stdout holding the contents of every
#   ${RESEARCH_DIR}/*/status.json, or "[]" when RESEARCH_DIR does not exist.
#######################################
list_research() {
  if [[ ! -d "$RESEARCH_DIR" ]]; then
    echo "[]"
    return
  fi

  # Declared local so the loop variable does not leak into global scope.
  local status_file
  # Empty before the first element, "," afterwards.
  local separator=""

  echo "["
  for status_file in "${RESEARCH_DIR}"/*/status.json; do
    # When nothing matches, the glob stays as a literal pattern; skip it.
    if [[ ! -f "$status_file" ]]; then
      continue
    fi
    if [[ -n "$separator" ]]; then
      echo "$separator"
    fi
    cat "$status_file"
    separator=","
  done
  echo "]"
}
#######################################
# Print the usage text to stdout: synopsis, examples, output format and
# the files each research task produces.
#######################################
show_help() {
  # One argument per output line; printf reuses the format for each.
  printf '%s\n' \
    'g3-research - Perform web research via scout agent' \
    'USAGE:' \
    'g3-research "<query>" Start new research' \
    'g3-research --status <id> Check status of specific research' \
    'g3-research --list List all research tasks' \
    'g3-research --help Show this help' \
    'EXAMPLES:' \
    '# Start research (run via background_process)' \
    'g3-research "What are the best Rust HTTP client libraries?"' \
    '# Check status' \
    'g3-research --status research_1738700000_a1b2c3' \
    '# List all research' \
    'g3-research --list' \
    'OUTPUT:' \
    'All commands output JSON for machine parsing.' \
    'Research results are stored in .g3/research/<id>/' \
    'FILES:' \
    '.g3/research/<id>/status.json Machine-readable status' \
    '.g3/research/<id>/report.md Research brief (when complete)'
}
#######################################
# Main
# Dispatches on the first command-line argument.
# Arguments:
#   --help | -h      print usage
#   --status <id>    print the status of one research task
#   --list           print all research tasks
#   <query words...> start new research; all words form the query
# Returns:
#   Exits with status 1 when called without arguments, with an unknown
#   option, or with --status but no ID.
#######################################
main() {
  if [[ $# -eq 0 ]]; then
    show_help
    exit 1
  fi

  case "$1" in
    --help | -h)
      show_help
      ;;
    --status)
      if [[ $# -lt 2 ]]; then
        echo "{\"error\": \"Missing research ID\"}" >&2
        exit 1
      fi
      check_status "$2"
      ;;
    --list)
      list_research
      ;;
    -*)
      echo "{\"error\": \"Unknown option: $1\"}" >&2
      exit 1
      ;;
    *)
      # Treat as query. Every word is joined (space-separated) so that an
      # unquoted invocation such as `g3-research what is rust` is not
      # silently cut down to its first word.
      run_research "$*"
      ;;
  esac
}
# Script entry point: pass every command-line argument to main unchanged.
main "$@"