feat: Externalize research tool as embedded skill

Replaces the built-in research/research_status tools with a portable skill-based approach:

- Add embedded skills infrastructure (skills compiled into binary)
- Add repo-local skills/ directory support (highest priority)
- Create research skill with SKILL.md and g3-research shell script
- Script extraction to .g3/bin/ with version tracking
- Filesystem-based handoff via .g3/research/<id>/status.json
- Remove PendingResearchManager and all research tool code
- Update system prompt to reference skill instead of tool

Benefits:

- No special tool infrastructure needed (just shell + read_file)
- Context-efficient (reports stay on disk until needed)
- Crash-resilient (state persisted to filesystem)
- Portable (skill can be overridden per-workspace)

Breaking change: research tool calls now return a deprecation message pointing to the research skill.
2026-02-05 13:23:26 +11:00
parent bf9e3dc878
commit 39e586982c
19 changed files with 949 additions and 1638 deletions
--- a/skills/research/g3-research
+++ b/skills/research/g3-research
@@ -0,0 +1,306 @@
+#!/bin/bash
+#
+# g3-research - Perform web research via scout agent with filesystem handoff
+#
+# Usage:
+#   g3-research "<query>"           Start new research
+#   g3-research --status <id>       Check status of specific research
+#   g3-research --list              List all research tasks
+#   g3-research --help              Show this help
+#
+# Research results are stored in .g3/research/<id>/
+#   - status.json: Machine-readable status
+#   - report.md: The research brief (when complete)
+
+# Strict mode: abort on command failure, on use of an unset variable, and
+# when any stage of a pipeline fails.
+set -euo pipefail
+
+# Configuration
+# Task directories are created below this path, which is relative to the
+# current working directory.
+readonly RESEARCH_DIR=".g3/research"
+# Agent name handed to `g3 --agent`.
+readonly SCOUT_AGENT="scout"
+
+# Report markers (must match scout agent output)
+readonly REPORT_START_MARKER="---SCOUT_REPORT_START---"
+readonly REPORT_END_MARKER="---SCOUT_REPORT_END---"
+
+#######################################
+# Generate a unique research ID
+# Outputs:
+#   research_<unix-seconds>_<6 hex chars> on stdout
+#######################################
+generate_id() {
+    local timestamp
+    local random_suffix
+    timestamp=$(date +%s)
+    # Three random bytes render as the six hex characters of the suffix.
+    # od is a POSIX utility, so this does not rely on xxd being installed,
+    # and reading exactly three bytes means no pipeline stage has to be cut
+    # short while `pipefail` is active. tr keeps only the hex digits,
+    # whatever separators od prints between bytes.
+    random_suffix=$(od -An -N3 -tx1 /dev/urandom | tr -dc '0-9a-f')
+    echo "research_${timestamp}_${random_suffix}"
+}
+
+#######################################
+# Print the current time in UTC as an ISO 8601 timestamp
+# Outputs:
+#   YYYY-MM-DDTHH:MM:SSZ on stdout
+#######################################
+get_timestamp() {
+    local -r iso_format='+%Y-%m-%dT%H:%M:%SZ'
+    date -u "$iso_format"
+}
+
+#######################################
+# Escape text for use inside a JSON string literal
+# Arguments:
+#   $1 - raw text
+# Outputs:
+#   Escaped text on stdout (no surrounding quotes, no trailing newline)
+#######################################
+_json_escape() {
+    local text="$1"
+    # Backslashes go first so the escapes added below are not doubled.
+    text=${text//\\/\\\\}
+    text=${text//\"/\\\"}
+    text=${text//$'\n'/\\n}
+    text=${text//$'\r'/\\r}
+    text=${text//$'\t'/\\t}
+    # JSON forbids raw control characters inside strings; drop any that
+    # remain. LC_ALL=C makes tr work on bytes, leaving UTF-8 sequences
+    # (all bytes >= 0x80) untouched.
+    printf '%s' "$text" | LC_ALL=C tr -d '\001-\037'
+}
+
+#######################################
+# Write status.json file
+# Arguments:
+#   $1 - research directory
+#   $2 - id
+#   $3 - query
+#   $4 - status (running|complete|failed)
+#   $5 - started_at
+#   $6 - completed_at (optional, defaults to "null" for running)
+#   $7 - error (optional, defaults to "null" for success)
+# Outputs:
+#   Replaces <dir>/status.json
+#######################################
+write_status() {
+    local dir="$1"
+    local id="$2"
+    local query="$3"
+    local status="$4"
+    local started_at="$5"
+    # Defaults keep the documented "optional" arguments from tripping
+    # `set -u` when a caller leaves them out.
+    local completed_at="${6:-null}"
+    local error="${7:-null}"
+
+    local escaped_query
+    escaped_query=$(_json_escape "$query")
+
+    # Format completed_at and error as JSON values: the literal null, or a
+    # quoted string.
+    local completed_json="null"
+    if [[ "$completed_at" != "null" ]]; then
+        completed_json="\"${completed_at}\""
+    fi
+
+    local error_json="null"
+    if [[ "$error" != "null" ]]; then
+        # Cap the raw message at 1000 characters before escaping, so the
+        # cut can never land in the middle of an escape sequence.
+        error_json="\"$(_json_escape "${error:0:1000}")\""
+    fi
+
+    # Build the file under a temporary name and rename it into place, so a
+    # reader polling status.json never sees a half-written document.
+    local tmp_file="${dir}/status.json.tmp.$$"
+    cat > "$tmp_file" << EOF
+{
+  "id": "${id}",
+  "query": "${escaped_query}",
+  "status": "${status}",
+  "started_at": "${started_at}",
+  "completed_at": ${completed_json},
+  "report_path": "${dir}/report.md",
+  "error": ${error_json}
+}
+EOF
+    mv -f -- "$tmp_file" "${dir}/status.json"
+}
+
+#######################################
+# Extract report from scout output
+# Arguments:
+#   $1 - scout output file
+# Returns:
+#   Report content between markers, or empty if not found
+#######################################
+extract_report() {
+    local output_file="$1"
+    
+    # Use sed to extract content between markers
+    sed -n "/${REPORT_START_MARKER}/,/${REPORT_END_MARKER}/p" "$output_file" | \
+        sed "1d;\$d"  # Remove first and last lines (the markers)
+}
+
+#######################################
+# Run research
+# Creates ${RESEARCH_DIR}/<id>/, runs the scout agent on the query, tracks
+# progress in status.json and stores the extracted report in report.md.
+# Arguments:
+#   $1 - query
+# Outputs:
+#   stdout: one JSON line when the task starts, one when it completes
+#   stderr: one JSON line describing a failure
+# Returns:
+#   Exits the script with status 1 on any failure
+#######################################
+run_research() {
+    local query="$1"
+    local id
+    local research_dir
+    local started_at
+    local output_file
+    local exit_code=0
+
+    # Refuse an empty or whitespace-only query before creating any state
+    # or launching the agent.
+    if [[ -z "${query//[[:space:]]/}" ]]; then
+        echo "{\"error\": \"Empty research query\"}" >&2
+        exit 1
+    fi
+
+    # Generate unique ID and create directory
+    id=$(generate_id)
+    research_dir="${RESEARCH_DIR}/${id}"
+    mkdir -p "$research_dir"
+
+    started_at=$(get_timestamp)
+    output_file="${research_dir}/scout_output.txt"
+
+    # Write initial status
+    write_status "$research_dir" "$id" "$query" "running" "$started_at" "null" "null"
+
+    # Output the research ID immediately so caller knows where to look
+    echo "{\"id\": \"${id}\", \"status\": \"running\", \"path\": \"${research_dir}\"}"
+
+    # Find g3 binary: PATH first, then the local release and debug builds
+    local g3_bin
+    if command -v g3 &> /dev/null; then
+        g3_bin="g3"
+    elif [[ -x "./target/release/g3" ]]; then
+        g3_bin="./target/release/g3"
+    elif [[ -x "./target/debug/g3" ]]; then
+        g3_bin="./target/debug/g3"
+    else
+        write_status "$research_dir" "$id" "$query" "failed" "$started_at" "$(get_timestamp)" "g3 binary not found in PATH or target/"
+        echo "{\"id\": \"${id}\", \"status\": \"failed\", \"error\": \"g3 binary not found\"}" >&2
+        exit 1
+    fi
+
+    # Run scout agent and capture output. The `||` branch records the exit
+    # status without letting `set -e` abort the script.
+    "$g3_bin" --agent "$SCOUT_AGENT" --new-session --quiet "$query" \
+        > "$output_file" 2>&1 || exit_code=$?
+
+    local completed_at
+    completed_at=$(get_timestamp)
+
+    if [[ $exit_code -ne 0 ]]; then
+        # Scout failed: keep the tail of its output as the error detail
+        local error_msg
+        error_msg=$(tail -20 "$output_file" 2>/dev/null || echo "Unknown error")
+        # The agent may have died without printing anything; never store
+        # an empty error string.
+        if [[ -z "$error_msg" ]]; then
+            error_msg="Scout agent exited with code ${exit_code}"
+        fi
+        write_status "$research_dir" "$id" "$query" "failed" "$started_at" "$completed_at" "$error_msg"
+        echo "{\"id\": \"${id}\", \"status\": \"failed\", \"error\": \"Scout agent exited with code ${exit_code}\"}" >&2
+        exit 1
+    fi
+
+    # Extract report from output
+    local report
+    report=$(extract_report "$output_file")
+
+    if [[ -z "$report" ]]; then
+        write_status "$research_dir" "$id" "$query" "failed" "$started_at" "$completed_at" "Scout did not produce a valid report (missing markers)"
+        echo "{\"id\": \"${id}\", \"status\": \"failed\", \"error\": \"No report markers found in output\"}" >&2
+        exit 1
+    fi
+
+    # Write report to file. printf emits the text verbatim, whereas echo
+    # would treat a report starting with e.g. "-n" as an option.
+    printf '%s\n' "$report" > "${research_dir}/report.md"
+
+    # Update status to complete
+    write_status "$research_dir" "$id" "$query" "complete" "$started_at" "$completed_at" "null"
+
+    # Clean up scout output (optional - keep for debugging)
+    # rm -f "$output_file"
+
+    echo "{\"id\": \"${id}\", \"status\": \"complete\", \"report_path\": \"${research_dir}/report.md\"}"
+}
+
+#######################################
+# Check status of a specific research task
+# Arguments:
+#   $1 - research ID
+#######################################
+check_status() {
+    local id="$1"
+    local status_file="${RESEARCH_DIR}/${id}/status.json"
+    
+    if [[ ! -f "$status_file" ]]; then
+        echo "{\"error\": \"Research task not found: ${id}\"}" >&2
+        exit 1
+    fi
+    
+    cat "$status_file"
+}
+
+#######################################
+# List all research tasks
+#######################################
+list_research() {
+    if [[ ! -d "$RESEARCH_DIR" ]]; then
+        echo "[]"
+        return
+    fi
+    
+    local first=true
+    echo "["
+    
+    for status_file in "${RESEARCH_DIR}"/*/status.json; do
+        if [[ ! -f "$status_file" ]]; then
+            continue
+        fi
+        
+        if [[ "$first" == true ]]; then
+            first=false
+        else
+            echo ","
+        fi
+        
+        cat "$status_file"
+    done
+    
+    echo "]"
+}
+
+#######################################
+# Show help
+#######################################
+show_help() {
+    cat << 'EOF'
+g3-research - Perform web research via scout agent
+
+USAGE:
+    g3-research "<query>"           Start new research
+    g3-research --status <id>       Check status of specific research
+    g3-research --list              List all research tasks
+    g3-research --help              Show this help
+
+EXAMPLES:
+    # Start research (run via background_process)
+    g3-research "What are the best Rust HTTP client libraries?"
+
+    # Check status
+    g3-research --status research_1738700000_a1b2c3
+
+    # List all research
+    g3-research --list
+
+OUTPUT:
+    All commands output JSON for machine parsing.
+    Research results are stored in .g3/research/<id>/
+
+FILES:
+    .g3/research/<id>/status.json   Machine-readable status
+    .g3/research/<id>/report.md     Research brief (when complete)
+EOF
+}
+
+#######################################
+# Main
+# Dispatches on the first argument: --help/-h, --status <id>, --list, or a
+# research query. With no arguments the help text is printed and the
+# script exits with status 1.
+# Arguments:
+#   $@ - command-line arguments
+#######################################
+main() {
+    if [[ $# -eq 0 ]]; then
+        show_help
+        exit 1
+    fi
+
+    case "$1" in
+        --help|-h)
+            show_help
+            ;;
+        --status)
+            if [[ $# -lt 2 ]]; then
+                echo "{\"error\": \"Missing research ID\"}" >&2
+                exit 1
+            fi
+            check_status "$2"
+            ;;
+        --list)
+            list_research
+            ;;
+        -*)
+            echo "{\"error\": \"Unknown option: $1\"}" >&2
+            exit 1
+            ;;
+        *)
+            # Treat as query. All words are joined with single spaces so
+            # that an unquoted multi-word query is not silently cut down
+            # to its first word; a single quoted argument passes through
+            # unchanged.
+            run_research "$*"
+            ;;
+    esac
+}
+
+main "$@"