#!/bin/bash
#
# g3/skills/research/g3-research
#
# g3-research - Perform web research via scout agent with filesystem handoff
#
# Usage:
#   g3-research "<query>"           Start new research
#   g3-research --status <id>       Check status of specific research
#   g3-research --list              List all research tasks
#   g3-research --help              Show this help
#
# Research results are stored in .g3/research/<id>/
#   - status.json: Machine-readable status
#   - report.md: The research brief (when complete)

# Strict mode: abort on a failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Configuration
# Directory, relative to the current working directory, that holds one
# sub-directory per research task.
RESEARCH_DIR=".g3/research"
# Agent name passed to `g3 --agent`.
SCOUT_AGENT="scout"

# Report markers (must match scout agent output)
# The report is taken from the lines between these two strings in the
# captured agent output.
REPORT_START_MARKER="---SCOUT_REPORT_START---"
REPORT_END_MARKER="---SCOUT_REPORT_END---"

#######################################
# Generate a unique research ID
# Outputs:
#   "research_<unix-seconds>_<6 hex digits>" on stdout
#######################################
generate_id() {
    local timestamp
    local random_suffix
    timestamp=$(date +%s)
    # Three random bytes render as exactly six hex digits. od and tr are POSIX
    # utilities, so there is no dependency on xxd, and no stage of the pipeline
    # closes its input early (which could trip pipefail via SIGPIPE).
    random_suffix=$(od -An -N3 -tx1 /dev/urandom | tr -dc '0-9a-f')
    echo "research_${timestamp}_${random_suffix}"
}

#######################################
# Print the current time in UTC as an ISO 8601 timestamp
# Outputs:
#   e.g. 2025-02-04T20:13:20Z on stdout
#######################################
get_timestamp() {
    # The trailing Z is literal; it is correct because -u forces UTC.
    local -r iso_format='%Y-%m-%dT%H:%M:%SZ'
    date -u "+${iso_format}"
}

#######################################
# Escape text for embedding inside a JSON string literal
# Arguments:
#   $1 - raw text
# Outputs:
#   Escaped text on stdout (no surrounding quotes, no trailing newline)
#######################################
json_escape() {
    local s="${1-}"
    s=${s//\\/\\\\}            # backslash first so later escapes are not doubled
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/\\r}
    s=${s//$'\t'/\\t}
    s=${s//$'\033'/\\u001b}    # keep ESC from coloured agent output representable
    # Every other C0 control byte is illegal inside a JSON string; drop it.
    printf '%s' "$s" | LC_ALL=C tr -d '\001-\037'
}

#######################################
# Write status.json file
# Arguments:
#   $1 - research directory
#   $2 - id
#   $3 - query
#   $4 - status (running|complete|failed)
#   $5 - started_at
#   $6 - completed_at (optional, omit or use "null" for running)
#   $7 - error (optional, omit or use "null" for success)
# Outputs:
#   Writes <research directory>/status.json
#######################################
write_status() {
    local dir="$1"
    local id="$2"
    local query="$3"
    local status="$4"
    local started_at="$5"
    # Default the optional arguments so that `set -u` does not abort when a
    # caller omits them.
    local completed_at="${6:-null}"
    local error="${7:-null}"

    local escaped_query
    escaped_query=$(json_escape "$query")

    # Format completed_at and error as JSON values: bare null or a quoted string
    local completed_json="null"
    local error_json="null"
    if [[ "$completed_at" != "null" ]]; then
        completed_json="\"${completed_at}\""
    fi
    if [[ "$error" != "null" ]]; then
        # Cap the raw message at 1000 characters *before* escaping, so the cut
        # can never land in the middle of an escape sequence.
        local escaped_error
        escaped_error=$(json_escape "${error:0:1000}")
        error_json="\"${escaped_error}\""
    fi

    # Write to a sibling temp file and rename it into place, so a reader
    # polling status.json never sees a truncated document.
    local tmp_file="${dir}/status.json.tmp"
    cat > "$tmp_file" << EOF
{
  "id": "${id}",
  "query": "${escaped_query}",
  "status": "${status}",
  "started_at": "${started_at}",
  "completed_at": ${completed_json},
  "report_path": "${dir}/report.md",
  "error": ${error_json}
}
EOF
    mv -f "$tmp_file" "${dir}/status.json"
}

#######################################
# Extract report from scout output
# Arguments:
#   $1 - scout output file
# Returns:
#   Report content between markers, or empty if not found
#######################################
strip_ansi() {
    # Comprehensive ANSI escape sequence stripping
    perl -pe 's/\e\[[0-9;]*[a-zA-Z]//g; s/\e\][^\a]*\a//g; s/\e[()][AB012]//g'
}

extract_report() {
    local output_file="$1"
    local report

    # Use sed to extract content between markers
    report=$(sed -n "/${REPORT_START_MARKER}/,/${REPORT_END_MARKER}/p" "$output_file" | \
        sed "1d;\$d" | \
        strip_ansi)  # Remove markers and strip ANSI codes

    if [[ -n "$report" ]]; then
        echo "$report"
        return 0
    fi

    # Fallback: if no markers found, try to extract useful content from raw output
    # Strip ANSI escape codes and g3 UI elements
    report=$(cat "$output_file" | \
        strip_ansi | \
        grep -v '^🆕 Starting new session' | \
        grep -v '^>> agent mode' | \
        grep -v '^\[38;' | \
        grep -v '^-> ~' | \
        grep -v '^ *✓' | \
        grep -v '^📝 Auto-memory:' | \
        grep -v 'Auto-memory:' | \
        grep -v '^$' | \
        sed '/^[[:space:]]*$/d' | \
        head -500)

    if [[ -n "$report" ]]; then
        echo "$report"
        return 0
    fi
}

#######################################
# Run research
# Creates ${RESEARCH_DIR}/<id>/, runs the scout agent on the query and records
# the outcome in status.json (plus report.md on success).
# Globals:
#   RESEARCH_DIR, SCOUT_AGENT (read)
# Arguments:
#   $1 - query
# Outputs:
#   stdout: a "running" JSON line once the task directory exists, then a
#           "complete" JSON line on success
#   stderr: a JSON error line on failure
# Exits:
#   1 (terminating the script) when the query is empty, no g3 binary is found,
#   the scout agent exits non-zero, or no report could be extracted
#######################################
run_research() {
    local query="${1-}"
    local id
    local research_dir
    local started_at
    local output_file
    local exit_code=0

    # Refuse an empty or whitespace-only query before creating any state;
    # otherwise a task directory is created and the agent is launched with
    # nothing to research.
    if [[ -z "${query//[[:space:]]/}" ]]; then
        echo "{\"error\": \"Empty query\"}" >&2
        exit 1
    fi

    # Generate unique ID and create directory
    id=$(generate_id)
    research_dir="${RESEARCH_DIR}/${id}"
    mkdir -p "$research_dir"

    started_at=$(get_timestamp)
    output_file="${research_dir}/scout_output.txt"

    # Write initial status
    write_status "$research_dir" "$id" "$query" "running" "$started_at" "null" "null"

    # Output the research ID immediately so caller knows where to look
    echo "{\"id\": \"${id}\", \"status\": \"running\", \"path\": \"${research_dir}\"}"

    # Find g3 binary: PATH first, then local release and debug builds
    local g3_bin
    if command -v g3 &> /dev/null; then
        g3_bin="g3"
    elif [[ -x "./target/release/g3" ]]; then
        g3_bin="./target/release/g3"
    elif [[ -x "./target/debug/g3" ]]; then
        g3_bin="./target/debug/g3"
    else
        write_status "$research_dir" "$id" "$query" "failed" "$started_at" "$(get_timestamp)" "g3 binary not found in PATH or target/"
        echo "{\"id\": \"${id}\", \"status\": \"failed\", \"error\": \"g3 binary not found\"}" >&2
        exit 1
    fi

    # Run scout agent and capture stdout+stderr. `|| exit_code=$?` records a
    # failure without tripping errexit and without toggling shell options.
    "$g3_bin" --agent "$SCOUT_AGENT" --new-session --quiet "$query" > "$output_file" 2>&1 \
        || exit_code=$?

    local completed_at
    completed_at=$(get_timestamp)

    if [[ $exit_code -ne 0 ]]; then
        # Scout failed: keep the tail of its output as the error detail
        local error_msg
        error_msg=$(tail -n 20 "$output_file" 2>/dev/null || echo "Unknown error")
        write_status "$research_dir" "$id" "$query" "failed" "$started_at" "$completed_at" "$error_msg"
        echo "{\"id\": \"${id}\", \"status\": \"failed\", \"error\": \"Scout agent exited with code ${exit_code}\"}" >&2
        exit 1
    fi

    # Extract report from output
    local report
    report=$(extract_report "$output_file")

    if [[ -z "$report" ]]; then
        write_status "$research_dir" "$id" "$query" "failed" "$started_at" "$completed_at" "Scout did not produce a valid report (missing markers)"
        echo "{\"id\": \"${id}\", \"status\": \"failed\", \"error\": \"No report markers found in output\"}" >&2
        exit 1
    fi

    # Write report to file (printf, so report text is never read as an echo option)
    printf '%s\n' "$report" > "${research_dir}/report.md"

    # Update status to complete
    write_status "$research_dir" "$id" "$query" "complete" "$started_at" "$completed_at" "null"

    # Clean up scout output (optional - keep for debugging)
    # rm -f "$output_file"

    echo "{\"id\": \"${id}\", \"status\": \"complete\", \"report_path\": \"${research_dir}/report.md\"}"
}

#######################################
# Check status of a specific research task
# Globals:
#   RESEARCH_DIR (read)
# Arguments:
#   $1 - research ID
# Outputs:
#   stdout: contents of the task's status.json
#   stderr: a JSON error line when the ID is invalid or unknown
# Exits:
#   1 (terminating the script) when the ID is invalid or unknown
#######################################
check_status() {
    local id="${1-}"

    # Generated IDs consist of letters, digits and underscores. Rejecting
    # anything outside [A-Za-z0-9_-] keeps "../" out of the path built below
    # and guarantees the ID can be embedded in the JSON error line verbatim.
    if [[ ! "$id" =~ ^[A-Za-z0-9_-]+$ ]]; then
        echo "{\"error\": \"Invalid research ID\"}" >&2
        exit 1
    fi

    local status_file="${RESEARCH_DIR}/${id}/status.json"

    if [[ ! -f "$status_file" ]]; then
        echo "{\"error\": \"Research task not found: ${id}\"}" >&2
        exit 1
    fi

    cat "$status_file"
}

#######################################
# List all research tasks
# Globals:
#   RESEARCH_DIR (read)
# Outputs:
#   A JSON array on stdout holding the contents of every non-empty
#   ${RESEARCH_DIR}/<id>/status.json; "[]" when the directory does not exist
#######################################
list_research() {
    if [[ ! -d "$RESEARCH_DIR" ]]; then
        echo "[]"
        return
    fi

    local first=true
    local status_file   # keep the loop variable out of the global scope
    echo "["

    for status_file in "${RESEARCH_DIR}"/*/status.json; do
        # -s skips two cases: the unexpanded glob pattern when nothing
        # matches, and a zero-length file, which would otherwise leave a
        # dangling "," and make the array invalid JSON.
        if [[ ! -s "$status_file" ]]; then
            continue
        fi

        if [[ "$first" == true ]]; then
            first=false
        else
            echo ","
        fi

        cat "$status_file"
    done

    echo "]"
}

#######################################
# Print the usage text
# Outputs:
#   Usage, examples and file layout on stdout
#######################################
show_help() {
    # One array element per output line; empty strings are blank lines.
    local -a help_lines=(
        'g3-research - Perform web research via scout agent'
        ''
        'USAGE:'
        '    g3-research "<query>"           Start new research'
        '    g3-research --status <id>       Check status of specific research'
        '    g3-research --list              List all research tasks'
        '    g3-research --help              Show this help'
        ''
        'EXAMPLES:'
        '    # Start research (run via background_process)'
        '    g3-research "What are the best Rust HTTP client libraries?"'
        ''
        '    # Check status'
        '    g3-research --status research_1738700000_a1b2c3'
        ''
        '    # List all research'
        '    g3-research --list'
        ''
        'OUTPUT:'
        '    All commands output JSON for machine parsing.'
        '    Research results are stored in .g3/research/<id>/'
        ''
        'FILES:'
        '    .g3/research/<id>/status.json   Machine-readable status'
        '    .g3/research/<id>/report.md     Research brief (when complete)'
    )
    printf '%s\n' "${help_lines[@]}"
}

#######################################
# Main
# Dispatches on the first command-line argument.
# Arguments:
#   --help | -h     print usage
#   --status <id>   print the status of one research task
#   --list          list all research tasks
#   <query...>      anything not starting with "-" starts new research
# Exits:
#   1 when called without arguments, with an unknown option, or with
#   --status but no ID
#######################################
main() {
    if [[ $# -eq 0 ]]; then
        show_help
        exit 1
    fi

    case "$1" in
        --help|-h)
            show_help
            ;;
        --status)
            if [[ $# -lt 2 ]]; then
                echo "{\"error\": \"Missing research ID\"}" >&2
                exit 1
            fi
            check_status "$2"
            ;;
        --list)
            list_research
            ;;
        -*)
            echo "{\"error\": \"Unknown option: $1\"}" >&2
            exit 1
            ;;
        *)
            # Treat as query. Join every argument with spaces so an unquoted
            # multi-word query is not silently cut down to its first word; a
            # single quoted argument is passed through unchanged.
            run_research "$*"
            ;;
    esac
}

# Script entry point: pass every command-line argument to main unchanged.
main "$@"