Merge sessions/interactive/9681cb67

This commit is contained in:
Dhanji R. Prasanna
2026-01-30 13:01:00 +11:00
11 changed files with 797 additions and 81 deletions

View File

@@ -1,5 +1,5 @@
# Workspace Memory
> Updated: 2026-01-27T04:00:00Z | Size: ~12k chars
> Updated: 2026-01-30T01:10:54Z | Size: 13.2k chars
### Remember Tool Wiring
- `crates/g3-core/src/tools/memory.rs` [0..5000] - `execute_remember()`, `get_memory_path()`, `merge_memory()`
@@ -210,3 +210,34 @@ context_length = 32768
max_tokens = 4096
gpu_layers = 99
```
### Async Research Tool
Research tool is asynchronous - spawns scout agent in background, returns immediately with research_id.
- `crates/g3-core/src/pending_research.rs`
- `PendingResearchManager` [80..100] - thread-safe task storage (Arc<Mutex<HashMap>>)
- `ResearchTask` [40..75] - id, query, status, result, started_at, injected
- `ResearchStatus` [20..35] - Pending, Complete, Failed enum
- `register()` [110..125] - creates task, returns research_id
- `complete()` / `fail()` [130..150] - update task status
- `take_completed()` [180..200] - returns completed tasks, marks as injected
- `list_all()` [165..170] - returns all tasks for /research command
- `crates/g3-core/src/tools/research.rs`
- `execute_research()` [150..210] - spawns scout in tokio::spawn, returns placeholder
- `run_scout_agent()` [215..300] - async fn that runs in background task
- `execute_research_status()` [305..380] - check status of pending research
- `crates/g3-core/src/lib.rs`
- `inject_completed_research()` [1080..1120] - injects completed research into context
- Called at start of each tool iteration and before user prompt in interactive mode
- `crates/g3-cli/src/commands.rs`
- `/research` command [125..160] - lists all research tasks with status
**Flow:**
1. Agent calls `research(query)` → returns immediately with research_id
2. Scout agent runs in background tokio task
3. On completion, `PendingResearchManager.complete()` stores result
4. At next iteration start or user prompt, `inject_completed_research()` adds to context
5. Agent can check status with `research_status` tool or user with `/research` command

View File

@@ -38,6 +38,7 @@ pub async fn handle_command<W: UiWriter>(
output.print(" /fragments - List dehydrated context fragments (ACD)");
output.print(" /rehydrate - Restore a dehydrated fragment by ID");
output.print(" /resume - List and switch to a previous session");
output.print(" /research - List pending/completed research tasks");
output.print(" /project <path> - Load a project from the given absolute path");
output.print(" /unproject - Unload the current project and reset context");
output.print(" /dump - Dump entire context window to file for debugging");
@@ -130,6 +131,42 @@ pub async fn handle_command<W: UiWriter>(
}
Ok(true)
}
"/research" => {
let manager = agent.get_pending_research_manager();
let all_tasks = manager.list_all();
if all_tasks.is_empty() {
output.print("📋 No research tasks (pending or completed).");
} else {
output.print(&format!("📋 Research Tasks ({} total):\n", all_tasks.len()));
for task in all_tasks {
let status_emoji = match task.status {
g3_core::pending_research::ResearchStatus::Pending => "🔄",
g3_core::pending_research::ResearchStatus::Complete => "",
g3_core::pending_research::ResearchStatus::Failed => "",
};
let injected_marker = if task.injected { " (injected)" } else { "" };
output.print(&format!(
" {} `{}` - {} ({}){}\n Query: {}",
status_emoji,
task.id,
task.status,
task.elapsed_display(),
injected_marker,
if task.query.len() > 60 {
format!("{}...", &task.query.chars().take(57).collect::<String>())
} else {
task.query.clone()
}
));
output.print("");
}
}
Ok(true)
}
cmd if cmd.starts_with("/run") => {
let parts: Vec<&str> = cmd.splitn(2, ' ').collect();
if parts.len() < 2 || parts[1].trim().is_empty() {

View File

@@ -211,6 +211,14 @@ pub async fn run_interactive<W: UiWriter>(
// Display context window progress bar before each prompt
display_context_progress(&agent, &output);
// Check for completed research and inject into context
// This happens before prompting the user for input
let injected_count = agent.inject_completed_research();
if injected_count > 0 {
println!("📋 {} research result(s) ready - injected into context", injected_count);
println!();
}
// Build prompt
let prompt = build_prompt(in_multiline, agent_name, &active_project);

View File

@@ -6,6 +6,7 @@ pub mod context_window;
pub mod error_handling;
pub mod feedback_extraction;
pub mod paths;
pub mod pending_research;
pub mod project;
pub mod provider_config;
pub mod provider_registration;
@@ -150,6 +151,8 @@ pub struct Agent<W: UiWriter> {
auto_memory: bool,
/// Whether aggressive context dehydration is enabled (--acd flag)
acd_enabled: bool,
/// Manager for async research tasks
pending_research_manager: pending_research::PendingResearchManager,
}
impl<W: UiWriter> Agent<W> {
@@ -203,6 +206,7 @@ impl<W: UiWriter> Agent<W> {
agent_name: None,
auto_memory: false,
acd_enabled: false,
pending_research_manager: pending_research::PendingResearchManager::new(),
}
}
@@ -1071,6 +1075,51 @@ impl<W: UiWriter> Agent<W> {
self.context_window.add_message(message);
}
/// Check for completed research tasks and inject them into the context.
///
/// This should be called at natural break points:
/// - End of each tool iteration (before next LLM call)
/// - Before prompting user in interactive mode
///
/// Returns the number of research results injected.
pub fn inject_completed_research(&mut self) -> usize {
let completed = self.pending_research_manager.take_completed();
if completed.is_empty() {
return 0;
}
for task in &completed {
let message_content = match task.status {
pending_research::ResearchStatus::Complete => {
format!(
"📋 **Research completed** (id: `{}`): {}\n\n{}",
task.id,
task.query,
task.result.as_deref().unwrap_or("No result available")
)
}
pending_research::ResearchStatus::Failed => {
format!(
"❌ **Research failed** (id: `{}`): {}\n\nError: {}",
task.id,
task.query,
task.result.as_deref().unwrap_or("Unknown error")
)
}
pending_research::ResearchStatus::Pending => continue, // Skip pending tasks
};
// Inject as a user message so the agent sees and responds to it
let message = Message::new(MessageRole::User, message_content);
self.context_window.add_message(message);
debug!("Injected research result for task {}", task.id);
}
completed.len()
}
/// Execute a tool call and return the result.
/// This is a public wrapper around execute_tool for use by external callers
/// like the planner's fast-discovery feature.
@@ -1431,6 +1480,10 @@ impl<W: UiWriter> Agent<W> {
&self.config
}
pub fn get_pending_research_manager(&self) -> &pending_research::PendingResearchManager {
&self.pending_research_manager
}
pub fn set_requirements_sha(&mut self, sha: String) {
self.requirements_sha = Some(sha);
}
@@ -2043,6 +2096,14 @@ Skip if nothing new. Be brief."#;
tokio::time::sleep(tokio::time::Duration::from_millis(50)).await;
}
// Check for completed research and inject into context
// This happens at the start of each iteration, before the LLM call
let injected_count = self.inject_completed_research();
if injected_count > 0 {
debug!("Injected {} completed research result(s) into context", injected_count);
self.ui_writer.println(&format!("📋 {} research result(s) ready and injected into context", injected_count));
}
// Get provider info for logging, then drop it to avoid borrow issues
let (provider_name, provider_model) = {
let provider = self.providers.get(None)?;
@@ -2852,6 +2913,7 @@ Skip if nothing new. Be brief."#;
requirements_sha: self.requirements_sha.as_deref(),
context_total_tokens: self.context_window.total_tokens,
context_used_tokens: self.context_window.used_tokens,
pending_research_manager: &self.pending_research_manager,
};
// Dispatch to the appropriate tool handler

View File

@@ -0,0 +1,436 @@
//! Pending research manager for async research tasks.
//!
//! This module manages research tasks that run in the background while the agent
//! continues with other work. Research results are stored until they can be
//! injected into the conversation at a natural break point.
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};
use tracing::debug;
/// Unique identifier for a research task
pub type ResearchId = String;
/// Status of a research task
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ResearchStatus {
/// Research is in progress
Pending,
/// Research completed successfully
Complete,
/// Research failed with an error
Failed,
}
impl std::fmt::Display for ResearchStatus {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ResearchStatus::Pending => write!(f, "pending"),
ResearchStatus::Complete => write!(f, "complete"),
ResearchStatus::Failed => write!(f, "failed"),
}
}
}
/// A research task being tracked by the manager
#[derive(Debug, Clone)]
pub struct ResearchTask {
/// Unique identifier for this research task
pub id: ResearchId,
/// The original research query
pub query: String,
/// Current status of the research
pub status: ResearchStatus,
/// The research result (report or error message)
pub result: Option<String>,
/// When the research was initiated
pub started_at: Instant,
/// Whether this result has been injected into the conversation
pub injected: bool,
}
impl ResearchTask {
/// Create a new pending research task
pub fn new(id: ResearchId, query: String) -> Self {
Self {
id,
query,
status: ResearchStatus::Pending,
result: None,
started_at: Instant::now(),
injected: false,
}
}
/// Get the elapsed time since the research started
pub fn elapsed(&self) -> Duration {
self.started_at.elapsed()
}
/// Format elapsed time for display
pub fn elapsed_display(&self) -> String {
let secs = self.elapsed().as_secs();
if secs < 60 {
format!("{}s", secs)
} else {
format!("{}m {}s", secs / 60, secs % 60)
}
}
}
/// Thread-safe manager for pending research tasks
#[derive(Debug, Clone)]
pub struct PendingResearchManager {
tasks: Arc<Mutex<HashMap<ResearchId, ResearchTask>>>,
}
impl Default for PendingResearchManager {
fn default() -> Self {
Self::new()
}
}
impl PendingResearchManager {
/// Create a new pending research manager
pub fn new() -> Self {
Self {
tasks: Arc::new(Mutex::new(HashMap::new())),
}
}
/// Generate a unique research ID
pub fn generate_id() -> ResearchId {
use std::time::{SystemTime, UNIX_EPOCH};
let timestamp = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_millis();
// Use timestamp + random suffix for uniqueness
format!("research_{:x}_{:04x}", timestamp, rand::random::<u16>())
}
/// Register a new research task
///
/// Returns the research ID for tracking
pub fn register(&self, query: &str) -> ResearchId {
let id = Self::generate_id();
let task = ResearchTask::new(id.clone(), query.to_string());
let mut tasks = self.tasks.lock().unwrap();
tasks.insert(id.clone(), task);
debug!("Registered research task: {} for query: {}", id, query);
id
}
/// Update a research task with its result
pub fn complete(&self, id: &ResearchId, result: String) {
let mut tasks = self.tasks.lock().unwrap();
if let Some(task) = tasks.get_mut(id) {
task.status = ResearchStatus::Complete;
task.result = Some(result);
debug!("Research task {} completed successfully", id);
}
}
/// Mark a research task as failed
pub fn fail(&self, id: &ResearchId, error: String) {
let mut tasks = self.tasks.lock().unwrap();
if let Some(task) = tasks.get_mut(id) {
task.status = ResearchStatus::Failed;
task.result = Some(error);
debug!("Research task {} failed", id);
}
}
/// Get the status of a specific research task
pub fn get_status(&self, id: &ResearchId) -> Option<(ResearchStatus, Option<String>)> {
let tasks = self.tasks.lock().unwrap();
tasks.get(id).map(|t| (t.status.clone(), t.result.clone()))
}
/// Get a specific research task
pub fn get(&self, id: &ResearchId) -> Option<ResearchTask> {
let tasks = self.tasks.lock().unwrap();
tasks.get(id).cloned()
}
/// List all pending (not yet injected) research tasks
pub fn list_pending(&self) -> Vec<ResearchTask> {
let tasks = self.tasks.lock().unwrap();
tasks
.values()
.filter(|t| !t.injected)
.cloned()
.collect()
}
/// List all research tasks (including injected ones)
pub fn list_all(&self) -> Vec<ResearchTask> {
let tasks = self.tasks.lock().unwrap();
tasks.values().cloned().collect()
}
/// Get count of pending (in-progress) research tasks
pub fn pending_count(&self) -> usize {
let tasks = self.tasks.lock().unwrap();
tasks
.values()
.filter(|t| t.status == ResearchStatus::Pending)
.count()
}
/// Get count of completed but not yet injected research tasks
pub fn ready_count(&self) -> usize {
let tasks = self.tasks.lock().unwrap();
tasks
.values()
.filter(|t| !t.injected && t.status != ResearchStatus::Pending)
.count()
}
/// Take all completed research tasks that haven't been injected yet
///
/// Marks them as injected so they won't be returned again
pub fn take_completed(&self) -> Vec<ResearchTask> {
let mut tasks = self.tasks.lock().unwrap();
let mut completed = Vec::new();
for task in tasks.values_mut() {
if !task.injected && task.status != ResearchStatus::Pending {
task.injected = true;
completed.push(task.clone());
}
}
debug!("Took {} completed research tasks for injection", completed.len());
completed
}
/// Remove a research task (e.g., after it's been fully processed)
pub fn remove(&self, id: &ResearchId) -> Option<ResearchTask> {
let mut tasks = self.tasks.lock().unwrap();
tasks.remove(id)
}
/// Clear all completed and injected tasks (cleanup)
pub fn cleanup_injected(&self) {
let mut tasks = self.tasks.lock().unwrap();
tasks.retain(|_, t| !t.injected);
}
/// Check if there are any tasks (pending or ready)
pub fn has_tasks(&self) -> bool {
let tasks = self.tasks.lock().unwrap();
!tasks.is_empty()
}
/// Format a summary of pending research for display
pub fn format_status_summary(&self) -> Option<String> {
let tasks = self.tasks.lock().unwrap();
let pending: Vec<_> = tasks.values().filter(|t| t.status == ResearchStatus::Pending).collect();
let ready: Vec<_> = tasks.values().filter(|t| !t.injected && t.status != ResearchStatus::Pending).collect();
if pending.is_empty() && ready.is_empty() {
return None;
}
let mut parts = Vec::new();
if !pending.is_empty() {
parts.push(format!("🔍 {} researching", pending.len()));
}
if !ready.is_empty() {
parts.push(format!("📋 {} ready", ready.len()));
}
Some(parts.join(" | "))
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::thread;
use std::time::Duration;
#[test]
fn test_register_and_get() {
let manager = PendingResearchManager::new();
let id = manager.register("How to use tokio?");
let task = manager.get(&id).unwrap();
assert_eq!(task.query, "How to use tokio?");
assert_eq!(task.status, ResearchStatus::Pending);
assert!(task.result.is_none());
assert!(!task.injected);
}
#[test]
fn test_complete_research() {
let manager = PendingResearchManager::new();
let id = manager.register("Test query");
manager.complete(&id, "Research report here".to_string());
let (status, result) = manager.get_status(&id).unwrap();
assert_eq!(status, ResearchStatus::Complete);
assert_eq!(result.unwrap(), "Research report here");
}
#[test]
fn test_fail_research() {
let manager = PendingResearchManager::new();
let id = manager.register("Test query");
manager.fail(&id, "Connection timeout".to_string());
let (status, result) = manager.get_status(&id).unwrap();
assert_eq!(status, ResearchStatus::Failed);
assert_eq!(result.unwrap(), "Connection timeout");
}
#[test]
fn test_take_completed() {
let manager = PendingResearchManager::new();
let id1 = manager.register("Query 1");
let id2 = manager.register("Query 2");
let id3 = manager.register("Query 3");
// Complete two, leave one pending
manager.complete(&id1, "Report 1".to_string());
manager.fail(&id2, "Error".to_string());
// id3 stays pending
// Take completed
let completed = manager.take_completed();
assert_eq!(completed.len(), 2);
// Taking again should return empty (already injected)
let completed_again = manager.take_completed();
assert!(completed_again.is_empty());
// Pending count should be 1
assert_eq!(manager.pending_count(), 1);
}
#[test]
fn test_list_pending() {
let manager = PendingResearchManager::new();
let id1 = manager.register("Query 1");
let id2 = manager.register("Query 2");
manager.complete(&id1, "Report".to_string());
// Both should be in list_pending (not injected yet)
let pending = manager.list_pending();
assert_eq!(pending.len(), 2);
// Take completed
manager.take_completed();
// Now only the actually pending one should be listed
let pending = manager.list_pending();
assert_eq!(pending.len(), 1);
assert_eq!(pending[0].id, id2);
}
#[test]
fn test_format_status_summary() {
let manager = PendingResearchManager::new();
// Empty - no summary
assert!(manager.format_status_summary().is_none());
// One pending
let id1 = manager.register("Query 1");
let summary = manager.format_status_summary().unwrap();
assert!(summary.contains("1 researching"));
// One pending, one ready
let id2 = manager.register("Query 2");
manager.complete(&id2, "Report".to_string());
let summary = manager.format_status_summary().unwrap();
assert!(summary.contains("1 researching"));
assert!(summary.contains("1 ready"));
}
#[test]
fn test_thread_safety() {
let manager = PendingResearchManager::new();
let manager_clone = manager.clone();
let id = manager.register("Concurrent test");
let id_clone = id.clone();
let handle = thread::spawn(move || {
thread::sleep(Duration::from_millis(10));
manager_clone.complete(&id_clone, "Result from thread".to_string());
});
// Main thread checks status
loop {
if let Some((status, _)) = manager.get_status(&id) {
if status == ResearchStatus::Complete {
break;
}
}
thread::sleep(Duration::from_millis(5));
}
handle.join().unwrap();
let (status, result) = manager.get_status(&id).unwrap();
assert_eq!(status, ResearchStatus::Complete);
assert_eq!(result.unwrap(), "Result from thread");
}
#[test]
fn test_elapsed_display() {
let manager = PendingResearchManager::new();
let id = manager.register("Test");
let task = manager.get(&id).unwrap();
let display = task.elapsed_display();
// Should be "0s" or similar (just started)
assert!(display.contains('s'));
}
#[test]
fn test_cleanup_injected() {
let manager = PendingResearchManager::new();
let id1 = manager.register("Query 1");
let id2 = manager.register("Query 2");
manager.complete(&id1, "Report 1".to_string());
manager.complete(&id2, "Report 2".to_string());
// Take and inject
manager.take_completed();
// Both should still exist
assert_eq!(manager.list_all().len(), 2);
// Cleanup injected
manager.cleanup_injected();
// Should be empty now
assert_eq!(manager.list_all().len(), 0);
}
#[test]
fn test_generate_id_uniqueness() {
let ids: Vec<_> = (0..100).map(|_| PendingResearchManager::generate_id()).collect();
let unique: std::collections::HashSet<_> = ids.iter().collect();
assert_eq!(ids.len(), unique.len(), "Generated IDs should be unique");
}
}

View File

@@ -91,7 +91,7 @@ If you create temporary files for verification or investigation, place these in
const SHARED_WEB_RESEARCH: &str = "\
# Web Research
When you need to look up documentation, search for resources, find data online, or research a topic to complete your task, use the `research` tool.
When you need to look up documentation, search for resources, find data online, or research a topic to complete your task, use the `research` tool. **Research is asynchronous** - it runs in the background while you continue working.
**Use the `research` tool** for any web research tasks:
- Researching APIs, SDKs, libraries, frameworks, or tools
@@ -99,7 +99,12 @@ When you need to look up documentation, search for resources, find data online,
- Investigating bugs, issues, or error messages
- Looking up documentation or specifications
Simply call `research` with a specific query describing what you need to know. The tool returns a structured research brief with options, trade-offs, and recommendations.
**How async research works:**
1. Call `research` with your query - it returns immediately with a `research_id`
2. Continue with other work while research runs in the background (30-120 seconds)
3. Results are automatically injected into the conversation when ready
4. Use `research_status` to check progress if needed
5. If you need results before continuing, say so and yield the turn to the user
IMPORTANT: If the user asks you to just respond with text (like \"just say hello\" or \"tell me about X\"), do NOT use tools. Simply respond with the requested text directly. Only use tools when you need to execute commands or complete tasks that require action.

View File

@@ -258,7 +258,7 @@ fn create_core_tools(exclude_research: bool) -> Vec<Tool> {
if !exclude_research {
tools.push(Tool {
name: "research".to_string(),
description: "Perform web-based research on a topic and return a structured research brief. Use this tool when you need to research APIs, SDKs, libraries, approaches, bugs, documentation, or anything else that requires web-based research. The tool spawns a specialized research agent that browses the web and returns a concise, decision-ready report.".to_string(),
description: "Initiate web-based research on a topic. This tool is ASYNCHRONOUS - it spawns a research agent in the background and returns immediately with a research_id. Results are automatically injected into the conversation when ready. Use this when you need to research APIs, SDKs, libraries, approaches, bugs, or documentation. If you need the results before continuing, say so and yield the turn to the user. Check status with research_status tool.".to_string(),
input_schema: json!({
"type": "object",
"properties": {
@@ -270,6 +270,22 @@ fn create_core_tools(exclude_research: bool) -> Vec<Tool> {
"required": ["query"]
}),
});
// research_status tool - check status of pending research
tools.push(Tool {
name: "research_status".to_string(),
description: "Check the status of pending research tasks. Call without arguments to list all pending research, or with a research_id to check a specific task. Use this to see if research has completed before it's automatically injected.".to_string(),
input_schema: json!({
"type": "object",
"properties": {
"research_id": {
"type": "string",
"description": "Optional: specific research_id to check. If omitted, lists all pending research tasks."
}
},
"required": []
}),
});
}
// Workspace memory tool (memory is auto-loaded at startup, only remember is needed)
@@ -509,9 +525,9 @@ mod tests {
let tools = create_core_tools(false);
// Should have the core tools: shell, background_process, read_file, read_image,
// write_file, str_replace, screenshot,
// todo_read, todo_write, coverage, code_search, research, remember
// (13 total - memory is auto-loaded, only remember tool needed)
assert_eq!(tools.len(), 14);
// todo_read, todo_write, coverage, code_search, research, research_status, remember
// (15 total - memory is auto-loaded, only remember tool needed)
assert_eq!(tools.len(), 15);
}
#[test]
@@ -525,15 +541,15 @@ mod tests {
fn test_create_tool_definitions_core_only() {
let config = ToolConfig::default();
let tools = create_tool_definitions(config);
assert_eq!(tools.len(), 14);
assert_eq!(tools.len(), 15);
}
#[test]
fn test_create_tool_definitions_all_enabled() {
let config = ToolConfig::new(true, true);
let tools = create_tool_definitions(config);
// 13 core + 15 webdriver = 28
assert_eq!(tools.len(), 29);
// 15 core + 15 webdriver = 30
assert_eq!(tools.len(), 30);
}
#[test]
@@ -551,8 +567,8 @@ mod tests {
let tools_with_research = create_core_tools(false);
let tools_without_research = create_core_tools(true);
assert_eq!(tools_with_research.len(), 14);
assert_eq!(tools_without_research.len(), 13);
assert_eq!(tools_with_research.len(), 15);
assert_eq!(tools_without_research.len(), 13); // research + research_status both excluded
assert!(tools_with_research.iter().any(|t| t.name == "research"));
assert!(!tools_without_research.iter().any(|t| t.name == "research"));

View File

@@ -43,6 +43,7 @@ pub async fn dispatch_tool<W: UiWriter>(
// Research tool
"research" => research::execute_research(tool_call, ctx).await,
"research_status" => research::execute_research_status(tool_call, ctx).await,
// Workspace memory tools
"remember" => memory::execute_remember(tool_call, ctx).await,

View File

@@ -120,6 +120,7 @@ mod tests {
use crate::acd::Fragment;
use crate::ui_writer::NullUiWriter;
use crate::background_process::BackgroundProcessManager;
use crate::pending_research::PendingResearchManager;
use serial_test::serial;
use crate::webdriver_session::WebDriverSession;
use g3_providers::{Message, MessageRole};
@@ -135,6 +136,7 @@ mod tests {
todo_content: Arc<RwLock<String>>,
pending_images: Vec<g3_providers::ImageContent>,
config: g3_config::Config,
pending_research_manager: PendingResearchManager,
}
impl TestContext {
@@ -147,6 +149,7 @@ mod tests {
todo_content: Arc::new(RwLock::new(String::new())),
pending_images: Vec::new(),
config: g3_config::Config::default(),
pending_research_manager: PendingResearchManager::new(),
}
}
}
@@ -169,6 +172,7 @@ mod tests {
requirements_sha: None,
context_total_tokens: 100000,
context_used_tokens: 10000,
pending_research_manager: &test_ctx.pending_research_manager,
};
let tool_call = ToolCall {
@@ -199,6 +203,7 @@ mod tests {
requirements_sha: None,
context_total_tokens: 100000,
context_used_tokens: 10000,
pending_research_manager: &test_ctx.pending_research_manager,
};
let tool_call = ToolCall {
@@ -229,6 +234,7 @@ mod tests {
requirements_sha: None,
context_total_tokens: 100000,
context_used_tokens: 10000,
pending_research_manager: &test_ctx.pending_research_manager,
};
let tool_call = ToolCall {

View File

@@ -5,6 +5,7 @@ use std::sync::Arc;
use tokio::sync::RwLock;
use crate::background_process::BackgroundProcessManager;
use crate::pending_research::PendingResearchManager;
use crate::paths::{ensure_session_dir, get_session_todo_path, get_todo_path};
use crate::ui_writer::UiWriter;
use crate::webdriver_session::WebDriverSession;
@@ -27,6 +28,7 @@ pub struct ToolContext<'a, W: UiWriter> {
pub requirements_sha: Option<&'a str>,
pub context_total_tokens: u32,
pub context_used_tokens: u32,
pub pending_research_manager: &'a PendingResearchManager,
}
impl<'a, W: UiWriter> ToolContext<'a, W> {

View File

@@ -1,9 +1,15 @@
//! Research tool: spawns a scout agent to perform web-based research.
//!
//! The research tool is **asynchronous** - it spawns the scout agent in the background
//! and returns immediately with a research_id. The agent can continue with other work
//! while research is in progress. Results are automatically injected into the conversation
//! when ready, or the agent can check status with the `research_status` tool.
use anyhow::Result;
use std::process::Stdio;
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command;
use tracing::{debug, error};
use crate::ui_writer::UiWriter;
use crate::ToolCall;
@@ -19,6 +25,7 @@ const REPORT_END_MARKER: &str = "---SCOUT_REPORT_END---";
///
/// Parses tool call headers from the scout output and returns human-readable
/// progress messages. Returns None for lines that should be suppressed.
#[allow(dead_code)] // Used in tests, may be used for progress display in future
fn translate_progress(line: &str) -> Option<String> {
// Strip ANSI codes first for pattern matching
let clean_line = strip_ansi_codes(line);
@@ -118,6 +125,7 @@ fn translate_progress(line: &str) -> Option<String> {
}
/// Extract domain from a URL for cleaner display.
#[allow(dead_code)] // Used in tests
fn extract_domain(url: &str) -> Option<&str> {
// Remove protocol
let without_protocol = url
@@ -131,6 +139,7 @@ fn extract_domain(url: &str) -> Option<&str> {
/// Truncate a command to a maximum length for display.
/// Preserves the beginning of the command and adds "..." if truncated.
#[allow(dead_code)] // Used in tests
fn truncate_command_snippet(cmd: &str, max_len: usize) -> String {
// Take just the first line if multi-line
let first_line = cmd.lines().next().unwrap_or(cmd);
@@ -149,6 +158,14 @@ const CONTEXT_ERROR_PATTERNS: &[&str] = &[
"too many tokens", "exceeds the model", "context window", "max_tokens",
];
/// Execute the research tool - spawns scout agent in background and returns immediately.
///
/// This is the **async** version of research. It:
/// 1. Registers a new research task with the PendingResearchManager
/// 2. Spawns the scout agent in a background tokio task
/// 3. Returns immediately with a placeholder message containing the research_id
/// 4. The background task updates the manager when research completes
/// 5. Results are injected into the conversation at the next natural break point
pub async fn execute_research<W: UiWriter>(
tool_call: &ToolCall,
ctx: &mut ToolContext<'_, W>,
@@ -159,20 +176,74 @@ pub async fn execute_research<W: UiWriter>(
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing required 'query' parameter"))?;
// Register the research task and get an ID
let research_id = ctx.pending_research_manager.register(query);
// Clone values needed for the background task
let query_owned = query.to_string();
let research_id_clone = research_id.clone();
let manager = ctx.pending_research_manager.clone();
let browser = ctx.config.webdriver.browser.clone();
// Find the g3 executable path
let g3_path = std::env::current_exe()
.unwrap_or_else(|_| std::path::PathBuf::from("g3"));
// Spawn the scout agent in a background task
tokio::spawn(async move {
let result = run_scout_agent(&g3_path, &query_owned, browser).await;
match result {
Ok(report) => {
debug!("Research {} completed successfully", research_id_clone);
manager.complete(&research_id_clone, report);
}
Err(e) => {
error!("Research {} failed: {}", research_id_clone, e);
manager.fail(&research_id_clone, e.to_string());
}
}
});
// Return immediately with placeholder
let placeholder = format!(
"🔍 **Research initiated** (id: `{}`)
\
**Query:** {}
\
Research is running in the background. You can:
- Continue with other work - results will be automatically provided when ready
- Check status with `research_status` tool
- If you need the results before continuing, say so and yield the turn to the user
\
_Estimated time: 30-120 seconds depending on query complexity_",
research_id,
query
);
Ok(placeholder)
}
/// Run the scout agent and return the research report.
/// This is the blocking part that runs in a background task.
async fn run_scout_agent(
g3_path: &std::path::Path,
query: &str,
browser: WebDriverBrowser,
) -> Result<String> {
// Build the command with appropriate webdriver flags
let mut cmd = Command::new(&g3_path);
let mut cmd = Command::new(g3_path);
cmd
.arg("--agent")
.arg("scout")
.arg("--new-session") // Always start fresh for research
.arg("--quiet"); // Suppress log file creation
// Propagate the webdriver browser choice from the parent g3 instance
match ctx.config.webdriver.browser {
// Propagate the webdriver browser choice
match browser {
WebDriverBrowser::ChromeHeadless => { cmd.arg("--chrome-headless"); }
WebDriverBrowser::Safari => { cmd.arg("--webdriver"); }
}
@@ -204,15 +275,9 @@ pub async fn execute_research<W: UiWriter>(
stderr_output
});
// Collect stdout lines, showing only translated progress messages
// Collect stdout lines (no progress display in background)
while let Some(line) = reader.next_line().await? {
all_output.push(line.clone());
// Show translated progress for tool calls
if let Some(progress_msg) = translate_progress(&line) {
// Update the status line in-place (no spinner)
ctx.ui_writer.update_tool_output_line(&progress_msg);
}
all_output.push(line);
}
// Collect stderr output
@@ -234,79 +299,116 @@ pub async fn execute_research<W: UiWriter>(
.any(|pattern| combined_output.contains(pattern));
if is_context_error {
let error_msg = format!(
"❌ **Scout Agent Error: Context Window Exhausted**\n\n\
return Err(anyhow::anyhow!(
"Context Window Exhausted\n\n\
The research query required more context than the model supports.\n\n\
**Suggestions:**\n\
- Try a more specific, narrower query\n\
- Break the research into smaller sub-questions\n\
- Use a model with a larger context window\n\n\
**Technical Details:**\n\
Exit code: {}\n\
{}",
exit_code,
if !stderr_text.is_empty() { format!("Error output: {}", stderr_text.chars().take(500).collect::<String>()) } else { String::new() }
);
ctx.ui_writer.println(&error_msg);
return Ok(error_msg);
Exit code: {}",
exit_code
));
}
// Generic error with details
let error_msg = format!(
"❌ **Scout Agent Failed**\n\n\
return Err(anyhow::anyhow!(
"Scout Agent Failed\n\n\
Exit code: {}\n\n\
{}{}",
exit_code,
if !stderr_text.is_empty() { format!("**Error output:**\n{}\n\n", stderr_text.chars().take(1000).collect::<String>()) } else { String::new() },
if all_output.len() > 0 { format!("**Last output lines:**\n{}", all_output.iter().rev().take(10).rev().cloned().collect::<Vec<_>>().join("\n")) } else { String::new() }
);
ctx.ui_writer.println(&error_msg);
return Ok(error_msg);
if !all_output.is_empty() { format!("**Last output lines:**\n{}", all_output.iter().rev().take(10).rev().cloned().collect::<Vec<_>>().join("\n")) } else { String::new() }
));
}
// Join all output and extract the report between markers
let full_output = all_output.join("\n");
let report = match extract_report(&full_output) {
Ok(r) => r,
Err(e) => {
// Check if this looks like a context exhaustion issue
let combined = format!("{} {}", full_output, stderr_output.join(" ")).to_lowercase();
let is_context_error = CONTEXT_ERROR_PATTERNS.iter()
.any(|pattern| combined.contains(pattern));
extract_report(&full_output)
}
let error_msg = if is_context_error {
format!(
"❌ **Scout Agent Error: Context Window Exhausted**\n\n\
The scout agent ran out of context before completing the research report.\n\n\
**Suggestions:**\n\
- Try a more specific, narrower query\n\
- Break the research into smaller sub-questions\n\n\
**Technical Details:**\n\
{}",
e
)
} else {
format!(
"❌ **Scout Agent Error: Report Extraction Failed**\n\n\
{}\n\n\
The scout agent completed but did not produce a valid report.\n\
This may indicate the agent encountered an error during research.",
e
)
/// Execute the research_status tool - check status of pending research tasks.
pub async fn execute_research_status<W: UiWriter>(
tool_call: &ToolCall,
ctx: &mut ToolContext<'_, W>,
) -> Result<String> {
let research_id = tool_call
.args
.get("research_id")
.and_then(|v| v.as_str());
if let Some(id) = research_id {
// Check specific research task
match ctx.pending_research_manager.get(&id.to_string()) {
Some(task) => {
let status_emoji = match task.status {
crate::pending_research::ResearchStatus::Pending => "🔄",
crate::pending_research::ResearchStatus::Complete => "",
crate::pending_research::ResearchStatus::Failed => "",
};
ctx.ui_writer.println(&error_msg);
return Ok(error_msg);
let mut output = format!(
"{} **Research Status** (id: `{}`)\n\n\
**Query:** {}\n\
**Status:** {}\n\
**Elapsed:** {}\n",
status_emoji,
task.id,
task.query,
task.status,
task.elapsed_display()
);
if task.injected {
output.push_str("\n_Results have already been injected into the conversation._\n");
} else if task.status != crate::pending_research::ResearchStatus::Pending {
output.push_str("\n_Results will be injected at the next opportunity._\n");
}
Ok(output)
}
None => Ok(format!("❓ No research task found with id: `{}`", id)),
}
} else {
// List all pending research tasks
let tasks = ctx.pending_research_manager.list_pending();
if tasks.is_empty() {
return Ok("📋 No pending research tasks.".to_string());
}
let mut output = format!("📋 **Pending Research Tasks** ({} total)\n\n", tasks.len());
for task in tasks {
let status_emoji = match task.status {
crate::pending_research::ResearchStatus::Pending => "🔄",
crate::pending_research::ResearchStatus::Complete => "",
crate::pending_research::ResearchStatus::Failed => "",
};
// Print the research brief to the console for scrollback reference
// The report is printed without stripping ANSI codes to preserve formatting
ctx.ui_writer.println("");
ctx.ui_writer.println(&report);
ctx.ui_writer.println("");
output.push_str(&format!(
"{} `{}` - {} ({})\n Query: {}\n\n",
status_emoji,
task.id,
task.status,
task.elapsed_display(),
truncate_query(&task.query, 60)
));
}
Ok(report)
Ok(output)
}
}
/// Truncate a query for display
fn truncate_query(query: &str, max_len: usize) -> String {
if query.chars().count() <= max_len {
query.to_string()
} else {
let truncated: String = query.chars().take(max_len - 3).collect();
format!("{}...", truncated)
}
}
/// Extract the research report from scout output.
@@ -347,10 +449,10 @@ fn extract_report(output: &str) -> Result<String> {
let report_content = output[report_start..original_end].trim();
if report_content.is_empty() {
return Ok("Scout agent returned an empty report.".to_string());
return Ok("Scout agent returned an empty report.".to_string());
}
Ok(format!("📋 Research Report:\n\n{}", report_content))
Ok(report_content.to_string())
}
/// Find the position of a marker in text that may contain ANSI codes.
@@ -372,7 +474,7 @@ fn find_marker_position(text: &str, marker: &str) -> Option<usize> {
/// Handles common ANSI sequences like:
/// - CSI sequences: \x1b[...m (colors, styles)
/// - OSC sequences: \x1b]...\x07 (terminal titles, etc.)
fn strip_ansi_codes(s: &str) -> String {
pub fn strip_ansi_codes(s: &str) -> String {
let mut result = String::with_capacity(s.len());
let mut chars = s.chars().peekable();
@@ -595,4 +697,14 @@ Some trailing text"#;
assert!(result.starts_with(" > `grep"));
assert!(result.contains("..."));
}
#[test]
fn test_truncate_query() {
assert_eq!(truncate_query("short query", 50), "short query");
let long_query = "This is a very long research query that should be truncated for display purposes";
let result = truncate_query(long_query, 40);
assert!(result.len() <= 40);
assert!(result.ends_with("..."));
}
}