feat: async research tool - runs in background, returns immediately
The research tool now spawns the scout agent in a background tokio task and returns immediately with a research_id placeholder. This allows the agent to continue working while research runs (30-120 seconds). Key changes: - New PendingResearchManager for tracking async research tasks - research tool returns immediately with placeholder containing research_id - research_status tool to check progress of pending research - Auto-injection of completed research at natural break points: - Start of each tool iteration (before LLM call) - Before prompting user in interactive mode - /research CLI command to list all research tasks - Updated system prompt to explain async behavior The agent can: - Continue with other work while research runs - Check status with research_status tool - Yield turn to user if results are critical before continuing
This commit is contained in:
@@ -120,6 +120,7 @@ mod tests {
|
||||
use crate::acd::Fragment;
|
||||
use crate::ui_writer::NullUiWriter;
|
||||
use crate::background_process::BackgroundProcessManager;
|
||||
use crate::pending_research::PendingResearchManager;
|
||||
use serial_test::serial;
|
||||
use crate::webdriver_session::WebDriverSession;
|
||||
use g3_providers::{Message, MessageRole};
|
||||
@@ -135,6 +136,7 @@ mod tests {
|
||||
todo_content: Arc<RwLock<String>>,
|
||||
pending_images: Vec<g3_providers::ImageContent>,
|
||||
config: g3_config::Config,
|
||||
pending_research_manager: PendingResearchManager,
|
||||
}
|
||||
|
||||
impl TestContext {
|
||||
@@ -147,6 +149,7 @@ mod tests {
|
||||
todo_content: Arc::new(RwLock::new(String::new())),
|
||||
pending_images: Vec::new(),
|
||||
config: g3_config::Config::default(),
|
||||
pending_research_manager: PendingResearchManager::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -169,6 +172,7 @@ mod tests {
|
||||
requirements_sha: None,
|
||||
context_total_tokens: 100000,
|
||||
context_used_tokens: 10000,
|
||||
pending_research_manager: &test_ctx.pending_research_manager,
|
||||
};
|
||||
|
||||
let tool_call = ToolCall {
|
||||
@@ -199,6 +203,7 @@ mod tests {
|
||||
requirements_sha: None,
|
||||
context_total_tokens: 100000,
|
||||
context_used_tokens: 10000,
|
||||
pending_research_manager: &test_ctx.pending_research_manager,
|
||||
};
|
||||
|
||||
let tool_call = ToolCall {
|
||||
@@ -229,6 +234,7 @@ mod tests {
|
||||
requirements_sha: None,
|
||||
context_total_tokens: 100000,
|
||||
context_used_tokens: 10000,
|
||||
pending_research_manager: &test_ctx.pending_research_manager,
|
||||
};
|
||||
|
||||
let tool_call = ToolCall {
|
||||
|
||||
@@ -5,6 +5,7 @@ use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use crate::background_process::BackgroundProcessManager;
|
||||
use crate::pending_research::PendingResearchManager;
|
||||
use crate::paths::{ensure_session_dir, get_session_todo_path, get_todo_path};
|
||||
use crate::ui_writer::UiWriter;
|
||||
use crate::webdriver_session::WebDriverSession;
|
||||
@@ -27,6 +28,7 @@ pub struct ToolContext<'a, W: UiWriter> {
|
||||
pub requirements_sha: Option<&'a str>,
|
||||
pub context_total_tokens: u32,
|
||||
pub context_used_tokens: u32,
|
||||
pub pending_research_manager: &'a PendingResearchManager,
|
||||
}
|
||||
|
||||
impl<'a, W: UiWriter> ToolContext<'a, W> {
|
||||
|
||||
@@ -1,9 +1,15 @@
|
||||
//! Research tool: spawns a scout agent to perform web-based research.
|
||||
//!
|
||||
//! The research tool is **asynchronous** - it spawns the scout agent in the background
|
||||
//! and returns immediately with a research_id. The agent can continue with other work
|
||||
//! while research is in progress. Results are automatically injected into the conversation
|
||||
//! when ready, or the agent can check status with the `research_status` tool.
|
||||
|
||||
use anyhow::Result;
|
||||
use std::process::Stdio;
|
||||
use tokio::io::{AsyncBufReadExt, BufReader};
|
||||
use tokio::process::Command;
|
||||
use tracing::{debug, error};
|
||||
|
||||
use crate::ui_writer::UiWriter;
|
||||
use crate::ToolCall;
|
||||
@@ -19,6 +25,7 @@ const REPORT_END_MARKER: &str = "---SCOUT_REPORT_END---";
|
||||
///
|
||||
/// Parses tool call headers from the scout output and returns human-readable
|
||||
/// progress messages. Returns None for lines that should be suppressed.
|
||||
#[allow(dead_code)] // Used in tests, may be used for progress display in future
|
||||
fn translate_progress(line: &str) -> Option<String> {
|
||||
// Strip ANSI codes first for pattern matching
|
||||
let clean_line = strip_ansi_codes(line);
|
||||
@@ -118,6 +125,7 @@ fn translate_progress(line: &str) -> Option<String> {
|
||||
}
|
||||
|
||||
/// Extract domain from a URL for cleaner display.
|
||||
#[allow(dead_code)] // Used in tests
|
||||
fn extract_domain(url: &str) -> Option<&str> {
|
||||
// Remove protocol
|
||||
let without_protocol = url
|
||||
@@ -131,6 +139,7 @@ fn extract_domain(url: &str) -> Option<&str> {
|
||||
|
||||
/// Truncate a command to a maximum length for display.
|
||||
/// Preserves the beginning of the command and adds "..." if truncated.
|
||||
#[allow(dead_code)] // Used in tests
|
||||
fn truncate_command_snippet(cmd: &str, max_len: usize) -> String {
|
||||
// Take just the first line if multi-line
|
||||
let first_line = cmd.lines().next().unwrap_or(cmd);
|
||||
@@ -149,6 +158,14 @@ const CONTEXT_ERROR_PATTERNS: &[&str] = &[
|
||||
"too many tokens", "exceeds the model", "context window", "max_tokens",
|
||||
];
|
||||
|
||||
/// Execute the research tool - spawns scout agent in background and returns immediately.
|
||||
///
|
||||
/// This is the **async** version of research. It:
|
||||
/// 1. Registers a new research task with the PendingResearchManager
|
||||
/// 2. Spawns the scout agent in a background tokio task
|
||||
/// 3. Returns immediately with a placeholder message containing the research_id
|
||||
/// 4. The background task updates the manager when research completes
|
||||
/// 5. Results are injected into the conversation at the next natural break point
|
||||
pub async fn execute_research<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &mut ToolContext<'_, W>,
|
||||
@@ -159,20 +176,74 @@ pub async fn execute_research<W: UiWriter>(
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing required 'query' parameter"))?;
|
||||
|
||||
// Register the research task and get an ID
|
||||
let research_id = ctx.pending_research_manager.register(query);
|
||||
|
||||
// Clone values needed for the background task
|
||||
let query_owned = query.to_string();
|
||||
let research_id_clone = research_id.clone();
|
||||
let manager = ctx.pending_research_manager.clone();
|
||||
let browser = ctx.config.webdriver.browser.clone();
|
||||
|
||||
// Find the g3 executable path
|
||||
let g3_path = std::env::current_exe()
|
||||
.unwrap_or_else(|_| std::path::PathBuf::from("g3"));
|
||||
|
||||
// Spawn the scout agent in a background task
|
||||
tokio::spawn(async move {
|
||||
let result = run_scout_agent(&g3_path, &query_owned, browser).await;
|
||||
|
||||
match result {
|
||||
Ok(report) => {
|
||||
debug!("Research {} completed successfully", research_id_clone);
|
||||
manager.complete(&research_id_clone, report);
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Research {} failed: {}", research_id_clone, e);
|
||||
manager.fail(&research_id_clone, e.to_string());
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Return immediately with placeholder
|
||||
let placeholder = format!(
|
||||
"🔍 **Research initiated** (id: `{}`)
|
||||
|
||||
\
|
||||
**Query:** {}
|
||||
|
||||
\
|
||||
Research is running in the background. You can:
|
||||
- Continue with other work - results will be automatically provided when ready
|
||||
- Check status with `research_status` tool
|
||||
- If you need the results before continuing, say so and yield the turn to the user
|
||||
|
||||
\
|
||||
_Estimated time: 30-120 seconds depending on query complexity_",
|
||||
research_id,
|
||||
query
|
||||
);
|
||||
|
||||
Ok(placeholder)
|
||||
}
|
||||
|
||||
/// Run the scout agent and return the research report.
|
||||
/// This is the blocking part that runs in a background task.
|
||||
async fn run_scout_agent(
|
||||
g3_path: &std::path::Path,
|
||||
query: &str,
|
||||
browser: WebDriverBrowser,
|
||||
) -> Result<String> {
|
||||
// Build the command with appropriate webdriver flags
|
||||
let mut cmd = Command::new(&g3_path);
|
||||
let mut cmd = Command::new(g3_path);
|
||||
cmd
|
||||
.arg("--agent")
|
||||
.arg("scout")
|
||||
.arg("--new-session") // Always start fresh for research
|
||||
.arg("--quiet"); // Suppress log file creation
|
||||
|
||||
// Propagate the webdriver browser choice from the parent g3 instance
|
||||
match ctx.config.webdriver.browser {
|
||||
// Propagate the webdriver browser choice
|
||||
match browser {
|
||||
WebDriverBrowser::ChromeHeadless => { cmd.arg("--chrome-headless"); }
|
||||
WebDriverBrowser::Safari => { cmd.arg("--webdriver"); }
|
||||
}
|
||||
@@ -204,15 +275,9 @@ pub async fn execute_research<W: UiWriter>(
|
||||
stderr_output
|
||||
});
|
||||
|
||||
// Collect stdout lines, showing only translated progress messages
|
||||
// Collect stdout lines (no progress display in background)
|
||||
while let Some(line) = reader.next_line().await? {
|
||||
all_output.push(line.clone());
|
||||
|
||||
// Show translated progress for tool calls
|
||||
if let Some(progress_msg) = translate_progress(&line) {
|
||||
// Update the status line in-place (no spinner)
|
||||
ctx.ui_writer.update_tool_output_line(&progress_msg);
|
||||
}
|
||||
all_output.push(line);
|
||||
}
|
||||
|
||||
// Collect stderr output
|
||||
@@ -234,79 +299,116 @@ pub async fn execute_research<W: UiWriter>(
|
||||
.any(|pattern| combined_output.contains(pattern));
|
||||
|
||||
if is_context_error {
|
||||
let error_msg = format!(
|
||||
"❌ **Scout Agent Error: Context Window Exhausted**\n\n\
|
||||
return Err(anyhow::anyhow!(
|
||||
"Context Window Exhausted\n\n\
|
||||
The research query required more context than the model supports.\n\n\
|
||||
**Suggestions:**\n\
|
||||
- Try a more specific, narrower query\n\
|
||||
- Break the research into smaller sub-questions\n\
|
||||
- Use a model with a larger context window\n\n\
|
||||
**Technical Details:**\n\
|
||||
Exit code: {}\n\
|
||||
{}",
|
||||
exit_code,
|
||||
if !stderr_text.is_empty() { format!("Error output: {}", stderr_text.chars().take(500).collect::<String>()) } else { String::new() }
|
||||
);
|
||||
ctx.ui_writer.println(&error_msg);
|
||||
return Ok(error_msg);
|
||||
Exit code: {}",
|
||||
exit_code
|
||||
));
|
||||
}
|
||||
|
||||
// Generic error with details
|
||||
let error_msg = format!(
|
||||
"❌ **Scout Agent Failed**\n\n\
|
||||
return Err(anyhow::anyhow!(
|
||||
"Scout Agent Failed\n\n\
|
||||
Exit code: {}\n\n\
|
||||
{}{}",
|
||||
exit_code,
|
||||
if !stderr_text.is_empty() { format!("**Error output:**\n{}\n\n", stderr_text.chars().take(1000).collect::<String>()) } else { String::new() },
|
||||
if all_output.len() > 0 { format!("**Last output lines:**\n{}", all_output.iter().rev().take(10).rev().cloned().collect::<Vec<_>>().join("\n")) } else { String::new() }
|
||||
);
|
||||
ctx.ui_writer.println(&error_msg);
|
||||
return Ok(error_msg);
|
||||
if !all_output.is_empty() { format!("**Last output lines:**\n{}", all_output.iter().rev().take(10).rev().cloned().collect::<Vec<_>>().join("\n")) } else { String::new() }
|
||||
));
|
||||
}
|
||||
|
||||
// Join all output and extract the report between markers
|
||||
let full_output = all_output.join("\n");
|
||||
|
||||
let report = match extract_report(&full_output) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
// Check if this looks like a context exhaustion issue
|
||||
let combined = format!("{} {}", full_output, stderr_output.join(" ")).to_lowercase();
|
||||
let is_context_error = CONTEXT_ERROR_PATTERNS.iter()
|
||||
.any(|pattern| combined.contains(pattern));
|
||||
|
||||
let error_msg = if is_context_error {
|
||||
format!(
|
||||
"❌ **Scout Agent Error: Context Window Exhausted**\n\n\
|
||||
The scout agent ran out of context before completing the research report.\n\n\
|
||||
**Suggestions:**\n\
|
||||
- Try a more specific, narrower query\n\
|
||||
- Break the research into smaller sub-questions\n\n\
|
||||
**Technical Details:**\n\
|
||||
{}",
|
||||
e
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"❌ **Scout Agent Error: Report Extraction Failed**\n\n\
|
||||
{}\n\n\
|
||||
The scout agent completed but did not produce a valid report.\n\
|
||||
This may indicate the agent encountered an error during research.",
|
||||
e
|
||||
)
|
||||
};
|
||||
ctx.ui_writer.println(&error_msg);
|
||||
return Ok(error_msg);
|
||||
extract_report(&full_output)
|
||||
}
|
||||
|
||||
/// Execute the research_status tool - check status of pending research tasks.
|
||||
pub async fn execute_research_status<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &mut ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
let research_id = tool_call
|
||||
.args
|
||||
.get("research_id")
|
||||
.and_then(|v| v.as_str());
|
||||
|
||||
if let Some(id) = research_id {
|
||||
// Check specific research task
|
||||
match ctx.pending_research_manager.get(&id.to_string()) {
|
||||
Some(task) => {
|
||||
let status_emoji = match task.status {
|
||||
crate::pending_research::ResearchStatus::Pending => "🔄",
|
||||
crate::pending_research::ResearchStatus::Complete => "✅",
|
||||
crate::pending_research::ResearchStatus::Failed => "❌",
|
||||
};
|
||||
|
||||
let mut output = format!(
|
||||
"{} **Research Status** (id: `{}`)\n\n\
|
||||
**Query:** {}\n\
|
||||
**Status:** {}\n\
|
||||
**Elapsed:** {}\n",
|
||||
status_emoji,
|
||||
task.id,
|
||||
task.query,
|
||||
task.status,
|
||||
task.elapsed_display()
|
||||
);
|
||||
|
||||
if task.injected {
|
||||
output.push_str("\n_Results have already been injected into the conversation._\n");
|
||||
} else if task.status != crate::pending_research::ResearchStatus::Pending {
|
||||
output.push_str("\n_Results will be injected at the next opportunity._\n");
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
None => Ok(format!("❓ No research task found with id: `{}`", id)),
|
||||
}
|
||||
};
|
||||
|
||||
// Print the research brief to the console for scrollback reference
|
||||
// The report is printed without stripping ANSI codes to preserve formatting
|
||||
ctx.ui_writer.println("");
|
||||
ctx.ui_writer.println(&report);
|
||||
ctx.ui_writer.println("");
|
||||
|
||||
Ok(report)
|
||||
} else {
|
||||
// List all pending research tasks
|
||||
let tasks = ctx.pending_research_manager.list_pending();
|
||||
|
||||
if tasks.is_empty() {
|
||||
return Ok("📋 No pending research tasks.".to_string());
|
||||
}
|
||||
|
||||
let mut output = format!("📋 **Pending Research Tasks** ({} total)\n\n", tasks.len());
|
||||
|
||||
for task in tasks {
|
||||
let status_emoji = match task.status {
|
||||
crate::pending_research::ResearchStatus::Pending => "🔄",
|
||||
crate::pending_research::ResearchStatus::Complete => "✅",
|
||||
crate::pending_research::ResearchStatus::Failed => "❌",
|
||||
};
|
||||
|
||||
output.push_str(&format!(
|
||||
"{} `{}` - {} ({})\n Query: {}\n\n",
|
||||
status_emoji,
|
||||
task.id,
|
||||
task.status,
|
||||
task.elapsed_display(),
|
||||
truncate_query(&task.query, 60)
|
||||
));
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
}
|
||||
|
||||
/// Truncate a query for display
|
||||
fn truncate_query(query: &str, max_len: usize) -> String {
|
||||
if query.chars().count() <= max_len {
|
||||
query.to_string()
|
||||
} else {
|
||||
let truncated: String = query.chars().take(max_len - 3).collect();
|
||||
format!("{}...", truncated)
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract the research report from scout output.
|
||||
@@ -347,10 +449,10 @@ fn extract_report(output: &str) -> Result<String> {
|
||||
let report_content = output[report_start..original_end].trim();
|
||||
|
||||
if report_content.is_empty() {
|
||||
return Ok("❌ Scout agent returned an empty report.".to_string());
|
||||
return Ok("Scout agent returned an empty report.".to_string());
|
||||
}
|
||||
|
||||
Ok(format!("📋 Research Report:\n\n{}", report_content))
|
||||
Ok(report_content.to_string())
|
||||
}
|
||||
|
||||
/// Find the position of a marker in text that may contain ANSI codes.
|
||||
@@ -372,7 +474,7 @@ fn find_marker_position(text: &str, marker: &str) -> Option<usize> {
|
||||
/// Handles common ANSI sequences like:
|
||||
/// - CSI sequences: \x1b[...m (colors, styles)
|
||||
/// - OSC sequences: \x1b]...\x07 (terminal titles, etc.)
|
||||
fn strip_ansi_codes(s: &str) -> String {
|
||||
pub fn strip_ansi_codes(s: &str) -> String {
|
||||
let mut result = String::with_capacity(s.len());
|
||||
let mut chars = s.chars().peekable();
|
||||
|
||||
@@ -595,4 +697,14 @@ Some trailing text"#;
|
||||
assert!(result.starts_with(" > `grep"));
|
||||
assert!(result.contains("..."));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_truncate_query() {
|
||||
assert_eq!(truncate_query("short query", 50), "short query");
|
||||
|
||||
let long_query = "This is a very long research query that should be truncated for display purposes";
|
||||
let result = truncate_query(long_query, 40);
|
||||
assert!(result.len() <= 40);
|
||||
assert!(result.ends_with("..."));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user