feat: async research tool - runs in background, returns immediately

The research tool now spawns the scout agent in a background tokio task and
returns immediately with a placeholder containing a research_id. This allows
the agent to continue working while research runs (30-120 seconds).

Key changes:
- New PendingResearchManager for tracking async research tasks
- research tool returns immediately with a placeholder containing the research_id
- New research_status tool to check the progress of pending research
- Auto-injection of completed research at natural break points:
  - Start of each tool iteration (before the LLM call)
  - Before prompting the user in interactive mode
- /research CLI command to list all research tasks
- Updated system prompt to explain the async behavior

The agent can:
- Continue with other work while research runs
- Check status with the research_status tool
- Yield the turn to the user if results are critical before continuing
2026-01-30 13:00:02 +11:00
parent 2e21502357
commit 5ab1598e03
11 changed files with 797 additions and 81 deletions
--- a/crates/g3-core/src/tools/acd.rs
+++ b/crates/g3-core/src/tools/acd.rs
@@ -120,6 +120,7 @@ mod tests {
    use crate::acd::Fragment;
    use crate::ui_writer::NullUiWriter;
    use crate::background_process::BackgroundProcessManager;
+    use crate::pending_research::PendingResearchManager;
    use serial_test::serial;
    use crate::webdriver_session::WebDriverSession;
    use g3_providers::{Message, MessageRole};
@@ -135,6 +136,7 @@ mod tests {
        todo_content: Arc<RwLock<String>>,
        pending_images: Vec<g3_providers::ImageContent>,
        config: g3_config::Config,
+        pending_research_manager: PendingResearchManager,
    }

    impl TestContext {
@@ -147,6 +149,7 @@ mod tests {
                todo_content: Arc::new(RwLock::new(String::new())),
                pending_images: Vec::new(),
                config: g3_config::Config::default(),
+                pending_research_manager: PendingResearchManager::new(),
            }
        }
    }
@@ -169,6 +172,7 @@ mod tests {
            requirements_sha: None,
            context_total_tokens: 100000,
            context_used_tokens: 10000,
+            pending_research_manager: &test_ctx.pending_research_manager,
        };

        let tool_call = ToolCall {
@@ -199,6 +203,7 @@ mod tests {
            requirements_sha: None,
            context_total_tokens: 100000,
            context_used_tokens: 10000,
+            pending_research_manager: &test_ctx.pending_research_manager,
        };

        let tool_call = ToolCall {
@@ -229,6 +234,7 @@ mod tests {
            requirements_sha: None,
            context_total_tokens: 100000,
            context_used_tokens: 10000,
+            pending_research_manager: &test_ctx.pending_research_manager,
        };

        let tool_call = ToolCall {
--- a/crates/g3-core/src/tools/executor.rs
+++ b/crates/g3-core/src/tools/executor.rs
@@ -5,6 +5,7 @@ use std::sync::Arc;
 use tokio::sync::RwLock;

 use crate::background_process::BackgroundProcessManager;
+use crate::pending_research::PendingResearchManager;
 use crate::paths::{ensure_session_dir, get_session_todo_path, get_todo_path};
 use crate::ui_writer::UiWriter;
 use crate::webdriver_session::WebDriverSession;
@@ -27,6 +28,7 @@ pub struct ToolContext<'a, W: UiWriter> {
    pub requirements_sha: Option<&'a str>,
    pub context_total_tokens: u32,
    pub context_used_tokens: u32,
+    pub pending_research_manager: &'a PendingResearchManager,
 }

 impl<'a, W: UiWriter> ToolContext<'a, W> {
--- a/crates/g3-core/src/tools/research.rs
+++ b/crates/g3-core/src/tools/research.rs
@@ -1,9 +1,15 @@
 //! Research tool: spawns a scout agent to perform web-based research.
+//!
+//! The research tool is **asynchronous** - it spawns the scout agent in the background
+//! and returns immediately with a research_id. The agent can continue with other work
+//! while research is in progress. Results are automatically injected into the conversation
+//! when ready, or the agent can check status with the `research_status` tool.

 use anyhow::Result;
 use std::process::Stdio;
 use tokio::io::{AsyncBufReadExt, BufReader};
 use tokio::process::Command;
+use tracing::{debug, error};

 use crate::ui_writer::UiWriter;
 use crate::ToolCall;
@@ -19,6 +25,7 @@ const REPORT_END_MARKER: &str = "---SCOUT_REPORT_END---";
 ///
 /// Parses tool call headers from the scout output and returns human-readable
 /// progress messages. Returns None for lines that should be suppressed.
+#[allow(dead_code)] // Used in tests, may be used for progress display in future
 fn translate_progress(line: &str) -> Option<String> {
    // Strip ANSI codes first for pattern matching
    let clean_line = strip_ansi_codes(line);
@@ -118,6 +125,7 @@ fn translate_progress(line: &str) -> Option<String> {
 }

 /// Extract domain from a URL for cleaner display.
+#[allow(dead_code)] // Used in tests
 fn extract_domain(url: &str) -> Option<&str> {
    // Remove protocol
    let without_protocol = url
@@ -131,6 +139,7 @@ fn extract_domain(url: &str) -> Option<&str> {

 /// Truncate a command to a maximum length for display.
 /// Preserves the beginning of the command and adds "..." if truncated.
+#[allow(dead_code)] // Used in tests
 fn truncate_command_snippet(cmd: &str, max_len: usize) -> String {
    // Take just the first line if multi-line
    let first_line = cmd.lines().next().unwrap_or(cmd);
@@ -149,6 +158,14 @@ const CONTEXT_ERROR_PATTERNS: &[&str] = &[
    "too many tokens", "exceeds the model", "context window", "max_tokens",
 ];

+/// Execute the research tool - spawns scout agent in background and returns immediately.
+///
+/// This is the **async** version of research. It:
+/// 1. Registers a new research task with the PendingResearchManager
+/// 2. Spawns the scout agent in a background tokio task
+/// 3. Returns immediately with a placeholder message containing the research_id
+/// 4. The background task updates the manager when research completes
+/// 5. Results are injected into the conversation at the next natural break point
 pub async fn execute_research<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &mut ToolContext<'_, W>,
@@ -159,20 +176,74 @@ pub async fn execute_research<W: UiWriter>(
        .and_then(|v| v.as_str())
        .ok_or_else(|| anyhow::anyhow!("Missing required 'query' parameter"))?;

+    // Register the research task and get an ID
+    let research_id = ctx.pending_research_manager.register(query);
+    
+    // Clone values needed for the background task
+    let query_owned = query.to_string();
+    let research_id_clone = research_id.clone();
+    let manager = ctx.pending_research_manager.clone();
+    let browser = ctx.config.webdriver.browser.clone();
+    
    // Find the g3 executable path
    let g3_path = std::env::current_exe()
        .unwrap_or_else(|_| std::path::PathBuf::from("g3"));

+    // Spawn the scout agent in a background task
+    tokio::spawn(async move {
+        let result = run_scout_agent(&g3_path, &query_owned, browser).await;
+        
+        match result {
+            Ok(report) => {
+                debug!("Research {} completed successfully", research_id_clone);
+                manager.complete(&research_id_clone, report);
+            }
+            Err(e) => {
+                error!("Research {} failed: {}", research_id_clone, e);
+                manager.fail(&research_id_clone, e.to_string());
+            }
+        }
+    });
+
+    // Return immediately with placeholder
+    let placeholder = format!(
+        "🔍 **Research initiated** (id: `{}`)
+
+\
+**Query:** {}
+
+\
+Research is running in the background. You can:
+- Continue with other work - results will be automatically provided when ready
+- Check status with `research_status` tool
+- If you need the results before continuing, say so and yield the turn to the user
+
+\
+_Estimated time: 30-120 seconds depending on query complexity_",
+        research_id,
+        query
+    );
+    
+    Ok(placeholder)
+}
+
+/// Run the scout agent and return the research report.
+/// This is the long-running part (it waits for the scout process to finish), so it is awaited inside a background task.
+async fn run_scout_agent(
+    g3_path: &std::path::Path,
+    query: &str,
+    browser: WebDriverBrowser,
+) -> Result<String> {
    // Build the command with appropriate webdriver flags
-    let mut cmd = Command::new(&g3_path);
+    let mut cmd = Command::new(g3_path);
    cmd
        .arg("--agent")
        .arg("scout")
        .arg("--new-session")  // Always start fresh for research
        .arg("--quiet");  // Suppress log file creation

-    // Propagate the webdriver browser choice from the parent g3 instance
-    match ctx.config.webdriver.browser {
+    // Propagate the webdriver browser choice
+    match browser {
        WebDriverBrowser::ChromeHeadless => { cmd.arg("--chrome-headless"); }
        WebDriverBrowser::Safari => { cmd.arg("--webdriver"); }
    }
@@ -204,15 +275,9 @@ pub async fn execute_research<W: UiWriter>(
        stderr_output
    });

-    // Collect stdout lines, showing only translated progress messages
+    // Collect stdout lines (no progress display in background)
    while let Some(line) = reader.next_line().await? {
-        all_output.push(line.clone());
-        
-        // Show translated progress for tool calls
-        if let Some(progress_msg) = translate_progress(&line) {
-            // Update the status line in-place (no spinner)
-            ctx.ui_writer.update_tool_output_line(&progress_msg);
-        }
+        all_output.push(line);
    }
    
    // Collect stderr output
@@ -234,79 +299,116 @@ pub async fn execute_research<W: UiWriter>(
            .any(|pattern| combined_output.contains(pattern));
        
        if is_context_error {
-            let error_msg = format!(
-                "❌ **Scout Agent Error: Context Window Exhausted**\n\n\
+            return Err(anyhow::anyhow!(
+                "Context Window Exhausted\n\n\
                The research query required more context than the model supports.\n\n\
                **Suggestions:**\n\
                - Try a more specific, narrower query\n\
                - Break the research into smaller sub-questions\n\
                - Use a model with a larger context window\n\n\
-                **Technical Details:**\n\
-                Exit code: {}\n\
-                {}",
-                exit_code,
-                if !stderr_text.is_empty() { format!("Error output: {}", stderr_text.chars().take(500).collect::<String>()) } else { String::new() }
-            );
-            ctx.ui_writer.println(&error_msg);
-            return Ok(error_msg);
+                Exit code: {}",
+                exit_code
+            ));
        }
        
        // Generic error with details
-        let error_msg = format!(
-            "❌ **Scout Agent Failed**\n\n\
+        return Err(anyhow::anyhow!(
+            "Scout Agent Failed\n\n\
            Exit code: {}\n\n\
            {}{}",
            exit_code,
            if !stderr_text.is_empty() { format!("**Error output:**\n{}\n\n", stderr_text.chars().take(1000).collect::<String>()) } else { String::new() },
-            if all_output.len() > 0 { format!("**Last output lines:**\n{}", all_output.iter().rev().take(10).rev().cloned().collect::<Vec<_>>().join("\n")) } else { String::new() }
-        );
-        ctx.ui_writer.println(&error_msg);
-        return Ok(error_msg);
+            if !all_output.is_empty() { format!("**Last output lines:**\n{}", all_output.iter().rev().take(10).rev().cloned().collect::<Vec<_>>().join("\n")) } else { String::new() }
+        ));
    }

    // Join all output and extract the report between markers
    let full_output = all_output.join("\n");
    
-    let report = match extract_report(&full_output) {
-        Ok(r) => r,
-        Err(e) => {
-            // Check if this looks like a context exhaustion issue
-            let combined = format!("{} {}", full_output, stderr_output.join(" ")).to_lowercase();
-            let is_context_error = CONTEXT_ERROR_PATTERNS.iter()
-                .any(|pattern| combined.contains(pattern));
-            
-            let error_msg = if is_context_error {
-                format!(
-                    "❌ **Scout Agent Error: Context Window Exhausted**\n\n\
-                    The scout agent ran out of context before completing the research report.\n\n\
-                    **Suggestions:**\n\
-                    - Try a more specific, narrower query\n\
-                    - Break the research into smaller sub-questions\n\n\
-                    **Technical Details:**\n\
-                    {}",
-                    e
-                )
-            } else {
-                format!(
-                    "❌ **Scout Agent Error: Report Extraction Failed**\n\n\
-                    {}\n\n\
-                    The scout agent completed but did not produce a valid report.\n\
-                    This may indicate the agent encountered an error during research.",
-                    e
-                )
-            };
-            ctx.ui_writer.println(&error_msg);
-            return Ok(error_msg);
+    extract_report(&full_output)
+}
+
+/// Execute the research_status tool - check status of pending research tasks.
+pub async fn execute_research_status<W: UiWriter>(
+    tool_call: &ToolCall,
+    ctx: &mut ToolContext<'_, W>,
+) -> Result<String> {
+    let research_id = tool_call
+        .args
+        .get("research_id")
+        .and_then(|v| v.as_str());
+
+    if let Some(id) = research_id {
+        // Check specific research task
+        match ctx.pending_research_manager.get(&id.to_string()) {
+            Some(task) => {
+                let status_emoji = match task.status {
+                    crate::pending_research::ResearchStatus::Pending => "🔄",
+                    crate::pending_research::ResearchStatus::Complete => "✅",
+                    crate::pending_research::ResearchStatus::Failed => "❌",
+                };
+                
+                let mut output = format!(
+                    "{} **Research Status** (id: `{}`)\n\n\
+                    **Query:** {}\n\
+                    **Status:** {}\n\
+                    **Elapsed:** {}\n",
+                    status_emoji,
+                    task.id,
+                    task.query,
+                    task.status,
+                    task.elapsed_display()
+                );
+                
+                if task.injected {
+                    output.push_str("\n_Results have already been injected into the conversation._\n");
+                } else if task.status != crate::pending_research::ResearchStatus::Pending {
+                    output.push_str("\n_Results will be injected at the next opportunity._\n");
+                }
+                
+                Ok(output)
+            }
+            None => Ok(format!("❓ No research task found with id: `{}`", id)),
        }
-    };
-    
-    // Print the research brief to the console for scrollback reference
-    // The report is printed without stripping ANSI codes to preserve formatting
-    ctx.ui_writer.println("");
-    ctx.ui_writer.println(&report);
-    ctx.ui_writer.println("");
-    
-    Ok(report)
+    } else {
+        // List all pending research tasks
+        let tasks = ctx.pending_research_manager.list_pending();
+        
+        if tasks.is_empty() {
+            return Ok("📋 No pending research tasks.".to_string());
+        }
+        
+        let mut output = format!("📋 **Pending Research Tasks** ({} total)\n\n", tasks.len());
+        
+        for task in tasks {
+            let status_emoji = match task.status {
+                crate::pending_research::ResearchStatus::Pending => "🔄",
+                crate::pending_research::ResearchStatus::Complete => "✅",
+                crate::pending_research::ResearchStatus::Failed => "❌",
+            };
+            
+            output.push_str(&format!(
+                "{} `{}` - {} ({})\n   Query: {}\n\n",
+                status_emoji,
+                task.id,
+                task.status,
+                task.elapsed_display(),
+                truncate_query(&task.query, 60)
+            ));
+        }
+        
+        Ok(output)
+    }
+}
+
+/// Truncate a query for display to at most `max_len` characters, ending in "..." when shortened (`max_len` must be >= 3).
+fn truncate_query(query: &str, max_len: usize) -> String {
+    if query.chars().count() <= max_len {
+        query.to_string()
+    } else {
+        let truncated: String = query.chars().take(max_len - 3).collect();
+        format!("{}...", truncated)
+    }
 }

 /// Extract the research report from scout output.
@@ -347,10 +449,10 @@ fn extract_report(output: &str) -> Result<String> {
    let report_content = output[report_start..original_end].trim();
    
    if report_content.is_empty() {
-        return Ok("❌ Scout agent returned an empty report.".to_string());
+        return Ok("Scout agent returned an empty report.".to_string());
    }
    
-    Ok(format!("📋 Research Report:\n\n{}", report_content))
+    Ok(report_content.to_string())
 }

 /// Find the position of a marker in text that may contain ANSI codes.
@@ -372,7 +474,7 @@ fn find_marker_position(text: &str, marker: &str) -> Option<usize> {
 /// Handles common ANSI sequences like:
 /// - CSI sequences: \x1b[...m (colors, styles)
 /// - OSC sequences: \x1b]...\x07 (terminal titles, etc.)
-fn strip_ansi_codes(s: &str) -> String {
+pub fn strip_ansi_codes(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    
@@ -595,4 +697,14 @@ Some trailing text"#;
        assert!(result.starts_with(" > `grep"));
        assert!(result.contains("..."));
    }
+
+    #[test]
+    fn test_truncate_query() {
+        assert_eq!(truncate_query("short query", 50), "short query");
+        
+        let long_query = "This is a very long research query that should be truncated for display purposes";
+        let result = truncate_query(long_query, 40);
+        assert!(result.len() <= 40);
+        assert!(result.ends_with("..."));
+    }
 }