Remove final_output tool - let summaries stream naturally

- Remove final_output from tool definitions, dispatch, and misc tools
- Update system prompts to request summaries as regular markdown text
- Remove print_final_output from UiWriter trait and all implementations
- Remove final_output handling from agent core logic
- Rename final_output_summary → summary in session continuation
- Delete final_output test files
- Update tool count tests (12→11, 27→26)

This allows LLM summaries to stream through the markdown formatter,
giving a more natural, responsive user experience instead of buffering
the entire summary inside a single tool call.
This commit is contained in:
Dhanji R. Prasanna
2026-01-09 14:57:24 +11:00
parent bebf04c7bd
commit 777191b3cb
17 changed files with 262 additions and 726 deletions

View File

@@ -100,21 +100,29 @@ pub fn extract_coach_feedback<W>(
where
W: UiWriter + Clone + Send + Sync + 'static,
{
// Try session log first (most reliable)
// Try session log first - now looks for last assistant message (primary method)
if let Some(session_id) = agent.get_session_id() {
if let Some(feedback) = try_extract_last_assistant_message(&session_id, config) {
debug!("Extracted coach feedback from last assistant message: {} chars", feedback.len());
return ExtractedFeedback::new(feedback, FeedbackSource::ConversationHistory);
}
}
// Fallback: Try session log with final_output pattern (backwards compatibility)
if let Some(session_id) = agent.get_session_id() {
if let Some(feedback) = try_extract_from_session_log(&session_id, config) {
debug!("Extracted coach feedback from session log: {} chars", feedback.len());
debug!("Extracted coach feedback from session log (final_output): {} chars", feedback.len());
return ExtractedFeedback::new(feedback, FeedbackSource::SessionLog);
}
}
// Try native tool call JSON parsing
// Fallback: Try native tool call JSON parsing (backwards compatibility)
if let Some(feedback) = try_extract_from_native_tool_call(&coach_result.response) {
debug!("Extracted coach feedback from native tool call: {} chars", feedback.len());
return ExtractedFeedback::new(feedback, FeedbackSource::NativeToolCall);
}
// Try conversation history
// Fallback: Try conversation history with final_output pattern (backwards compatibility)
if let Some(session_id) = agent.get_session_id() {
if let Some(feedback) = try_extract_from_conversation_history(&session_id, config) {
debug!("Extracted coach feedback from conversation history: {} chars", feedback.len());
@@ -122,7 +130,7 @@ where
}
}
// Try TaskResult parsing
// Fallback: Try TaskResult parsing (extracts last text block)
let extracted = coach_result.extract_final_output();
if !extracted.is_empty() {
debug!("Extracted coach feedback from task result: {} chars", extracted.len());
@@ -134,6 +142,73 @@ where
ExtractedFeedback::new(config.default_feedback.clone(), FeedbackSource::DefaultFallback)
}
/// Try to extract the last assistant message from session log (PRIMARY method)
/// This is the preferred extraction method - looks for the last substantial
/// assistant message content, regardless of whether it used final_output tool.
fn try_extract_last_assistant_message(
session_id: &str,
config: &FeedbackExtractionConfig,
) -> Option<String> {
// Try new .g3/sessions/<session_id>/session.json path first
let log_file_path = crate::get_session_file(session_id);
// Fall back to old logs/ path if new path doesn't exist
let log_file_path = if log_file_path.exists() {
log_file_path
} else {
let logs_path = config.logs_dir.clone().unwrap_or_else(logs_dir);
logs_path.join(format!("g3_session_{}.json", session_id))
};
if !log_file_path.exists() {
debug!("Session log file not found: {:?}", log_file_path);
return None;
}
let log_content = std::fs::read_to_string(&log_file_path).ok()?;
let log_json: Value = serde_json::from_str(&log_content).ok()?;
// Try to get conversation history from context_window
let messages = log_json
.get("context_window")?
.get("conversation_history")?
.as_array()?;
// Search backwards for the last assistant message with text content
for msg in messages.iter().rev() {
let role = msg.get("role").and_then(|v| v.as_str())?;
if role.eq_ignore_ascii_case("assistant") {
if let Some(content) = msg.get("content") {
// Handle string content
if let Some(content_str) = content.as_str() {
let trimmed = content_str.trim();
// Skip empty or very short responses (likely just tool calls)
if !trimmed.is_empty() && trimmed.len() > 10 {
return Some(trimmed.to_string());
}
}
// Handle array content (native tool calling format)
// Look for text blocks in the array
if let Some(content_array) = content.as_array() {
for block in content_array {
if block.get("type").and_then(|v| v.as_str()) == Some("text") {
if let Some(text) = block.get("text").and_then(|v| v.as_str()) {
let trimmed = text.trim();
if !trimmed.is_empty() && trimmed.len() > 10 {
return Some(trimmed.to_string());
}
}
}
}
}
}
}
}
None
}
/// Try to extract feedback from session log file
fn try_extract_from_session_log(
session_id: &str,

View File

@@ -1365,8 +1365,8 @@ impl<W: UiWriter> Agent<W> {
}
/// Save a session continuation artifact
/// Called when final_output is invoked to enable session resumption
pub fn save_session_continuation(&self, final_output_summary: Option<String>) {
/// Save session continuation for potential resumption
pub fn save_session_continuation(&self, summary: Option<String>) {
use crate::session_continuation::{save_continuation, SessionContinuation};
let session_id = match &self.session_id {
@@ -1398,7 +1398,7 @@ impl<W: UiWriter> Agent<W> {
self.is_agent_mode,
self.agent_name.clone(),
session_id,
final_output_summary,
summary,
session_log_path.to_string_lossy().to_string(),
self.context_window.percentage_used(),
todo_snapshot,
@@ -1494,9 +1494,9 @@ impl<W: UiWriter> Agent<W> {
}
}
// Fall back to using final_output summary + TODO
// Fall back to using session summary + TODO
let mut context_msg = String::new();
if let Some(ref summary) = continuation.final_output_summary {
if let Some(ref summary) = continuation.summary {
context_msg.push_str(&format!("Previous session summary:\n{}\n\n", summary));
}
if let Some(ref todo) = continuation.todo_snapshot {
@@ -1601,7 +1601,7 @@ impl<W: UiWriter> Agent<W> {
let mut any_tool_executed = false; // Track if ANY tool was executed across all iterations
let mut auto_summary_attempts = 0; // Track auto-summary prompt attempts
const MAX_AUTO_SUMMARY_ATTEMPTS: usize = 5; // Limit auto-summary retries (increased from 2 for better recovery)
let final_output_called = false; // Track if final_output was called
//
// Note: Session-level duplicate tracking was removed - we only prevent sequential duplicates (DUP IN CHUNK, DUP IN MSG)
let mut turn_accumulated_usage: Option<g3_providers::Usage> = None; // Track token usage for timing footer
@@ -2004,8 +2004,8 @@ impl<W: UiWriter> Agent<W> {
String::new()
};
// Don't display text before final_output - it will be in the summary
if !new_content.trim().is_empty() && tool_call.tool != "final_output" {
// Display any new text content
if !new_content.trim().is_empty() {
#[allow(unused_assignments)]
if !response_started {
self.ui_writer.print_agent_prompt();
@@ -2019,40 +2019,37 @@ impl<W: UiWriter> Agent<W> {
// Execute the tool with formatted output
// Skip printing tool call details for final_output
if tool_call.tool != "final_output" {
// Finish streaming markdown before showing tool output
self.ui_writer.finish_streaming_markdown();
// Finish streaming markdown before showing tool output
self.ui_writer.finish_streaming_markdown();
// Tool call header
self.ui_writer.print_tool_header(&tool_call.tool, Some(&tool_call.args));
if let Some(args_obj) = tool_call.args.as_object() {
for (key, value) in args_obj {
let value_str = match value {
serde_json::Value::String(s) => {
if tool_call.tool == "shell" && key == "command" {
if let Some(first_line) = s.lines().next() {
if s.lines().count() > 1 {
format!("{}...", first_line)
} else {
first_line.to_string()
}
// Tool call header
self.ui_writer.print_tool_header(&tool_call.tool, Some(&tool_call.args));
if let Some(args_obj) = tool_call.args.as_object() {
for (key, value) in args_obj {
let value_str = match value {
serde_json::Value::String(s) => {
if tool_call.tool == "shell" && key == "command" {
if let Some(first_line) = s.lines().next() {
if s.lines().count() > 1 {
format!("{}...", first_line)
} else {
s.clone()
first_line.to_string()
}
} else if s.chars().count() > 100 {
streaming::truncate_for_display(s, 100)
} else {
s.clone()
}
} else if s.chars().count() > 100 {
streaming::truncate_for_display(s, 100)
} else {
s.clone()
}
_ => value.to_string(),
};
self.ui_writer.print_tool_arg(key, &value_str);
}
}
_ => value.to_string(),
};
self.ui_writer.print_tool_arg(key, &value_str);
}
self.ui_writer.print_tool_output_header();
}
self.ui_writer.print_tool_output_header();
// Clone working_dir to avoid borrow checker issues
let working_dir = self.working_dir.clone();
@@ -2082,11 +2079,7 @@ impl<W: UiWriter> Agent<W> {
));
// Display tool execution result with proper indentation
if tool_call.tool == "final_output" {
// For final_output, use the dedicated method that renders markdown
// with a spinner animation
self.ui_writer.print_final_output(&tool_result);
} else {
{
let output_lines: Vec<&str> = tool_result.lines().collect();
// Check if UI wants full output (machine mode) or truncated (human mode)
@@ -2192,47 +2185,13 @@ impl<W: UiWriter> Agent<W> {
self.context_window.add_message(tool_message);
self.context_window.add_message(result_message);
// Check if this was a final_output tool call
if tool_call.tool == "final_output" {
// Finish the streaming markdown formatter before final_output
self.ui_writer.finish_streaming_markdown();
// Save context window BEFORE returning so the session log includes final_output
self.save_context_window("completed");
// The summary was already displayed via print_final_output
// Don't add it to full_response to avoid duplicate printing
// full_response is intentionally left empty/unchanged
let _ttft =
first_token_time.unwrap_or_else(|| stream_start.elapsed());
// Add timing if needed
let final_response = if show_timing {
format!(
"🕝 {} | 💭 {}",
Self::format_duration(stream_start.elapsed()),
Self::format_duration(_ttft)
)
} else {
// Return empty string since content was already displayed
String::new()
};
return Ok(TaskResult::new(
final_response,
self.context_window.clone(),
));
}
// Closure marker with timing
if tool_call.tool != "final_output" {
let tokens_delta = self.context_window.used_tokens.saturating_sub(tokens_before);
self.ui_writer
.print_tool_timing(&Self::format_duration(exec_duration),
tokens_delta,
self.context_window.percentage_used());
self.ui_writer.print_agent_prompt();
}
let tokens_delta = self.context_window.used_tokens.saturating_sub(tokens_before);
self.ui_writer
.print_tool_timing(&Self::format_duration(exec_duration),
tokens_delta,
self.context_window.percentage_used());
self.ui_writer.print_agent_prompt();
// Update the request with the new context for next iteration
request.messages = self.context_window.conversation_history.clone();
@@ -2251,7 +2210,7 @@ impl<W: UiWriter> Agent<W> {
// The content was already displayed during streaming and added to current_response.
// Adding it again would cause duplication when the agent message is printed.
// The only time we should add to full_response is:
// 1. For final_output tool (handled separately)
// 1. At the end when no tools were executed
// 2. At the end when no tools were executed (handled in the "no tool executed" branch)
tool_executed = true;
@@ -2324,7 +2283,7 @@ impl<W: UiWriter> Agent<W> {
// No tools were executed in this iteration
// Check if we got any meaningful response at all
// We need to check the parser's text buffer as well, since the LLM
// might have responded with text but no final_output tool call
// might have responded with text but no tool calls
let text_content = parser.get_text_content();
let has_text_response = !text_content.trim().is_empty()
|| !current_response.trim().is_empty();
@@ -2376,10 +2335,10 @@ impl<W: UiWriter> Agent<W> {
));
}
// If tools were executed in previous iterations but final_output wasn't called,
// If tools were executed in previous iterations,
// break to let the outer loop's auto-continue logic handle it
if any_tool_executed && !final_output_called {
debug!("Tools were executed but final_output not called - breaking to auto-continue");
if any_tool_executed {
debug!("Tools were executed, continuing - breaking to auto-continue");
// NOTE: We intentionally do NOT set full_response here.
// The content was already displayed during streaming.
// Setting full_response would cause duplication when the
@@ -2529,15 +2488,15 @@ impl<W: UiWriter> Agent<W> {
warn!("Unexecuted tool call detected in buffer after stream ended");
}
// Auto-continue if tools were executed but final_output was never called
// Auto-continue if tools were executed and we are in autonomous mode
// OR if the LLM emitted an incomplete tool call (truncated JSON)
// OR if the LLM emitted a complete tool call that wasn't executed
// This ensures we don't return control when the LLM clearly intended to call a tool
// Note: We removed the redundant condition (any_tool_executed && is_empty_response)
// because it's already covered by (any_tool_executed && !final_output_called)
// because it's already covered by (any_tool_executed )
// Auto-continue is only enabled in autonomous mode - in interactive mode,
// the user may be asking questions and we should return control to them
let should_auto_continue = self.is_autonomous && ((any_tool_executed && !final_output_called)
let should_auto_continue = self.is_autonomous && ((any_tool_executed )
|| has_incomplete_tool_call
|| has_unexecuted_tool_call);
if should_auto_continue {
@@ -2569,11 +2528,11 @@ impl<W: UiWriter> Agent<W> {
);
} else {
warn!(
"LLM stopped without calling final_output after executing tools ({} iterations, auto-continue attempt {}/{})",
"LLM stopped after executing tools ({} iterations, auto-continue attempt {}/{})",
iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS
);
self.ui_writer.print_context_status(
"\n🔄 Model stopped without calling final_output. Auto-continuing...\n"
"\n🔄 Model stopped without providing summary. Auto-continuing...\n"
);
}
@@ -2602,7 +2561,7 @@ impl<W: UiWriter> Agent<W> {
} else {
Message::new(
MessageRole::User,
"Please continue until you are done. You **MUST** call `final_output` with a summary when done.".to_string(),
"Please continue until you are done. Provide a summary when complete.".to_string(),
)
};
self.context_window.add_message(continue_prompt);
@@ -2613,22 +2572,22 @@ impl<W: UiWriter> Agent<W> {
} else {
// Max attempts reached, give up gracefully
warn!(
"Max auto-continue attempts ({}) reached after {} iterations. Conditions: any_tool_executed={}, final_output_called={}, has_incomplete={}, has_unexecuted={}, is_empty_response={}",
"Max auto-continue attempts ({}) reached after {} iterations. Conditions: any_tool_executed={}, has_incomplete={}, has_unexecuted={}, is_empty_response={}",
MAX_AUTO_SUMMARY_ATTEMPTS,
iteration_count,
any_tool_executed,
final_output_called,
has_incomplete_tool_call,
has_unexecuted_tool_call,
is_empty_response
);
self.ui_writer.print_agent_response(
&format!("\n⚠️ The model stopped without calling final_output after {} auto-continue attempts.\n", MAX_AUTO_SUMMARY_ATTEMPTS)
&format!("\n⚠️ The model stopped without providing a summary after {} auto-continue attempts.\n", MAX_AUTO_SUMMARY_ATTEMPTS)
);
}
} else if has_response {
// Only set full_response if it's empty (first iteration without tools)
// This prevents duplication when the agent responds without calling final_output
// This prevents duplication when the agent responds
// NOTE: We intentionally do NOT set full_response here anymore.
// The content was already displayed during streaming via print_agent_response().
// Setting full_response would cause the CLI to print it again.
@@ -2772,12 +2731,6 @@ impl<W: UiWriter> Agent<W> {
// Dispatch to the appropriate tool handler
let result = tool_dispatch::dispatch_tool(tool_call, &mut ctx).await?;
// Handle special case: final_output needs to save session continuation
if tool_call.tool == "final_output" {
let summary = tool_call.args.get("summary").and_then(|v| v.as_str());
self.save_session_continuation(summary.map(|s| s.to_string()));
}
Ok(result)
}

View File

@@ -34,7 +34,7 @@ IMPORTANT: You must call tools to achieve goals. When you receive a request:
2. Call the appropriate tool with the required parameters
3. Continue or complete the task based on the result
4. If you repeatedly try something and it fails, try a different approach
5. Call the final_output tool with a detailed summary when done.
5. When your task is complete, provide a detailed summary of what was accomplished.
For shell commands: Use the shell tool with the exact command needed. Avoid commands that produce a large amount of output, and consider piping those outputs to files. Example: If asked to list files, immediately call the shell tool with command parameter \"ls\".
If you create temporary files for verification, place these in a subdir named 'tmp'. Do NOT pollute the current dir.
@@ -201,9 +201,6 @@ Short description for providers without native calling specs:
- Format: {\"tool\": \"str_replace\", \"args\": {\"file_path\": \"path/to/file\", \"diff\": \"--- old\\n-old text\\n+++ new\\n+new text\"}
- Example: {\"tool\": \"str_replace\", \"args\": {\"file_path\": \"src/main.rs\", \"diff\": \"--- old\\n-old_code();\\n+++ new\\n+new_code();\"}
- **final_output**: Signal task completion with a detailed summary of work done in markdown format
- Format: {\"tool\": \"final_output\", \"args\": {\"summary\": \"what_was_accomplished\"}
- **todo_read**: Read the current session's TODO list from todo.g3.md (session-scoped)
- Format: {\"tool\": \"todo_read\", \"args\": {}}
- Example: {\"tool\": \"todo_read\", \"args\": {}}
@@ -227,7 +224,7 @@ Short description for providers without native calling specs:
1. Analyze the request and break down into smaller tasks if appropriate
2. Execute ONE tool at a time. An exception exists for when you're writing files. See below.
3. STOP when the original request was satisfied
4. Call the final_output tool when done
4. When your task is complete, provide a detailed summary of what was accomplished
For reading files, prioritize use of code_search tool use with multiple search requests per call instead of read_file, if it makes sense.

View File

@@ -32,8 +32,8 @@ pub struct SessionContinuation {
pub created_at: String,
/// Original session ID
pub session_id: String,
/// The last final_output summary
pub final_output_summary: Option<String>,
/// Session summary (last assistant response)
pub summary: Option<String>,
/// Path to the full session log (g3_session_*.json)
pub session_log_path: String,
/// Context window usage percentage when saved
@@ -50,7 +50,7 @@ impl SessionContinuation {
is_agent_mode: bool,
agent_name: Option<String>,
session_id: String,
final_output_summary: Option<String>,
summary: Option<String>,
session_log_path: String,
context_percentage: f32,
todo_snapshot: Option<String>,
@@ -62,7 +62,7 @@ impl SessionContinuation {
agent_name,
created_at: chrono::Utc::now().to_rfc3339(),
session_id,
final_output_summary,
summary,
session_log_path,
context_percentage,
todo_snapshot,

View File

@@ -17,8 +17,15 @@ impl TaskResult {
}
}
/// Extract the final_output content from the response (for coach feedback in autonomous mode)
/// This looks for the complete final_output content, not just the last block
/// Extract a summary from the response (for coach feedback in autonomous mode)
/// This looks for the last substantial text block in the response.
/// Kept for backwards compatibility - prefer using extract_last_block() directly.
pub fn extract_summary(&self) -> String {
self.extract_last_block()
}
/// Legacy method - extract the final_output content from the response
/// Now just delegates to extract_last_block() for backwards compatibility
pub fn extract_final_output(&self) -> String {
// Remove any timing information at the end
let content_without_timing = if let Some(timing_pos) = self.response.rfind("\n⏱️") {
@@ -27,30 +34,23 @@ impl TaskResult {
&self.response
};
// Look for the final_output marker pattern
// The final_output content typically appears after the tool is called
// and is the substantive content that follows
// For backwards compatibility, still check for final_output marker
// but primarily just return the last substantial block
self.extract_last_block_from(content_without_timing)
}
// First, try to find if there's a clear final_output section
// This would be the content after the last tool execution
if let Some(final_output_pos) = content_without_timing.rfind("final_output") {
// Find the content that follows the final_output call
// Skip past the tool call line and any immediate formatting
if let Some(content_start) = content_without_timing[final_output_pos..].find('\n') {
let start_pos = final_output_pos + content_start + 1;
let final_content = &content_without_timing[start_pos..];
/// Extract the last block from a given string
fn extract_last_block_from(&self, content: &str) -> String {
// Split by double newlines to find the last substantial block
let blocks: Vec<&str> = content.split("\n\n").collect();
// Trim and return the complete content
let trimmed = final_content.trim();
if !trimmed.is_empty() {
return trimmed.to_string();
}
}
}
// Fallback to the original extract_last_block behavior if we can't find final_output
// This maintains backward compatibility
self.extract_last_block()
// Find the last non-empty block that isn't just whitespace
blocks
.iter()
.rev()
.find(|block| !block.trim().is_empty())
.map(|block| block.trim().to_string())
.unwrap_or_else(|| content.trim().to_string())
}
/// Extract the last block from the response (for coach feedback in autonomous mode)
@@ -138,33 +138,32 @@ mod tests {
fn test_extract_final_output() {
let context_window = ContextWindow::new(1000);
// Test case 1: Response with final_output tool call
let response_with_final_output = "Analyzing files...\n\nCalling final_output\n\nThis is the complete feedback\nwith multiple lines\nand important details\n\n⏱️ 2.3s".to_string();
let result = TaskResult::new(response_with_final_output, context_window.clone());
// Test case 1: Response with multiple blocks - extracts last substantial block
let response_with_blocks = "Analyzing files...\n\nCalling some tool\n\nThis is the complete feedback\nwith multiple lines\nand important details\n\n⏱️ 2.3s".to_string();
let result = TaskResult::new(response_with_blocks, context_window.clone());
assert_eq!(
result.extract_final_output(),
"This is the complete feedback\nwith multiple lines\nand important details"
);
// Test case 2: Response with IMPLEMENTATION_APPROVED in final_output
// Test case 2: Response with IMPLEMENTATION_APPROVED as last block
let response_approved =
"Review complete\n\nfinal_output called\n\nIMPLEMENTATION_APPROVED".to_string();
"Review complete\n\nAnalysis done\n\nIMPLEMENTATION_APPROVED".to_string();
let result = TaskResult::new(response_approved, context_window.clone());
assert_eq!(result.extract_final_output(), "IMPLEMENTATION_APPROVED");
assert!(result.is_approved());
// Test case 3: Response with detailed feedback in final_output
let response_feedback = "Checking implementation...\n\nfinal_output\n\nThe following issues need to be addressed:\n1. Missing error handling in main.rs\n2. Tests are not comprehensive\n3. Documentation needs improvement\n\nPlease fix these issues.".to_string();
// Test case 3: Response with detailed feedback as last block
let response_feedback = "Checking implementation...\n\nAnalysis complete\n\nThe following issues need to be addressed:\n1. Missing error handling in main.rs\n2. Tests are not comprehensive\n3. Documentation needs improvement\n\nPlease fix these issues.".to_string();
let result = TaskResult::new(response_feedback, context_window.clone());
let extracted = result.extract_final_output();
assert!(extracted.contains("The following issues need to be addressed:"));
assert!(extracted.contains("1. Missing error handling"));
// Now extracts just the last block (after the last \n\n)
assert!(extracted.contains("Please fix these issues."));
assert!(!result.is_approved());
// Test case 4: Response without final_output (fallback to extract_last_block)
let response_no_final_output = "Some analysis\n\nFinal thoughts here".to_string();
let result = TaskResult::new(response_no_final_output, context_window.clone());
// Test case 4: Simple response - extracts last block
let response_simple = "Some analysis\n\nFinal thoughts here".to_string();
let result = TaskResult::new(response_simple, context_window.clone());
assert_eq!(result.extract_final_output(), "Final thoughts here");
// Test case 5: Empty response

View File

@@ -157,20 +157,6 @@ fn create_core_tools() -> Vec<Tool> {
"required": ["file_path", "diff"]
}),
},
Tool {
name: "final_output".to_string(),
description: "Signal task completion with a detailed summary".to_string(),
input_schema: json!({
"type": "object",
"properties": {
"summary": {
"type": "string",
"description": "A detailed summary in markdown of what was accomplished"
}
},
"required": ["summary"]
}),
},
Tool {
name: "take_screenshot".to_string(),
description: "Capture a screenshot of a specific application window. You MUST specify the window_id parameter with the application name (e.g., 'Safari', 'Terminal', 'Google Chrome'). The tool will automatically use the native screencapture command with the application's window ID for a clean capture. Use list_windows first to identify available windows.".to_string(),
@@ -462,8 +448,8 @@ mod tests {
let tools = create_core_tools();
// Should have the core tools: shell, background_process, read_file, read_image,
// write_file, str_replace, final_output, take_screenshot,
// todo_read, todo_write, code_coverage, code_search (12 total)
assert_eq!(tools.len(), 12);
// todo_read, todo_write, code_coverage, code_search (11 total)
assert_eq!(tools.len(), 11);
}
#[test]
@@ -477,15 +463,15 @@ mod tests {
fn test_create_tool_definitions_core_only() {
let config = ToolConfig::default();
let tools = create_tool_definitions(config);
assert_eq!(tools.len(), 12);
assert_eq!(tools.len(), 11);
}
#[test]
fn test_create_tool_definitions_all_enabled() {
let config = ToolConfig::new(true, true);
let tools = create_tool_definitions(config);
// 12 core + 15 webdriver = 27
assert_eq!(tools.len(), 27);
// 11 core + 15 webdriver = 26
assert_eq!(tools.len(), 26);
}
#[test]

View File

@@ -37,11 +37,6 @@ pub async fn dispatch_tool<W: UiWriter>(
"todo_write" => todo::execute_todo_write(tool_call, ctx).await,
// Miscellaneous tools
"final_output" => {
let result = misc::execute_final_output(tool_call, ctx).await?;
// Note: Session continuation saving is handled by the caller
Ok(result)
}
"take_screenshot" => misc::execute_take_screenshot(tool_call, ctx).await,
"code_coverage" => misc::execute_code_coverage(tool_call, ctx).await,
"code_search" => misc::execute_code_search(tool_call, ctx).await,

View File

@@ -1,4 +1,4 @@
//! Miscellaneous tools: final_output, take_screenshot, code_coverage, code_search.
//! Miscellaneous tools: take_screenshot, code_coverage, code_search.
use anyhow::Result;
use tracing::debug;
@@ -8,42 +8,6 @@ use crate::ToolCall;
use super::executor::ToolContext;
/// Execute the `final_output` tool.
pub async fn execute_final_output<W: UiWriter>(
tool_call: &ToolCall,
ctx: &ToolContext<'_, W>,
) -> Result<String> {
debug!("Processing final_output tool call");
let summary_str = tool_call.args.get("summary").and_then(|v| v.as_str());
// In autonomous mode, check for incomplete TODO items before allowing completion
if ctx.is_autonomous {
let todo_content = ctx.todo_content.read().await;
let has_incomplete_todos = todo_content
.lines()
.any(|line| line.trim().starts_with("- [ ]"));
drop(todo_content);
if has_incomplete_todos {
return Ok(
"There are still incomplete TODO items. Please continue until \
*ALL* TODO items in *ALL* phases are marked complete, and \
*ONLY* then call `final_output`."
.to_string(),
);
}
}
// Return the summary or a default message
// Note: Session continuation saving is handled by the caller (Agent)
if let Some(summary) = summary_str {
Ok(summary.to_string())
} else {
Ok("✅ Turn completed".to_string())
}
}
/// Execute the `take_screenshot` tool.
pub async fn execute_take_screenshot<W: UiWriter>(
tool_call: &ToolCall,

View File

@@ -66,10 +66,6 @@ pub trait UiWriter: Send + Sync {
/// Returns the index of the selected option
fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize;
/// Print the final output summary with markdown formatting
/// Shows a spinner while formatting, then renders the markdown
fn print_final_output(&self, summary: &str);
/// Filter JSON tool calls from streaming content for display.
/// This is a UI concern - the raw content should be preserved for logging.
/// Default implementation passes through unchanged.
@@ -125,7 +121,4 @@ impl UiWriter for NullUiWriter {
fn prompt_user_choice(&self, _message: &str, _options: &[&str]) -> usize {
0
}
fn print_final_output(&self, _summary: &str) {
// No-op for null writer
}
}