Remove final_output tool and improve scout report handback

final_output removal:
- Remove final_output from tool definitions and dispatch
- Update system prompts to request summaries as regular text
- Remove final_output_called field from StreamingState
- Update auto_continue tests to remove final_output_called parameter
- Remove final_output test from tool_execution_test.rs
- Update planner and flock prompts so they no longer reference final_output
- Keep backwards-compatibility code in feedback_extraction.rs and task_result.rs

Scout report handback:
- Change from file-based to delimiter-based report extraction
- Scout outputs its report between ---SCOUT_REPORT_START--- and ---SCOUT_REPORT_END--- markers
- Research tool extracts the content between the markers and strips ANSI escape codes
- Add comprehensive tests for extraction and ANSI stripping

657 tests pass.
This commit is contained in:
Dhanji R. Prasanna
2026-01-10 13:43:04 +11:00
parent cab2fb187a
commit 0aa1287ca6
9 changed files with 247 additions and 95 deletions

View File

@@ -4,7 +4,6 @@
//! 1. Empty/trivial responses (just timing lines)
//! 2. Incomplete tool calls
//! 3. Unexecuted tool calls
//! 4. Missing final_output after tool execution
/// Helper function to check if a response is considered "empty" or trivial
/// This mirrors the logic in lib.rs for detecting empty responses
@@ -117,37 +116,26 @@ fn test_max_auto_summary_attempts_is_reasonable() {
// =============================================================================
/// Simulates the should_auto_continue logic from lib.rs
/// After removing final_output, the logic is simpler:
/// - Continue if there's an incomplete tool call
/// - Continue if there's an unexecuted tool call
/// - Continue if tool executed but response is empty (LLM stuttered)
fn should_auto_continue(
any_tool_executed: bool,
final_output_called: bool,
has_incomplete_tool_call: bool,
has_unexecuted_tool_call: bool,
is_empty_response: bool,
) -> bool {
(any_tool_executed && !final_output_called)
|| has_incomplete_tool_call
has_incomplete_tool_call
|| has_unexecuted_tool_call
|| (any_tool_executed && is_empty_response)
}
#[test]
fn test_auto_continue_after_tool_no_final_output() {
// Tool executed but no final_output - should continue
assert!(should_auto_continue(
true, // any_tool_executed
false, // final_output_called
false, // has_incomplete_tool_call
false, // has_unexecuted_tool_call
false, // is_empty_response
));
}
#[test]
fn test_auto_continue_with_final_output() {
// Tool executed AND final_output called - should NOT continue
fn test_auto_continue_tool_executed_with_response() {
// Tool executed with substantive response - should NOT continue
assert!(!should_auto_continue(
true, // any_tool_executed
true, // final_output_called
false, // has_incomplete_tool_call
false, // has_unexecuted_tool_call
false, // is_empty_response
@@ -159,7 +147,6 @@ fn test_auto_continue_incomplete_tool_call() {
// Incomplete tool call - should continue regardless of other flags
assert!(should_auto_continue(
false, // any_tool_executed
false, // final_output_called
true, // has_incomplete_tool_call
false, // has_unexecuted_tool_call
false, // is_empty_response
@@ -171,7 +158,6 @@ fn test_auto_continue_unexecuted_tool_call() {
// Unexecuted tool call - should continue
assert!(should_auto_continue(
false, // any_tool_executed
false, // final_output_called
false, // has_incomplete_tool_call
true, // has_unexecuted_tool_call
false, // is_empty_response
@@ -183,7 +169,6 @@ fn test_auto_continue_empty_response_after_tool() {
// Empty response after tool execution - should continue
assert!(should_auto_continue(
true, // any_tool_executed
false, // final_output_called
false, // has_incomplete_tool_call
false, // has_unexecuted_tool_call
true, // is_empty_response
@@ -196,7 +181,6 @@ fn test_auto_continue_empty_response_no_tool() {
// (This is a normal case where LLM just didn't respond)
assert!(!should_auto_continue(
false, // any_tool_executed
false, // final_output_called
false, // has_incomplete_tool_call
false, // has_unexecuted_tool_call
true, // is_empty_response
@@ -208,7 +192,6 @@ fn test_auto_continue_no_conditions_met() {
// No tools, no incomplete calls, substantive response - should NOT continue
assert!(!should_auto_continue(
false, // any_tool_executed
false, // final_output_called
false, // has_incomplete_tool_call
false, // has_unexecuted_tool_call
false, // is_empty_response
@@ -216,19 +199,22 @@ fn test_auto_continue_no_conditions_met() {
}
// =============================================================================
// Test: Redundant condition detection
// Test: Edge cases
// =============================================================================
#[test]
fn test_redundant_empty_response_condition() {
// This test documents that (any_tool_executed && is_empty_response) is redundant
// when (any_tool_executed && !final_output_called) is already true
fn test_auto_continue_multiple_conditions() {
// Multiple conditions true - should still continue
assert!(should_auto_continue(
true, // any_tool_executed
true, // has_incomplete_tool_call
true, // has_unexecuted_tool_call
true, // is_empty_response
));
// Case: tool executed, no final_output, empty response
let result_with_empty = should_auto_continue(true, false, false, false, true);
let result_without_empty = should_auto_continue(true, false, false, false, false);
// Only incomplete tool call
assert!(should_auto_continue(false, true, false, false));
// Both should be true because (any_tool_executed && !final_output_called) is true
assert_eq!(result_with_empty, result_without_empty,
"The is_empty_response condition is redundant when any_tool_executed && !final_output_called");
// Only unexecuted tool call
assert!(should_auto_continue(false, false, true, false));
}

View File

@@ -326,27 +326,6 @@ mod todo_tests {
}
}
// =============================================================================
// Test: final_output tool
// =============================================================================
mod final_output_tests {
use super::*;
#[test]
fn test_final_output_tool_call() {
let tool_call = make_tool_call(
"final_output",
json!({
"summary": "Task completed successfully.\n\n## Changes Made\n- Added feature X"
}),
);
assert_eq!(tool_call.tool, "final_output");
assert!(tool_call.args.get("summary").is_some());
}
}
// =============================================================================
// Test: code_search tool
// =============================================================================