Remove final_output tool - let summaries stream naturally
- Remove final_output from tool definitions, dispatch, and misc tools - Update system prompts to request summaries as regular markdown text - Remove print_final_output from UiWriter trait and all implementations - Remove final_output handling from agent core logic - Rename final_output_summary → summary in session continuation - Delete final_output test files - Update tool count tests (12→11, 27→26) This allows LLM summaries to stream through the markdown formatter for a more natural, responsive user experience instead of buffering everything into a tool call.
This commit is contained in:
@@ -148,9 +148,6 @@ fn extract_coach_feedback_from_logs(
|
||||
coach_agent: &g3_core::Agent<ConsoleUiWriter>,
|
||||
output: &SimpleOutput,
|
||||
) -> Result<String> {
|
||||
// CORRECT APPROACH: Get the session ID from the current coach agent
|
||||
// and read its specific log file directly
|
||||
|
||||
// Get the coach agent's session ID
|
||||
let session_id = coach_agent
|
||||
.get_session_id()
|
||||
@@ -167,74 +164,78 @@ fn extract_coach_feedback_from_logs(
|
||||
logs_dir.join(format!("g3_session_{}.json", session_id))
|
||||
};
|
||||
|
||||
// Read the coach agent's specific log file
|
||||
if log_file_path.exists() {
|
||||
if let Ok(log_content) = std::fs::read_to_string(&log_file_path) {
|
||||
if let Ok(log_json) = serde_json::from_str::<serde_json::Value>(&log_content) {
|
||||
if let Some(context_window) = log_json.get("context_window") {
|
||||
if let Some(conversation_history) = context_window.get("conversation_history") {
|
||||
if let Some(messages) = conversation_history.as_array() {
|
||||
// Go backwards through the conversation to find the last tool result
|
||||
// that corresponds to a final_output tool call
|
||||
for i in (0..messages.len()).rev() {
|
||||
let msg = &messages[i];
|
||||
|
||||
// Check if this is a User message with "Tool result:"
|
||||
if let Some(role) = msg.get("role") {
|
||||
if let Some(role_str) = role.as_str() {
|
||||
if role_str == "User" || role_str == "user" {
|
||||
if let Some(content) = msg.get("content") {
|
||||
if let Some(content_str) = content.as_str() {
|
||||
if content_str.starts_with("Tool result:") {
|
||||
// Found a tool result, now check the preceding message
|
||||
// to verify it was a final_output tool call
|
||||
if i > 0 {
|
||||
let prev_msg = &messages[i - 1];
|
||||
if let Some(prev_role) = prev_msg.get("role") {
|
||||
if let Some(prev_role_str) = prev_role.as_str() {
|
||||
if prev_role_str == "assistant" || prev_role_str == "Assistant" {
|
||||
if let Some(prev_content) = prev_msg.get("content") {
|
||||
if let Some(prev_content_str) = prev_content.as_str() {
|
||||
// Check if the previous assistant message contains a final_output tool call
|
||||
if prev_content_str.contains("\"tool\": \"final_output\"") {
|
||||
// This is a final_output tool result
|
||||
let feedback = if content_str.starts_with("Tool result: ") {
|
||||
content_str.strip_prefix("Tool result: ")
|
||||
.unwrap_or(content_str)
|
||||
.to_string()
|
||||
} else {
|
||||
content_str.to_string()
|
||||
};
|
||||
|
||||
output.print(&format!(
|
||||
"Coach feedback extracted: {} characters (from {} total)",
|
||||
feedback.len(),
|
||||
content_str.len()
|
||||
));
|
||||
output.print(&format!("Coach feedback:\n{}", feedback));
|
||||
|
||||
output.print(&format!(
|
||||
"✅ Extracted coach feedback from session: {} (verified final_output tool)",
|
||||
session_id
|
||||
));
|
||||
return Ok(feedback);
|
||||
} else {
|
||||
output.print(&format!(
|
||||
"⚠️ Skipping tool result at index {} - not a final_output tool call",
|
||||
i
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Try to extract from session log
|
||||
if let Some(feedback) = try_extract_feedback_from_log(&log_file_path, output) {
|
||||
output.print(&format!(
|
||||
"✅ Extracted coach feedback from session: {}",
|
||||
session_id
|
||||
));
|
||||
return Ok(feedback);
|
||||
}
|
||||
|
||||
// Fallback: use the TaskResult's extract_summary method
|
||||
let fallback = coach_result.extract_summary();
|
||||
if !fallback.is_empty() {
|
||||
output.print(&format!(
|
||||
"✅ Extracted coach feedback from response: {} chars",
|
||||
fallback.len()
|
||||
));
|
||||
return Ok(fallback);
|
||||
}
|
||||
|
||||
// Last resort: return an error instead of panicking
|
||||
Err(anyhow::anyhow!(
|
||||
"Could not extract coach feedback from session: {}\n\
|
||||
Log file path: {:?}\n\
|
||||
Log file exists: {}\n\
|
||||
Coach result response length: {} chars",
|
||||
session_id,
|
||||
log_file_path,
|
||||
log_file_path.exists(),
|
||||
coach_result.response.len()
|
||||
))
|
||||
}
|
||||
|
||||
/// Helper function to extract feedback from a session log file
|
||||
/// Looks for the last assistant message with substantial text content
|
||||
fn try_extract_feedback_from_log(
|
||||
log_file_path: &std::path::Path,
|
||||
_output: &SimpleOutput,
|
||||
) -> Option<String> {
|
||||
if !log_file_path.exists() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let log_content = std::fs::read_to_string(log_file_path).ok()?;
|
||||
let log_json: serde_json::Value = serde_json::from_str(&log_content).ok()?;
|
||||
|
||||
let messages = log_json
|
||||
.get("context_window")?
|
||||
.get("conversation_history")?
|
||||
.as_array()?;
|
||||
|
||||
// Search backwards for the last assistant message with text content
|
||||
for msg in messages.iter().rev() {
|
||||
let role = msg.get("role").and_then(|v| v.as_str())?;
|
||||
|
||||
if role.eq_ignore_ascii_case("assistant") {
|
||||
if let Some(content) = msg.get("content") {
|
||||
// Handle string content
|
||||
if let Some(content_str) = content.as_str() {
|
||||
let trimmed = content_str.trim();
|
||||
// Skip empty or very short responses (likely just tool calls)
|
||||
if !trimmed.is_empty() && trimmed.len() > 10 {
|
||||
return Some(trimmed.to_string());
|
||||
}
|
||||
}
|
||||
// Handle array content (native tool calling format)
|
||||
if let Some(content_array) = content.as_array() {
|
||||
for block in content_array {
|
||||
if block.get("type").and_then(|v| v.as_str()) == Some("text") {
|
||||
if let Some(text) = block.get("text").and_then(|v| v.as_str()) {
|
||||
let trimmed = text.trim();
|
||||
if !trimmed.is_empty() && trimmed.len() > 10 {
|
||||
return Some(trimmed.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -244,18 +245,7 @@ fn extract_coach_feedback_from_logs(
|
||||
}
|
||||
}
|
||||
|
||||
// If we couldn't extract from logs, panic with detailed error
|
||||
panic!(
|
||||
"CRITICAL: Could not extract coach feedback from session: {}\n\
|
||||
Log file path: {:?}\n\
|
||||
Log file exists: {}\n\
|
||||
This indicates the coach did not call final_output tool or the log is corrupted.\n\
|
||||
Coach result response length: {} chars",
|
||||
session_id,
|
||||
log_file_path,
|
||||
log_file_path.exists(),
|
||||
coach_result.response.len()
|
||||
);
|
||||
None
|
||||
}
|
||||
|
||||
use clap::Parser;
|
||||
@@ -1492,7 +1482,7 @@ async fn run_interactive<W: UiWriter>(
|
||||
" Context: {:.1}% used",
|
||||
continuation.context_percentage
|
||||
));
|
||||
if let Some(ref summary) = continuation.final_output_summary {
|
||||
if let Some(ref summary) = continuation.summary {
|
||||
let preview: String = summary.chars().take(80).collect();
|
||||
output.print(&format!(" Last output: {}...", preview));
|
||||
}
|
||||
@@ -2614,16 +2604,16 @@ Review the current state of the project and provide a concise critique focusing
|
||||
5. Use UI tools such as webdriver to test functionality thoroughly
|
||||
|
||||
CRITICAL INSTRUCTIONS:
|
||||
1. You MUST use the final_output tool to provide your feedback
|
||||
2. The summary in final_output should be CONCISE and ACTIONABLE
|
||||
1. Provide your feedback as your final response message
|
||||
2. Your feedback should be CONCISE and ACTIONABLE
|
||||
3. Focus ONLY on what needs to be fixed or improved
|
||||
4. Do NOT include your analysis process, file contents, or compilation output in the summary
|
||||
4. Do NOT include your analysis process, file contents, or compilation output in your final feedback
|
||||
|
||||
If the implementation thoroughly meets all requirements, compiles and is fully tested (especially UI flows) *WITHOUT* minor gaps or errors:
|
||||
- Call final_output with summary: 'IMPLEMENTATION_APPROVED'
|
||||
- Respond with: 'IMPLEMENTATION_APPROVED'
|
||||
|
||||
If improvements are needed:
|
||||
- Call final_output with a brief summary listing ONLY the specific issues to fix
|
||||
- Respond with a brief summary listing ONLY the specific issues to fix
|
||||
|
||||
Remember: Be clear in your review and concise in your feedback. APPROVE iff the implementation works and thoroughly fits the requirements (implementation > 95% complete). Be rigorous, especially by testing that all UI features work.",
|
||||
requirements
|
||||
|
||||
@@ -108,8 +108,4 @@ impl UiWriter for MachineUiWriter {
|
||||
0
|
||||
}
|
||||
|
||||
fn print_final_output(&self, summary: &str) {
|
||||
println!("FINAL_OUTPUT:");
|
||||
println!("{}", summary);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -362,36 +362,6 @@ impl UiWriter for ConsoleUiWriter {
|
||||
}
|
||||
}
|
||||
|
||||
fn print_final_output(&self, summary: &str) {
|
||||
// Print a header separator
|
||||
println!("\x1b[1;35m━━━ Summary ━━━\x1b[0m");
|
||||
println!();
|
||||
|
||||
// Use the same streaming markdown formatter for consistency
|
||||
let mut skin = MadSkin::default();
|
||||
skin.bold.set_fg(termimad::crossterm::style::Color::Green);
|
||||
skin.italic.set_fg(termimad::crossterm::style::Color::Cyan);
|
||||
skin.inline_code.set_fg(termimad::crossterm::style::Color::Rgb {
|
||||
r: 216,
|
||||
g: 177,
|
||||
b: 114,
|
||||
});
|
||||
|
||||
let mut formatter = StreamingMarkdownFormatter::new(skin);
|
||||
|
||||
// Process the entire summary through the formatter
|
||||
let formatted = formatter.process(summary);
|
||||
print!("{}", formatted);
|
||||
|
||||
// Flush any remaining buffered content
|
||||
let remaining = formatter.finish();
|
||||
print!("{}", remaining);
|
||||
let _ = io::stdout().flush();
|
||||
|
||||
// Print a footer separator
|
||||
println!();
|
||||
println!("\x1b[1;35m━━━━━━━━━━━━━━━\x1b[0m");
|
||||
}
|
||||
|
||||
fn filter_json_tool_calls(&self, content: &str) -> String {
|
||||
// Apply JSON tool call filtering for display
|
||||
|
||||
@@ -1,178 +0,0 @@
|
||||
//! Quick test to verify final_output rendering works with streaming markdown
|
||||
//! Run with: cargo test -p g3-cli --test test_final_output -- --nocapture
|
||||
|
||||
use std::io::{self, Write};
|
||||
|
||||
#[test]
|
||||
fn test_final_output_visual() {
|
||||
use g3_cli::streaming_markdown::StreamingMarkdownFormatter;
|
||||
use termimad::MadSkin;
|
||||
|
||||
// Create the test markdown
|
||||
let test_markdown = r##"# Task Completed Successfully
|
||||
|
||||
Here's a summary of what was accomplished:
|
||||
|
||||
## Rust Code Example
|
||||
|
||||
Created a new function to handle user authentication:
|
||||
|
||||
```rust
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Authenticates a user with the given credentials
|
||||
pub async fn authenticate(username: &str, password: &str) -> Result<User, AuthError> {
|
||||
let hash = hash_password(password)?;
|
||||
|
||||
if let Some(user) = db.find_user(username).await? {
|
||||
if user.password_hash == hash {
|
||||
Ok(user)
|
||||
} else {
|
||||
Err(AuthError::InvalidPassword)
|
||||
}
|
||||
} else {
|
||||
Err(AuthError::UserNotFound)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Python Example
|
||||
|
||||
Also added a Python script for data processing:
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
from typing import List, Dict
|
||||
|
||||
def process_data(items: List[Dict]) -> pd.DataFrame:
|
||||
"""Process raw items into a cleaned DataFrame."""
|
||||
df = pd.DataFrame(items)
|
||||
df['timestamp'] = pd.to_datetime(df['timestamp'])
|
||||
df = df.dropna(subset=['value'])
|
||||
return df.sort_values('timestamp')
|
||||
```
|
||||
|
||||
## JavaScript/TypeScript
|
||||
|
||||
Frontend component:
|
||||
|
||||
```typescript
|
||||
interface User {
|
||||
id: string;
|
||||
name: string;
|
||||
email: string;
|
||||
}
|
||||
|
||||
const UserCard: React.FC<{ user: User }> = ({ user }) => {
|
||||
return (
|
||||
<div className="user-card">
|
||||
<h3>{user.name}</h3>
|
||||
<p>{user.email}</p>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
```
|
||||
|
||||
## Shell Commands
|
||||
|
||||
Deployment script:
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
|
||||
echo "Building project..."
|
||||
cargo build --release
|
||||
|
||||
echo "Running tests..."
|
||||
cargo test --all
|
||||
|
||||
echo "Deploying to production..."
|
||||
rsync -avz ./target/release/app server:/opt/app/
|
||||
```
|
||||
|
||||
## JSON Configuration
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "my-project",
|
||||
"version": "1.0.0",
|
||||
"dependencies": {
|
||||
"serde": "1.0",
|
||||
"tokio": { "version": "1.0", "features": ["full"] }
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Other Markdown Features
|
||||
|
||||
This section tests that **bold text**, *italic text*, and `inline code` still work correctly.
|
||||
|
||||
### Lists
|
||||
|
||||
- First item
|
||||
- Second item with **bold**
|
||||
- Third item with `code`
|
||||
|
||||
### Numbered List
|
||||
|
||||
1. Step one
|
||||
2. Step two
|
||||
3. Step three
|
||||
|
||||
### Blockquote
|
||||
|
||||
> This is a blockquote that should be rendered
|
||||
> with proper styling by termimad.
|
||||
|
||||
### Table
|
||||
|
||||
| Language | Extension | Use Case |
|
||||
|----------|-----------|----------|
|
||||
| Rust | .rs | Systems |
|
||||
| Python | .py | Scripts |
|
||||
| TypeScript | .ts | Frontend |
|
||||
|
||||
## Code Without Language
|
||||
|
||||
```
|
||||
This is a code block without a language specified.
|
||||
It should still be rendered as code, just without
|
||||
syntax highlighting.
|
||||
```
|
||||
|
||||
## Final Notes
|
||||
|
||||
All changes have been tested and verified. The implementation:
|
||||
|
||||
- ✅ Handles multiple languages
|
||||
- ✅ Preserves markdown formatting
|
||||
- ✅ Works with nested structures
|
||||
- ✅ Gracefully handles edge cases
|
||||
"##;
|
||||
|
||||
// Create a styled markdown skin (same as in print_final_output)
|
||||
let mut skin = MadSkin::default();
|
||||
skin.bold.set_fg(termimad::crossterm::style::Color::Green);
|
||||
skin.italic.set_fg(termimad::crossterm::style::Color::Cyan);
|
||||
skin.inline_code.set_fg(termimad::crossterm::style::Color::Rgb {
|
||||
r: 216,
|
||||
g: 177,
|
||||
b: 114,
|
||||
});
|
||||
|
||||
// Print header
|
||||
println!("\n\x1b[1;35m━━━ Summary ━━━\x1b[0m\n");
|
||||
|
||||
// Use the streaming markdown formatter (same as print_final_output now uses)
|
||||
let mut formatter = StreamingMarkdownFormatter::new(skin);
|
||||
let formatted = formatter.process(test_markdown);
|
||||
print!("{}", formatted);
|
||||
let remaining = formatter.finish();
|
||||
print!("{}", remaining);
|
||||
|
||||
// Print footer
|
||||
println!("\n\x1b[1;35m━━━━━━━━━━━━━━━\x1b[0m");
|
||||
|
||||
let _ = io::stdout().flush();
|
||||
}
|
||||
Reference in New Issue
Block a user