Fix continuation errors: auto-continue when final_output not called

- Add final_output_called flag to track if LLM properly completed
- Auto-continue with prompt if tools executed but final_output missing
- Remove unused last_action_was_tool and any_text_response variables
- Simplifies previous complex incomplete response detection logic
This commit is contained in:
Dhanji R. Prasanna
2025-12-20 15:32:12 +11:00
parent ba8bd371fc
commit fbf31e5f68
5 changed files with 789 additions and 49 deletions

View File

@@ -3,12 +3,14 @@ pub mod error_handling;
pub mod feedback_extraction;
pub mod project;
pub mod retry;
pub mod session_continuation;
pub mod task_result;
pub mod ui_writer;
pub use task_result::TaskResult;
pub use retry::{RetryConfig, RetryResult, execute_with_retry, retry_operation};
pub use feedback_extraction::{ExtractedFeedback, FeedbackSource, FeedbackExtractionConfig, extract_coach_feedback};
pub use session_continuation::{SessionContinuation, load_continuation, save_continuation, clear_continuation, has_valid_continuation, get_session_dir, load_context_from_session_log};
// Export agent prompt generation for CLI use
pub use prompts::get_agent_system_prompt;
@@ -606,6 +608,23 @@ impl ContextWindow {
}
}
/// Clear the conversation history while preserving system messages
/// Used by /clear command to start fresh
pub fn clear_conversation(&mut self) {
// Keep only system messages (system prompt, README, etc.)
let system_messages: Vec<Message> = self.conversation_history
.iter()
.filter(|m| matches!(m.role, MessageRole::System))
.cloned()
.collect();
self.conversation_history = system_messages;
self.used_tokens = self.conversation_history.iter()
.map(|m| Self::estimate_tokens(&m.content))
.sum();
self.last_thinning_percentage = 0;
}
pub fn remaining_tokens(&self) -> u32 {
self.total_tokens.saturating_sub(self.used_tokens)
}
@@ -3074,6 +3093,133 @@ impl<W: UiWriter> Agent<W> {
self.requirements_sha = Some(sha);
}
/// Save a session continuation artifact
/// Called when final_output is invoked to enable session resumption
pub fn save_session_continuation(&self, final_output_summary: Option<String>) {
use crate::session_continuation::{save_continuation, SessionContinuation};
let session_id = match &self.session_id {
Some(id) => id.clone(),
None => {
debug!("No session ID, skipping continuation save");
return;
}
};
// Get the session log path
let logs_dir = get_logs_dir();
let session_log_path = logs_dir.join(format!("g3_session_{}.json", session_id));
// Get current TODO content
let todo_snapshot = std::fs::read_to_string(get_todo_path()).ok();
// Get working directory
let working_directory = std::env::current_dir()
.map(|p| p.to_string_lossy().to_string())
.unwrap_or_else(|_| ".".to_string());
let continuation = SessionContinuation::new(
session_id,
final_output_summary,
session_log_path.to_string_lossy().to_string(),
self.context_window.percentage_used(),
todo_snapshot,
working_directory,
);
if let Err(e) = save_continuation(&continuation) {
error!("Failed to save session continuation: {}", e);
} else {
debug!("Saved session continuation artifact");
}
}
/// Clear session state and continuation artifacts (for /clear command)
pub fn clear_session(&mut self) {
use crate::session_continuation::clear_continuation;
// Clear the context window (keep system prompt)
self.context_window.clear_conversation();
// Clear continuation artifacts
if let Err(e) = clear_continuation() {
error!("Failed to clear continuation artifacts: {}", e);
}
info!("Session cleared");
}
/// Restore session from a continuation artifact
/// Returns true if full context was restored, false if only summary was used
pub fn restore_from_continuation(
&mut self,
continuation: &crate::session_continuation::SessionContinuation,
) -> Result<bool> {
use std::path::PathBuf;
let session_log_path = PathBuf::from(&continuation.session_log_path);
// If context < 80%, try to restore full context
if continuation.can_restore_full_context() && session_log_path.exists() {
// Load the session log
let json = std::fs::read_to_string(&session_log_path)?;
let session_data: serde_json::Value = serde_json::from_str(&json)?;
// Extract conversation history
if let Some(context_window) = session_data.get("context_window") {
if let Some(history) = context_window.get("conversation_history") {
if let Some(messages) = history.as_array() {
// Clear current conversation (keep system messages)
self.context_window.clear_conversation();
// Restore messages from session log (skip system messages as they're preserved)
for msg in messages {
let role_str = msg.get("role").and_then(|r| r.as_str()).unwrap_or("user");
let content = msg.get("content").and_then(|c| c.as_str()).unwrap_or("");
let role = match role_str {
"system" => continue, // Skip system messages, already preserved
"assistant" => MessageRole::Assistant,
_ => MessageRole::User,
};
self.context_window.add_message(Message {
role,
id: String::new(),
content: content.to_string(),
cache_control: None,
});
}
info!("Restored full context from session log");
return Ok(true);
}
}
}
}
// Fall back to using final_output summary + TODO
let mut context_msg = String::new();
if let Some(ref summary) = continuation.final_output_summary {
context_msg.push_str(&format!("Previous session summary:\n{}\n\n", summary));
}
if let Some(ref todo) = continuation.todo_snapshot {
context_msg.push_str(&format!("Current TODO state:\n{}\n", todo));
}
if !context_msg.is_empty() {
self.context_window.add_message(Message {
role: MessageRole::User,
id: String::new(),
content: format!("[Session Resumed]\n\n{}", context_msg),
cache_control: None,
});
}
info!("Restored session from summary");
Ok(false)
}
async fn stream_completion(
&mut self,
request: CompletionRequest,
@@ -3731,8 +3877,7 @@ impl<W: UiWriter> Agent<W> {
let mut any_tool_executed = false; // Track if ANY tool was executed across all iterations
let mut auto_summary_attempts = 0; // Track auto-summary prompt attempts
const MAX_AUTO_SUMMARY_ATTEMPTS: usize = 2; // Limit auto-summary retries
let mut last_action_was_tool = false; // Track if the last action was a tool call (vs text response)
let mut any_text_response = false; // Track if LLM ever provided a text response
let mut final_output_called = false; // Track if final_output was called
let mut executed_tools_in_session: std::collections::HashSet<String> = std::collections::HashSet::new(); // Track executed tools to prevent duplicates
// Check if we need to summarize before starting
@@ -4427,6 +4572,7 @@ impl<W: UiWriter> Agent<W> {
// Check if this was a final_output tool call
if tool_call.tool == "final_output" {
// Save context window BEFORE returning so the session log includes final_output
final_output_called = true;
self.save_context_window("completed");
// The summary was already displayed via print_final_output
@@ -4482,7 +4628,6 @@ impl<W: UiWriter> Agent<W> {
tool_executed = true;
any_tool_executed = true; // Track across all iterations
last_action_was_tool = true; // Last action was a tool call
// Add to executed tools set to prevent re-execution in this session
executed_tools_in_session.insert(tool_key.clone());
@@ -4533,8 +4678,6 @@ impl<W: UiWriter> Agent<W> {
self.ui_writer.print_agent_response(&filtered_content);
self.ui_writer.flush();
current_response.push_str(&filtered_content);
last_action_was_tool = false; // Text response received
any_text_response = true;
}
}
}
@@ -4790,50 +4933,56 @@ impl<W: UiWriter> Agent<W> {
let has_response = !current_response.is_empty() || !full_response.is_empty();
if !has_response {
if any_tool_executed && last_action_was_tool && !any_text_response {
// Only auto-prompt for summary if:
// 1. Tools were executed in previous iterations
// 2. The last action was a tool call (not a text response)
// 3. No text response was ever provided by the LLM
if auto_summary_attempts < MAX_AUTO_SUMMARY_ATTEMPTS {
// Auto-prompt for a summary by adding a follow-up message
auto_summary_attempts += 1;
warn!(
"LLM stopped without final response after executing tools ({} iterations, auto-summary attempt {})",
iteration_count, auto_summary_attempts
);
self.ui_writer.print_context_status(
"\n🔄 Model stopped without response. Auto-prompting for summary...\n"
);
// Add a follow-up message asking for summary
let summary_prompt = Message::new(
MessageRole::User,
"Please provide a brief summary of what was accomplished and any next steps.".to_string(),
);
self.context_window.add_message(summary_prompt);
request.messages = self.context_window.conversation_history.clone();
// Continue the loop to get the summary
continue;
} else {
// Max auto-summary attempts reached, give up gracefully
warn!(
"Max auto-summary attempts ({}) reached, returning without summary",
MAX_AUTO_SUMMARY_ATTEMPTS
);
self.ui_writer.print_agent_response(
"\n⚠️ The model stopped without providing a final response after multiple attempts.\n"
);
}
} else {
// Auto-continue if tools were executed but final_output was never called
// This is the simple rule: LLM must call final_output before returning control
if any_tool_executed && !final_output_called {
if auto_summary_attempts < MAX_AUTO_SUMMARY_ATTEMPTS {
auto_summary_attempts += 1;
warn!(
"Loop exited without any response after {} iterations",
iteration_count
"LLM stopped without calling final_output after executing tools ({} iterations, auto-continue attempt {})",
iteration_count, auto_summary_attempts
);
self.ui_writer.print_context_status(
"\n🔄 Model stopped without calling final_output. Auto-continuing...\n"
);
// Add any text response to context before prompting for continuation
if has_response {
let response_text = if !current_response.is_empty() {
current_response.clone()
} else {
full_response.clone()
};
if !response_text.trim().is_empty() {
let assistant_msg = Message::new(
MessageRole::Assistant,
response_text.trim().to_string(),
);
self.context_window.add_message(assistant_msg);
}
}
// Add a follow-up message asking for continuation
let continue_prompt = Message::new(
MessageRole::User,
"Please continue until you are done. You **MUST** call `final_output` with a summary when done.".to_string(),
);
self.context_window.add_message(continue_prompt);
request.messages = self.context_window.conversation_history.clone();
// Continue the loop
continue;
} else {
// Max attempts reached, give up gracefully
warn!(
"Max auto-continue attempts ({}) reached, returning without final_output",
MAX_AUTO_SUMMARY_ATTEMPTS
);
self.ui_writer.print_agent_response(
"\n⚠️ The model stopped without calling final_output after multiple attempts.\n"
);
}
} else {
} else if has_response {
// Only set full_response if it's empty (first iteration without tools)
// This prevents duplication when the agent responds without calling final_output
if full_response.is_empty() && !current_response.is_empty() {
@@ -5387,11 +5536,15 @@ impl<W: UiWriter> Agent<W> {
"final_output" => {
if let Some(summary) = tool_call.args.get("summary") {
if let Some(summary_str) = summary.as_str() {
// Save session continuation artifact
self.save_session_continuation(Some(summary_str.to_string()));
Ok(summary_str.to_string())
} else {
self.save_session_continuation(None);
Ok("✅ Turn completed".to_string())
}
} else {
self.save_session_continuation(None);
Ok("✅ Turn completed".to_string())
}
}

View File

@@ -0,0 +1,226 @@
//! Session continuation support for long-running interactive sessions.
//!
//! This module provides functionality to save and restore session state,
//! allowing users to resume work across multiple g3 invocations.
use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use tracing::{debug, error, info, warn};
/// Version of the session continuation format
const CONTINUATION_VERSION: &str = "1.0";
/// Session continuation artifact containing all information needed to resume a session
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SessionContinuation {
/// Version of the continuation format
pub version: String,
/// Timestamp when the continuation was saved
pub created_at: String,
/// Original session ID
pub session_id: String,
/// The last final_output summary
pub final_output_summary: Option<String>,
/// Path to the full session log (g3_session_*.json)
pub session_log_path: String,
/// Context window usage percentage when saved
pub context_percentage: f32,
/// Snapshot of the TODO list content
pub todo_snapshot: Option<String>,
/// Working directory where the session was running
pub working_directory: String,
}
impl SessionContinuation {
/// Create a new session continuation artifact
pub fn new(
session_id: String,
final_output_summary: Option<String>,
session_log_path: String,
context_percentage: f32,
todo_snapshot: Option<String>,
working_directory: String,
) -> Self {
Self {
version: CONTINUATION_VERSION.to_string(),
created_at: chrono::Utc::now().to_rfc3339(),
session_id,
final_output_summary,
session_log_path,
context_percentage,
todo_snapshot,
working_directory,
}
}
/// Check if the context can be fully restored (< 80% used)
pub fn can_restore_full_context(&self) -> bool {
self.context_percentage < 80.0
}
}
/// Get the path to the .g3/session directory
pub fn get_session_dir() -> PathBuf {
let current_dir = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
current_dir.join(".g3").join("session")
}
/// Get the path to the latest.json continuation file
pub fn get_latest_continuation_path() -> PathBuf {
get_session_dir().join("latest.json")
}
/// Ensure the .g3/session directory exists
pub fn ensure_session_dir() -> Result<PathBuf> {
let session_dir = get_session_dir();
if !session_dir.exists() {
std::fs::create_dir_all(&session_dir)?;
debug!("Created session directory: {:?}", session_dir);
}
Ok(session_dir)
}
/// Save a session continuation artifact
pub fn save_continuation(continuation: &SessionContinuation) -> Result<PathBuf> {
let session_dir = ensure_session_dir()?;
let latest_path = session_dir.join("latest.json");
let json = serde_json::to_string_pretty(continuation)?;
std::fs::write(&latest_path, &json)?;
info!("Saved session continuation to {:?}", latest_path);
Ok(latest_path)
}
/// Load the latest session continuation artifact if it exists
pub fn load_continuation() -> Result<Option<SessionContinuation>> {
let latest_path = get_latest_continuation_path();
if !latest_path.exists() {
debug!("No continuation file found at {:?}", latest_path);
return Ok(None);
}
let json = std::fs::read_to_string(&latest_path)?;
let continuation: SessionContinuation = serde_json::from_str(&json)?;
// Validate version
if continuation.version != CONTINUATION_VERSION {
warn!(
"Continuation version mismatch: expected {}, got {}",
CONTINUATION_VERSION, continuation.version
);
}
info!("Loaded session continuation from {:?}", latest_path);
Ok(Some(continuation))
}
/// Clear all session continuation artifacts (for /clear command)
pub fn clear_continuation() -> Result<()> {
let session_dir = get_session_dir();
if session_dir.exists() {
// Remove all files in the session directory
for entry in std::fs::read_dir(&session_dir)? {
let entry = entry?;
let path = entry.path();
if path.is_file() {
std::fs::remove_file(&path)?;
debug!("Removed session file: {:?}", path);
}
}
info!("Cleared session continuation artifacts");
}
Ok(())
}
/// Check if a continuation exists and is valid
pub fn has_valid_continuation() -> bool {
match load_continuation() {
Ok(Some(continuation)) => {
// Check if the session log still exists
let session_log_path = PathBuf::from(&continuation.session_log_path);
if !session_log_path.exists() {
warn!("Session log no longer exists: {:?}", session_log_path);
return false;
}
// Check if we're in the same working directory
let current_dir = std::env::current_dir()
.map(|p| p.to_string_lossy().to_string())
.unwrap_or_default();
if current_dir != continuation.working_directory {
debug!(
"Working directory changed: {} -> {}",
continuation.working_directory, current_dir
);
// Still valid, but user should be aware
}
true
}
Ok(None) => false,
Err(e) => {
error!("Error checking continuation: {}", e);
false
}
}
}
/// Load the full context window from a session log file
pub fn load_context_from_session_log(session_log_path: &Path) -> Result<Option<serde_json::Value>> {
if !session_log_path.exists() {
return Ok(None);
}
let json = std::fs::read_to_string(session_log_path)?;
let session_data: serde_json::Value = serde_json::from_str(&json)?;
Ok(Some(session_data))
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
#[test]
fn test_session_continuation_creation() {
let continuation = SessionContinuation::new(
"test_session_123".to_string(),
Some("Task completed successfully".to_string()),
"/path/to/session.json".to_string(),
45.0,
Some("- [x] Task 1\n- [ ] Task 2".to_string()),
"/home/user/project".to_string(),
);
assert_eq!(continuation.version, CONTINUATION_VERSION);
assert_eq!(continuation.session_id, "test_session_123");
assert!(continuation.can_restore_full_context());
}
#[test]
fn test_can_restore_full_context() {
let mut continuation = SessionContinuation::new(
"test".to_string(),
None,
"path".to_string(),
50.0,
None,
".".to_string(),
);
assert!(continuation.can_restore_full_context()); // 50% < 80%
continuation.context_percentage = 80.0;
assert!(!continuation.can_restore_full_context()); // 80% >= 80%
continuation.context_percentage = 95.0;
assert!(!continuation.can_restore_full_context()); // 95% >= 80%
}
}