ensure system prompt is always added first

This commit is contained in:
Dhanji Prasanna
2025-11-20 08:45:03 +11:00
parent b6e226df67
commit 14c8d066c9

View File

@@ -4,7 +4,6 @@ pub mod project;
pub mod task_result; pub mod task_result;
pub mod ui_writer; pub mod ui_writer;
use std::process::exit;
pub use task_result::TaskResult; pub use task_result::TaskResult;
#[cfg(test)] #[cfg(test)]
@@ -951,7 +950,24 @@ impl<W: UiWriter> Agent<W> {
ui_writer.print_context_status(&format!("⚠️ {}", warning)); ui_writer.print_context_status(&format!("⚠️ {}", warning));
} }
// If README content is provided, add it as the first system message // Add system prompt as the FIRST message (before README)
// This ensures the agent always has proper tool usage instructions
let provider = providers.get(None)?;
let provider_has_native_tool_calling = provider.has_native_tool_calling();
let _ = provider; // Drop provider reference to avoid borrowing issues
let system_prompt = if provider_has_native_tool_calling {
// For native tool calling providers, use a more explicit system prompt
SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string()
} else {
// For non-native providers (embedded models), use JSON format instructions
SYSTEM_PROMPT_FOR_NON_NATIVE_TOOL_USE.to_string()
};
let system_message = Message::new(MessageRole::System, system_prompt);
context_window.add_message(system_message);
// If README content is provided, add it as a second system message (after the main system prompt)
if let Some(readme) = readme_content { if let Some(readme) = readme_content {
let readme_message = Message::new(MessageRole::System, readme); let readme_message = Message::new(MessageRole::System, readme);
context_window.add_message(readme_message); context_window.add_message(readme_message);
@@ -1013,6 +1029,35 @@ impl<W: UiWriter> Agent<W> {
}) })
} }
/// Enforce the invariant that the conversation history begins with the system prompt.
///
/// The checks run in order: the history must be non-empty, its first entry must
/// carry the `MessageRole::System` role, and that entry's content must include
/// the marker text `"You are G3"`.
///
/// # Panics
/// Panics if:
/// - The conversation history is empty
/// - The first message is not a System message
/// - The first message's content does not contain the `"You are G3"` marker
fn validate_system_prompt_is_first(&self) {
    // Fetch the leading entry; an absent entry means the history is empty.
    let head = match self.context_window.conversation_history.first() {
        Some(message) => message,
        None => panic!(
            "FATAL: Conversation history is empty. System prompt must be the first message."
        ),
    };

    // The leading entry has to be a System message.
    match head.role {
        MessageRole::System => {}
        _ => panic!(
            "FATAL: First message is not a System message. Found: {:?}",
            head.role
        ),
    }

    // A System message without the marker is treated as some other system content
    // (the panic text names the README as the likely culprit).
    let has_marker = head.content.contains("You are G3");
    if !has_marker {
        panic!("FATAL: First system message does not contain the system prompt. This likely means the README was added before the system prompt.");
    }
}
/// Convert cache config string to CacheControl enum /// Convert cache config string to CacheControl enum
fn parse_cache_control(cache_config: &str) -> Option<CacheControl> { fn parse_cache_control(cache_config: &str) -> Option<CacheControl> {
match cache_config { match cache_config {
@@ -1256,7 +1301,7 @@ impl<W: UiWriter> Agent<W> {
async fn execute_single_task( async fn execute_single_task(
&mut self, &mut self,
description: &str, description: &str,
show_prompt: bool, _show_prompt: bool,
_show_code: bool, _show_code: bool,
show_timing: bool, show_timing: bool,
cancellation_token: CancellationToken, cancellation_token: CancellationToken,
@@ -1264,50 +1309,15 @@ impl<W: UiWriter> Agent<W> {
// Reset the JSON tool call filter state at the start of each new task // Reset the JSON tool call filter state at the start of each new task
// This prevents the filter from staying in suppression mode between user interactions // This prevents the filter from staying in suppression mode between user interactions
fixed_filter_json::reset_fixed_json_tool_state(); fixed_filter_json::reset_fixed_json_tool_state();
// Validate that the system prompt is the first message (critical invariant)
self.validate_system_prompt_is_first();
// Generate session ID based on the initial prompt if this is a new session // Generate session ID based on the initial prompt if this is a new session
if self.session_id.is_none() { if self.session_id.is_none() {
self.session_id = Some(self.generate_session_id(description)); self.session_id = Some(self.generate_session_id(description));
} }
// Only add system message if this is the first interaction (empty conversation history)
if self.context_window.conversation_history.is_empty() {
let provider = self.providers.get(None)?;
let provider_has_native_tool_calling = provider.has_native_tool_calling();
let provider_name_for_system = provider.name().to_string();
drop(provider); // Drop provider reference to avoid borrowing issues
let system_prompt = if provider_has_native_tool_calling {
// For native tool calling providers, use a more explicit system prompt
SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string()
} else {
// For non-native providers (embedded models), use JSON format instructions
SYSTEM_PROMPT_FOR_NON_NATIVE_TOOL_USE.to_string()
};
if show_prompt {
self.ui_writer.print_system_prompt(&system_prompt);
}
// Add system message to context window
let system_message = {
// Check if we should use cache control for system message
if let Some(cache_config) = match provider_name_for_system.as_str() {
"anthropic" => self.config.providers.anthropic.as_ref()
.and_then(|c| c.cache_config.as_ref())
.and_then(|config| Self::parse_cache_control(config)),
_ => None,
} {
let provider = self.providers.get(None)?;
Message::with_cache_control_validated(MessageRole::System, system_prompt, cache_config, provider)
} else {
Message::new(MessageRole::System, system_prompt)
}
};
self.context_window.add_message(system_message);
}
// Add user message to context window // Add user message to context window
let user_message = Message::new(MessageRole::User, format!("Task: {}", description)); let user_message = Message::new(MessageRole::User, format!("Task: {}", description));
self.context_window.add_message(user_message); self.context_window.add_message(user_message);
@@ -1318,8 +1328,8 @@ impl<W: UiWriter> Agent<W> {
// Check if provider supports native tool calling and add tools if so // Check if provider supports native tool calling and add tools if so
let provider = self.providers.get(None)?; let provider = self.providers.get(None)?;
let provider_name = provider.name().to_string(); let provider_name = provider.name().to_string();
let has_native_tool_calling = provider.has_native_tool_calling(); let _has_native_tool_calling = provider.has_native_tool_calling();
let supports_cache_control = provider.supports_cache_control(); let _supports_cache_control = provider.supports_cache_control();
let tools = if provider.has_native_tool_calling() { let tools = if provider.has_native_tool_calling() {
Some(Self::create_tool_definitions( Some(Self::create_tool_definitions(
self.config.webdriver.enabled, self.config.webdriver.enabled,
@@ -1329,7 +1339,7 @@ impl<W: UiWriter> Agent<W> {
} else { } else {
None None
}; };
drop(provider); // Drop the provider reference to avoid borrowing issues let _ = provider; // Drop the provider reference to avoid borrowing issues
// Get max_tokens from provider configuration, falling back to sensible defaults // Get max_tokens from provider configuration, falling back to sensible defaults
let max_tokens = Some(self.resolve_max_tokens(&provider_name)); let max_tokens = Some(self.resolve_max_tokens(&provider_name));
@@ -1689,17 +1699,21 @@ impl<W: UiWriter> Agent<W> {
pub fn reload_readme(&mut self) -> Result<bool> { pub fn reload_readme(&mut self) -> Result<bool> {
info!("Manual README reload triggered"); info!("Manual README reload triggered");
// Check if the first message in conversation history is a system message with README content // Check if the second message in conversation history is a system message with README content
// (The first message should always be the system prompt)
let has_readme = self let has_readme = self
.context_window .context_window
.conversation_history .conversation_history
.first() .get(1) // Check the SECOND message (index 1)
.map(|m| { .map(|m| {
matches!(m.role, MessageRole::System) matches!(m.role, MessageRole::System)
&& (m.content.contains("Project README") && (m.content.contains("Project README")
|| m.content.contains("Agent Configuration")) || m.content.contains("Agent Configuration"))
}) })
.unwrap_or(false); .unwrap_or(false);
// Validate that the system prompt is still first
self.validate_system_prompt_is_first();
if !has_readme { if !has_readme {
return Ok(false); return Ok(false);
@@ -1723,8 +1737,8 @@ impl<W: UiWriter> Agent<W> {
} }
if found_any { if found_any {
// Replace the first message with the new content // Replace the second message (README) with the new content
if let Some(first_msg) = self.context_window.conversation_history.first_mut() { if let Some(first_msg) = self.context_window.conversation_history.get_mut(1) {
first_msg.content = combined_content; first_msg.content = combined_content;
info!("README content reloaded successfully"); info!("README content reloaded successfully");
Ok(true) Ok(true)
@@ -5473,6 +5487,16 @@ mod integration_tests {
// Implement Drop to clean up safaridriver process // Implement Drop to clean up safaridriver process
impl<W: UiWriter> Drop for Agent<W> { impl<W: UiWriter> Drop for Agent<W> {
fn drop(&mut self) { fn drop(&mut self) {
// Validate system prompt invariant on drop (agent exit)
// This catches any bugs where the conversation history was corrupted during execution
if !self.context_window.conversation_history.is_empty() {
if let Err(e) = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
self.validate_system_prompt_is_first();
})) {
eprintln!("\n⚠️ FATAL ERROR ON EXIT: System prompt validation failed: {:?}", e);
}
}
// Try to kill safaridriver process if it's still running // Try to kill safaridriver process if it's still running
// We need to use try_lock since we can't await in Drop // We need to use try_lock since we can't await in Drop
if let Ok(mut process_guard) = self.safaridriver_process.try_write() { if let Ok(mut process_guard) = self.safaridriver_process.try_write() {