Compare commits

...

13 Commits

Author SHA1 Message Date
Jochen
21af6ba574 fix temperature for summary request too. 2025-12-02 21:20:16 +11:00
Jochen
ae16243f49 Fix temperature param + add thinking for anthropic
The temperature param was not passed to the llm.
Now support anthropic models in 'thinking' mode.
2025-12-02 17:24:55 +11:00
Dhanji R. Prasanna
9ee0468b87 test for system message 2025-12-02 14:45:12 +11:00
Dhanji R. Prasanna
d9ad244197 add markdown format only to final_output and fix todo duplication 2025-12-02 14:26:22 +11:00
Dhanji R. Prasanna
a6537e4dba todo_write outputs entire list 2025-12-02 13:48:05 +11:00
Dhanji R. Prasanna
df3f25f2f0 test for resume unfinished todos 2025-12-02 11:07:13 +11:00
Dhanji R. Prasanna
f8f989d4c6 resume unfinished TODOs 2025-12-02 11:06:58 +11:00
Dhanji R. Prasanna
0e4c935a70 clean up TODO output 2025-12-02 06:48:58 +11:00
Dhanji R. Prasanna
1b4ea93ba4 token counting bugfix 2025-12-01 14:52:10 +11:00
Dhanji R. Prasanna
4496eee046 fix compaction to restore system message 2025-12-01 14:38:21 +11:00
Dhanji R. Prasanna
8928fb92be append instead of replace system msg 2025-11-29 16:13:00 +11:00
Dhanji R. Prasanna
81fd2ab92f unused var 2025-11-29 15:44:30 +11:00
Jochen
af7fb8f7f1 Merge pull request #38 from dhanji/jochen-debug-with-ids
dumps context window for monitoring sizes, also add message id for internal debugging
2025-11-28 16:43:26 +11:00
12 changed files with 921 additions and 216 deletions

View File

@@ -24,6 +24,8 @@ temperature = 0.3 # Slightly higher temperature for more creative implementatio
# Options: "ephemeral", "5minute", "1hour" # Options: "ephemeral", "5minute", "1hour"
# Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs. # Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
# enable_1m_context = true # optional, more expensive # enable_1m_context = true # optional, more expensive
# thinking_budget_tokens = 10000 # Optional: Enable extended thinking mode with token budget
# Allows the model to "think" before responding. Useful for complex reasoning tasks.
# Multiple OpenAI-compatible providers can be configured with custom names # Multiple OpenAI-compatible providers can be configured with custom names

View File

@@ -105,4 +105,9 @@ impl UiWriter for MachineUiWriter {
// Default to first option (index 0) for automation // Default to first option (index 0) for automation
0 0
} }
fn print_final_output(&self, summary: &str) {
println!("FINAL_OUTPUT:");
println!("{}", summary);
}
} }

View File

@@ -1,78 +1,22 @@
use g3_core::ui_writer::UiWriter; use g3_core::ui_writer::UiWriter;
use std::io::{self, Write}; use std::io::{self, Write};
use std::sync::Mutex; use termimad::MadSkin;
/// Console implementation of UiWriter that prints to stdout /// Console implementation of UiWriter that prints to stdout
pub struct ConsoleUiWriter { pub struct ConsoleUiWriter {
current_tool_name: Mutex<Option<String>>, current_tool_name: std::sync::Mutex<Option<String>>,
current_tool_args: Mutex<Vec<(String, String)>>, current_tool_args: std::sync::Mutex<Vec<(String, String)>>,
current_output_line: Mutex<Option<String>>, current_output_line: std::sync::Mutex<Option<String>>,
output_line_printed: Mutex<bool>, output_line_printed: std::sync::Mutex<bool>,
in_todo_tool: Mutex<bool>,
} }
impl ConsoleUiWriter { impl ConsoleUiWriter {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
current_tool_name: Mutex::new(None), current_tool_name: std::sync::Mutex::new(None),
current_tool_args: Mutex::new(Vec::new()), current_tool_args: std::sync::Mutex::new(Vec::new()),
current_output_line: Mutex::new(None), current_output_line: std::sync::Mutex::new(None),
output_line_printed: Mutex::new(false), output_line_printed: std::sync::Mutex::new(false),
in_todo_tool: Mutex::new(false),
}
}
fn print_todo_line(&self, line: &str) {
// Transform and print todo list lines elegantly
let trimmed = line.trim();
// Skip the "📝 TODO list:" prefix line
if trimmed.starts_with("📝 TODO list:") || trimmed == "📝 TODO list is empty" {
return;
}
// Handle empty lines
if trimmed.is_empty() {
println!();
return;
}
// Detect indentation level
let indent_count = line.chars().take_while(|c| c.is_whitespace()).count();
let indent = " ".repeat(indent_count / 2); // Convert spaces to visual indent
// Format based on line type
if trimmed.starts_with("- [ ]") {
// Incomplete task
let task = trimmed.strip_prefix("- [ ]").unwrap_or(trimmed).trim();
println!("{}{}", indent, task);
} else if trimmed.starts_with("- [x]") || trimmed.starts_with("- [X]") {
// Completed task
let task = trimmed
.strip_prefix("- [x]")
.or_else(|| trimmed.strip_prefix("- [X]"))
.unwrap_or(trimmed)
.trim();
println!("{}\x1b[2m☑ {}\x1b[0m", indent, task);
} else if trimmed.starts_with("- ") {
// Regular bullet point
let item = trimmed.strip_prefix("- ").unwrap_or(trimmed).trim();
println!("{}{}", indent, item);
} else if trimmed.starts_with("# ") {
// Heading
let heading = trimmed.strip_prefix("# ").unwrap_or(trimmed).trim();
println!("\n\x1b[1m{}\x1b[0m", heading);
} else if trimmed.starts_with("## ") {
// Subheading
let subheading = trimmed.strip_prefix("## ").unwrap_or(trimmed).trim();
println!("\n\x1b[1m{}\x1b[0m", subheading);
} else if trimmed.starts_with("**") && trimmed.ends_with("**") {
// Bold text (section marker)
let text = trimmed.trim_start_matches("**").trim_end_matches("**");
println!("{}\x1b[1m{}\x1b[0m", indent, text);
} else {
// Regular text or note
println!("{}{}", indent, trimmed);
} }
} }
} }
@@ -138,13 +82,6 @@ impl UiWriter for ConsoleUiWriter {
// Store the tool name and clear args for collection // Store the tool name and clear args for collection
*self.current_tool_name.lock().unwrap() = Some(tool_name.to_string()); *self.current_tool_name.lock().unwrap() = Some(tool_name.to_string());
self.current_tool_args.lock().unwrap().clear(); self.current_tool_args.lock().unwrap().clear();
// Check if this is a todo tool call
let is_todo = tool_name == "todo_read" || tool_name == "todo_write";
*self.in_todo_tool.lock().unwrap() = is_todo;
// For todo tools, we'll skip the normal header and print a custom one later
if is_todo {}
} }
fn print_tool_arg(&self, key: &str, value: &str) { fn print_tool_arg(&self, key: &str, value: &str) {
@@ -167,13 +104,10 @@ impl UiWriter for ConsoleUiWriter {
} }
fn print_tool_output_header(&self) { fn print_tool_output_header(&self) {
// Skip normal header for todo tools
if *self.in_todo_tool.lock().unwrap() {
println!(); // Just add a newline
return;
}
println!(); println!();
// Reset output_line_printed at the start of a new tool output
// This ensures the header isn't cleared by update_tool_output_line
*self.output_line_printed.lock().unwrap() = false;
// Now print the tool header with the most important arg in bold green // Now print the tool header with the most important arg in bold green
if let Some(tool_name) = self.current_tool_name.lock().unwrap().as_ref() { if let Some(tool_name) = self.current_tool_name.lock().unwrap().as_ref() {
let args = self.current_tool_args.lock().unwrap(); let args = self.current_tool_args.lock().unwrap();
@@ -259,21 +193,14 @@ impl UiWriter for ConsoleUiWriter {
} }
fn print_tool_output_line(&self, line: &str) { fn print_tool_output_line(&self, line: &str) {
// Special handling for todo tools // Skip the TODO list header line
if *self.in_todo_tool.lock().unwrap() { if line.starts_with("📝 TODO list:") {
self.print_todo_line(line);
return; return;
} }
println!("\x1b[2m{}\x1b[0m", line); println!("\x1b[2m{}\x1b[0m", line);
} }
fn print_tool_output_summary(&self, count: usize) { fn print_tool_output_summary(&self, count: usize) {
// Skip for todo tools
if *self.in_todo_tool.lock().unwrap() {
return;
}
println!( println!(
"\x1b[2m({} line{})\x1b[0m", "\x1b[2m({} line{})\x1b[0m",
count, count,
@@ -282,13 +209,6 @@ impl UiWriter for ConsoleUiWriter {
} }
fn print_tool_timing(&self, duration_str: &str) { fn print_tool_timing(&self, duration_str: &str) {
// For todo tools, just print a simple completion message
if *self.in_todo_tool.lock().unwrap() {
println!();
*self.in_todo_tool.lock().unwrap() = false;
return;
}
// Parse the duration string to determine color // Parse the duration string to determine color
// Format is like "1.5s", "500ms", "2m 30.0s" // Format is like "1.5s", "500ms", "2m 30.0s"
let color_code = if duration_str.ends_with("ms") { let color_code = if duration_str.ends_with("ms") {
@@ -390,4 +310,44 @@ impl UiWriter for ConsoleUiWriter {
let _ = io::stdout().flush(); let _ = io::stdout().flush();
} }
} }
fn print_final_output(&self, summary: &str) {
// Show spinner while "formatting"
let spinner_frames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
let message = "summarizing work done...";
// Brief spinner animation (about 0.5 seconds)
for i in 0..5 {
let frame = spinner_frames[i % spinner_frames.len()];
print!("\r\x1b[36m{} {}\x1b[0m", frame, message);
let _ = io::stdout().flush();
std::thread::sleep(std::time::Duration::from_millis(100));
}
// Clear the spinner line
print!("\r\x1b[2K");
let _ = io::stdout().flush();
// Create a styled markdown skin
let mut skin = MadSkin::default();
// Customize colors for better terminal appearance
skin.bold.set_fg(termimad::crossterm::style::Color::Green);
skin.italic.set_fg(termimad::crossterm::style::Color::Cyan);
skin.headers[0].set_fg(termimad::crossterm::style::Color::Magenta);
skin.headers[1].set_fg(termimad::crossterm::style::Color::Magenta);
skin.code_block.set_fg(termimad::crossterm::style::Color::Yellow);
skin.inline_code.set_fg(termimad::crossterm::style::Color::Yellow);
// Print a header separator
println!("\x1b[1;35m━━━ Summary ━━━\x1b[0m");
println!();
// Render the markdown
let rendered = skin.term_text(summary);
print!("{}", rendered);
// Print a footer separator
println!();
println!("\x1b[1;35m━━━━━━━━━━━━━━━\x1b[0m");
}
} }

View File

@@ -42,6 +42,7 @@ pub struct AnthropicConfig {
pub temperature: Option<f32>, pub temperature: Option<f32>,
pub cache_config: Option<String>, // "ephemeral", "5minute", "1hour", or None to disable pub cache_config: Option<String>, // "ephemeral", "5minute", "1hour", or None to disable
pub enable_1m_context: Option<bool>, // Enable 1m context window (costs extra) pub enable_1m_context: Option<bool>, // Enable 1m context window (costs extra)
pub thinking_budget_tokens: Option<u32>, // Budget tokens for extended thinking
} }
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]

View File

@@ -340,14 +340,18 @@ impl ContextWindow {
} }
/// Update token usage from provider response /// Update token usage from provider response
/// NOTE: This only updates cumulative_tokens (total API usage tracking).
/// It does NOT update used_tokens because:
/// 1. prompt_tokens represents the ENTIRE context sent to API (already tracked via add_message)
/// 2. completion_tokens will be tracked when the assistant message is added via add_message
/// Adding total_tokens here would cause double/triple counting and break the 80% threshold check.
pub fn update_usage_from_response(&mut self, usage: &g3_providers::Usage) { pub fn update_usage_from_response(&mut self, usage: &g3_providers::Usage) {
// Add the tokens from this response to our running total // Only update cumulative tokens for API usage tracking
// The usage.total_tokens represents tokens used in this single API call // Do NOT update used_tokens - that's tracked via add_message to avoid double counting
self.used_tokens += usage.total_tokens;
self.cumulative_tokens += usage.total_tokens; self.cumulative_tokens += usage.total_tokens;
debug!( debug!(
"Added {} tokens from provider response (used: {}/{}, cumulative: {})", "Updated cumulative tokens: {} (used: {}/{}, cumulative: {})",
usage.total_tokens, self.used_tokens, self.total_tokens, self.cumulative_tokens usage.total_tokens, self.used_tokens, self.total_tokens, self.cumulative_tokens
); );
} }
@@ -371,12 +375,14 @@ impl ContextWindow {
self.update_usage_from_response(usage); self.update_usage_from_response(usage);
} }
/// Update cumulative token usage (for streaming) /// Update cumulative token usage (for streaming) when no provider usage data is available
/// NOTE: This only updates cumulative_tokens, not used_tokens.
/// The assistant message will be added via add_message which tracks used_tokens.
pub fn add_streaming_tokens(&mut self, new_tokens: u32) { pub fn add_streaming_tokens(&mut self, new_tokens: u32) {
self.used_tokens += new_tokens; // Only update cumulative tokens - used_tokens is tracked via add_message
self.cumulative_tokens += new_tokens; self.cumulative_tokens += new_tokens;
debug!( debug!(
"Added {} streaming tokens (used: {}/{}, cumulative: {})", "Updated cumulative streaming tokens: {} (used: {}/{}, cumulative: {})",
new_tokens, self.used_tokens, self.total_tokens, self.cumulative_tokens new_tokens, self.used_tokens, self.total_tokens, self.cumulative_tokens
); );
} }
@@ -421,6 +427,7 @@ Format this as a detailed but concise summary that can be used to resume the con
} }
/// Reset the context window with a summary /// Reset the context window with a summary
/// Preserves the original system prompt as the first message
pub fn reset_with_summary( pub fn reset_with_summary(
&mut self, &mut self,
summary: String, summary: String,
@@ -433,10 +440,31 @@ Format this as a detailed but concise summary that can be used to resume the con
.map(|m| m.content.len()) .map(|m| m.content.len())
.sum(); .sum();
// Preserve the original system prompt (first message) and optionally the README (second message)
let original_system_prompt = self.conversation_history.first().cloned();
let readme_message = self.conversation_history.get(1).and_then(|msg| {
if matches!(msg.role, MessageRole::System) &&
(msg.content.contains("Project README") || msg.content.contains("Agent Configuration")) {
Some(msg.clone())
} else {
None
}
});
// Clear the conversation history // Clear the conversation history
self.conversation_history.clear(); self.conversation_history.clear();
self.used_tokens = 0; self.used_tokens = 0;
// Re-add the original system prompt first (critical invariant)
if let Some(system_prompt) = original_system_prompt {
self.add_message(system_prompt);
}
// Re-add the README message if it existed
if let Some(readme) = readme_message {
self.add_message(readme);
}
// Add the summary as a system message // Add the summary as a system message
let summary_message = Message::new( let summary_message = Message::new(
MessageRole::System, MessageRole::System,
@@ -922,6 +950,7 @@ impl<W: UiWriter> Agent<W> {
anthropic_config.temperature, anthropic_config.temperature,
anthropic_config.cache_config.clone(), anthropic_config.cache_config.clone(),
anthropic_config.enable_1m_context, anthropic_config.enable_1m_context,
anthropic_config.thinking_budget_tokens,
)?; )?;
providers.register(anthropic_provider); providers.register(anthropic_provider);
} }
@@ -996,6 +1025,28 @@ impl<W: UiWriter> Agent<W> {
context_window.add_message(readme_message); context_window.add_message(readme_message);
} }
// Load existing TODO list if present (after system prompt and README)
let todo_path = std::env::current_dir().ok().map(|p| p.join("todo.g3.md"));
let initial_todo_content = if let Some(ref path) = todo_path {
if path.exists() {
std::fs::read_to_string(path).ok()
} else {
None
}
} else {
None
};
if let Some(ref todo_content) = initial_todo_content {
if !todo_content.trim().is_empty() {
let todo_message = Message::new(
MessageRole::System,
format!("📋 Existing TODO list (from todo.g3.md):\n\n{}", todo_content),
);
context_window.add_message(todo_message);
}
}
// Initialize computer controller if enabled // Initialize computer controller if enabled
let computer_controller = if config.computer_control.enabled { let computer_controller = if config.computer_control.enabled {
match g3_computer_control::create_controller() { match g3_computer_control::create_controller() {
@@ -1117,6 +1168,17 @@ impl<W: UiWriter> Agent<W> {
} }
} }
/// Get the configured temperature for a provider from top-level config
fn provider_temperature(config: &Config, provider_name: &str) -> Option<f32> {
match provider_name {
"anthropic" => config.providers.anthropic.as_ref()?.temperature,
"openai" => config.providers.openai.as_ref()?.temperature,
"databricks" => config.providers.databricks.as_ref()?.temperature,
"embedded" => config.providers.embedded.as_ref()?.temperature,
_ => None,
}
}
/// Resolve the max_tokens to use for a given provider, applying fallbacks /// Resolve the max_tokens to use for a given provider, applying fallbacks
fn resolve_max_tokens(&self, provider_name: &str) -> u32 { fn resolve_max_tokens(&self, provider_name: &str) -> u32 {
match provider_name { match provider_name {
@@ -1129,6 +1191,16 @@ impl<W: UiWriter> Agent<W> {
} }
} }
/// Resolve the temperature to use for a given provider, applying fallbacks
fn resolve_temperature(&self, provider_name: &str) -> f32 {
match provider_name {
"databricks" => Self::provider_temperature(&self.config, "databricks")
.unwrap_or(0.1),
other => Self::provider_temperature(&self.config, other)
.unwrap_or(0.1),
}
}
/// Print provider diagnostics through the UiWriter for visibility /// Print provider diagnostics through the UiWriter for visibility
pub fn print_provider_banner(&self, role_label: &str) { pub fn print_provider_banner(&self, role_label: &str) {
if let Ok((provider_name, model)) = self.get_provider_info() { if let Ok((provider_name, model)) = self.get_provider_info() {
@@ -1512,7 +1584,7 @@ impl<W: UiWriter> Agent<W> {
let request = CompletionRequest { let request = CompletionRequest {
messages, messages,
max_tokens, max_tokens,
temperature: Some(0.1), temperature: Some(self.resolve_temperature(&provider_name)),
stream: true, // Enable streaming stream: true, // Enable streaming
tools, tools,
}; };
@@ -1720,7 +1792,7 @@ impl<W: UiWriter> Agent<W> {
let mut summary_lines = Vec::new(); let mut summary_lines = Vec::new();
for message in &self.context_window.conversation_history { for message in &self.context_window.conversation_history {
let timestamp = chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(); let _timestamp = chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string();
// Estimate tokens for this message // Estimate tokens for this message
let message_tokens = ContextWindow::estimate_tokens(&message.content); let message_tokens = ContextWindow::estimate_tokens(&message.content);
@@ -1972,7 +2044,7 @@ impl<W: UiWriter> Agent<W> {
let summary_request = CompletionRequest { let summary_request = CompletionRequest {
messages: summary_messages, messages: summary_messages,
max_tokens: summary_max_tokens, max_tokens: summary_max_tokens,
temperature: Some(0.3), temperature: Some(self.resolve_temperature(provider.name())),
stream: false, stream: false,
tools: None, tools: None,
}; };
@@ -3030,7 +3102,7 @@ impl<W: UiWriter> Agent<W> {
let summary_request = CompletionRequest { let summary_request = CompletionRequest {
messages: summary_messages, messages: summary_messages,
max_tokens: summary_max_tokens, max_tokens: summary_max_tokens,
temperature: Some(0.3), // Lower temperature for factual summary temperature: Some(self.resolve_temperature(provider.name())),
stream: false, stream: false,
tools: None, tools: None,
}; };
@@ -3479,11 +3551,9 @@ impl<W: UiWriter> Agent<W> {
// Display tool execution result with proper indentation // Display tool execution result with proper indentation
if tool_call.tool == "final_output" { if tool_call.tool == "final_output" {
// For final_output, display the summary without truncation // For final_output, use the dedicated method that renders markdown
for line in tool_result.lines() { // with a spinner animation
self.ui_writer.update_tool_output_line(line); self.ui_writer.print_final_output(&tool_result);
}
self.ui_writer.println("");
} else { } else {
let output_lines: Vec<&str> = tool_result.lines().collect(); let output_lines: Vec<&str> = tool_result.lines().collect();
@@ -3513,44 +3583,32 @@ impl<W: UiWriter> Agent<W> {
const MAX_LINE_WIDTH: usize = 80; const MAX_LINE_WIDTH: usize = 80;
let output_len = output_lines.len(); let output_len = output_lines.len();
// For todo tools, show all lines without truncation // Skip printing for todo tools - they already print their content
let is_todo_tool = let is_todo_tool =
tool_call.tool == "todo_read" || tool_call.tool == "todo_write"; tool_call.tool == "todo_read" || tool_call.tool == "todo_write";
let max_lines_to_show = if is_todo_tool || wants_full {
output_len if !is_todo_tool {
} else { let max_lines_to_show = if wants_full { output_len } else { MAX_LINES };
MAX_LINES
};
for (idx, line) in output_lines.iter().enumerate() { for (idx, line) in output_lines.iter().enumerate() {
if !is_todo_tool && !wants_full && idx >= max_lines_to_show { if !wants_full && idx >= max_lines_to_show {
break; break;
} }
// Clip line to max width (but not for todo tools) let clipped_line = truncate_line(line, MAX_LINE_WIDTH, !wants_full);
let clipped_line = truncate_line(
line,
MAX_LINE_WIDTH,
!wants_full && !is_todo_tool,
);
// Use print_tool_output_line for todo tools to get special formatting
if is_todo_tool {
self.ui_writer.print_tool_output_line(&clipped_line);
} else {
self.ui_writer.update_tool_output_line(&clipped_line); self.ui_writer.update_tool_output_line(&clipped_line);
} }
}
if !is_todo_tool && !wants_full && output_len > MAX_LINES { if !wants_full && output_len > MAX_LINES {
self.ui_writer.print_tool_output_summary(output_len); self.ui_writer.print_tool_output_summary(output_len);
} }
} }
}
// Check if this was a final_output tool call // Check if this was a final_output tool call
if tool_call.tool == "final_output" { if tool_call.tool == "final_output" {
// The summary was displayed above when we printed the tool result // The summary was already displayed via print_final_output
// Add it to full_response so it's included in the TaskResult // Don't add it to full_response to avoid duplicate printing
full_response.push_str(&tool_result); // full_response is intentionally left empty/unchanged
self.ui_writer.println(""); self.ui_writer.println("");
let _ttft = let _ttft =
first_token_time.unwrap_or_else(|| stream_start.elapsed()); first_token_time.unwrap_or_else(|| stream_start.elapsed());
@@ -3558,13 +3616,13 @@ impl<W: UiWriter> Agent<W> {
// Add timing if needed // Add timing if needed
let final_response = if show_timing { let final_response = if show_timing {
format!( format!(
"{}\n\n🕝 {} | 💭 {}", "🕝 {} | 💭 {}",
full_response,
Self::format_duration(stream_start.elapsed()), Self::format_duration(stream_start.elapsed()),
Self::format_duration(_ttft) Self::format_duration(_ttft)
) )
} else { } else {
full_response // Return empty string since content was already displayed
String::new()
}; };
return Ok(TaskResult::new( return Ok(TaskResult::new(
@@ -4685,6 +4743,11 @@ impl<W: UiWriter> Agent<W> {
if content.trim().is_empty() { if content.trim().is_empty() {
Ok("📝 TODO list is empty".to_string()) Ok("📝 TODO list is empty".to_string())
} else { } else {
// Print the TODO content to the console
self.ui_writer.print_context_status("📝 TODO list:");
for line in content.lines() {
self.ui_writer.print_tool_output_line(line);
}
Ok(format!("📝 TODO list:\n{}", content)) Ok(format!("📝 TODO list:\n{}", content))
} }
} }
@@ -4709,6 +4772,27 @@ impl<W: UiWriter> Agent<W> {
)); ));
} }
// Check if all todos are completed (all checkboxes are checked)
let has_incomplete = content_str.lines().any(|line| {
let trimmed = line.trim();
trimmed.starts_with("- [ ]")
});
// If all todos are complete, delete the file instead of writing
if !has_incomplete && (content_str.contains("- [x]") || content_str.contains("- [X]")) {
let todo_path = std::env::current_dir()?.join("todo.g3.md");
if todo_path.exists() {
match std::fs::remove_file(&todo_path) {
Ok(_) => {
let mut todo = self.todo_content.write().await;
*todo = String::new();
return Ok("✅ All TODOs completed! Removed todo.g3.md".to_string());
}
Err(e) => return Ok(format!("❌ Failed to remove todo.g3.md: {}", e)),
}
}
}
// Write to todo.g3.md file in current workspace directory // Write to todo.g3.md file in current workspace directory
let todo_path = std::env::current_dir()?.join("todo.g3.md"); let todo_path = std::env::current_dir()?.join("todo.g3.md");
@@ -4717,10 +4801,15 @@ impl<W: UiWriter> Agent<W> {
// Also update in-memory content to stay in sync // Also update in-memory content to stay in sync
let mut todo = self.todo_content.write().await; let mut todo = self.todo_content.write().await;
*todo = content_str.to_string(); *todo = content_str.to_string();
Ok(format!( // Print the TODO content to the console
"✅ TODO list updated ({} chars) and saved to todo.g3.md", self.ui_writer.print_context_status(&format!(
"✅ TODO list updated ({} chars) and saved to todo.g3.md:",
char_count char_count
)) ));
for line in content_str.lines() {
self.ui_writer.print_tool_output_line(line);
}
Ok(format!("✅ TODO list updated ({} chars) and saved to todo.g3.md:\n{}", char_count, content_str))
} }
Err(e) => Ok(format!("❌ Failed to write todo.g3.md: {}", e)), Err(e) => Ok(format!("❌ Failed to write todo.g3.md: {}", e)),
} }

View File

@@ -65,6 +65,10 @@ pub trait UiWriter: Send + Sync {
/// Prompt the user to choose from a list of options /// Prompt the user to choose from a list of options
/// Returns the index of the selected option /// Returns the index of the selected option
fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize; fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize;
/// Print the final output summary with markdown formatting
/// Shows a spinner while formatting, then renders the markdown
fn print_final_output(&self, summary: &str);
} }
/// A no-op implementation for when UI output is not needed /// A no-op implementation for when UI output is not needed
@@ -97,4 +101,7 @@ impl UiWriter for NullUiWriter {
fn prompt_user_choice(&self, _message: &str, _options: &[&str]) -> usize { fn prompt_user_choice(&self, _message: &str, _options: &[&str]) -> usize {
0 0
} }
fn print_final_output(&self, _summary: &str) {
// No-op for null writer
}
} }

View File

@@ -0,0 +1,159 @@
//! Tests for reset_with_summary to ensure system prompt is preserved after compaction
use g3_core::ContextWindow;
use g3_providers::{Message, MessageRole};
/// Test that reset_with_summary preserves the original system prompt
#[test]
fn test_reset_with_summary_preserves_system_prompt() {
    let mut context = ContextWindow::new(10000);

    // Add the system prompt as the first message (simulating agent initialization)
    let system_prompt = "You are G3, an AI programming agent...";
    context.add_message(Message::new(MessageRole::System, system_prompt.to_string()));

    // Add some conversation history
    context.add_message(Message::new(MessageRole::User, "Task: Write a function".to_string()));
    context.add_message(Message::new(MessageRole::Assistant, "I'll help you write that function.".to_string()));
    context.add_message(Message::new(MessageRole::User, "Thanks, now add tests".to_string()));
    context.add_message(Message::new(MessageRole::Assistant, "Here are the tests.".to_string()));

    // Verify we have 5 messages before reset
    assert_eq!(context.conversation_history.len(), 5);

    // Reset with summary
    let summary = "We discussed writing a function and adding tests.".to_string();
    let latest_user_msg = Some("Continue with the implementation".to_string());
    context.reset_with_summary(summary, latest_user_msg);

    // Verify the first message is still the system prompt
    assert!(!context.conversation_history.is_empty(), "Conversation history should not be empty");
    let first_message = &context.conversation_history[0];
    assert!(
        matches!(first_message.role, MessageRole::System),
        "First message should be a System message, got {:?}",
        first_message.role
    );
    assert!(
        first_message.content.contains("You are G3"),
        "First message should contain the system prompt 'You are G3', got: {}",
        // Char-boundary-safe preview: byte slicing (`&s[..n]`) panics when `n`
        // falls inside a multi-byte UTF-8 character.
        first_message.content.chars().take(100).collect::<String>()
    );

    // Verify the summary was added as a separate system message
    let has_summary = context.conversation_history.iter().any(|m| {
        matches!(m.role, MessageRole::System) && m.content.contains("Previous conversation summary")
    });
    assert!(has_summary, "Should have a summary message");

    // Verify the latest user message was added
    let has_user_msg = context.conversation_history.iter().any(|m| {
        matches!(m.role, MessageRole::User) && m.content.contains("Continue with the implementation")
    });
    assert!(has_user_msg, "Should have the latest user message");
}
/// Test that reset_with_summary preserves README message if present
#[test]
fn test_reset_with_summary_preserves_readme() {
    let mut context = ContextWindow::new(10000);

    // System prompt goes in first, mirroring agent startup.
    context.add_message(Message::new(
        MessageRole::System,
        "You are G3, an AI programming agent...".to_string(),
    ));

    // README content is injected as a second system message
    // (what Agent::new_with_readme does).
    context.add_message(Message::new(
        MessageRole::System,
        "# Project README\n\nThis is a test project.".to_string(),
    ));

    // A short exchange so the reset has history to discard.
    context.add_message(Message::new(MessageRole::User, "Task: Write a function".to_string()));
    context.add_message(Message::new(MessageRole::Assistant, "Done.".to_string()));

    // Sanity check: four messages accumulated before compaction.
    assert_eq!(context.conversation_history.len(), 4);

    // Compact the conversation down to a summary.
    context.reset_with_summary("We wrote a function.".to_string(), None);

    // The system prompt must survive as message zero.
    let first = &context.conversation_history[0];
    assert!(
        first.content.contains("You are G3"),
        "First message should be the system prompt"
    );

    // The README must survive as message one, still a System message.
    let second = &context.conversation_history[1];
    assert!(
        matches!(second.role, MessageRole::System),
        "Second message should be a System message"
    );
    assert!(
        second.content.contains("Project README"),
        "Second message should be the README"
    );
}
/// Test that reset_with_summary works when there's no README
#[test]
fn test_reset_with_summary_without_readme() {
    let mut context = ContextWindow::new(10000);

    // Only the system prompt is present — no README message.
    context.add_message(Message::new(
        MessageRole::System,
        "You are G3, an AI programming agent...".to_string(),
    ));

    // Minimal exchange before compaction.
    context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
    context.add_message(Message::new(MessageRole::Assistant, "Hi there!".to_string()));

    // Compact down to a summary.
    context.reset_with_summary("Greeted the user.".to_string(), None);

    // Message zero is still the system prompt.
    assert!(
        context.conversation_history[0].content.contains("You are G3"),
        "First message should be the system prompt"
    );

    // With no README present, the summary immediately follows the system prompt.
    assert!(
        context.conversation_history[1]
            .content
            .contains("Previous conversation summary"),
        "Second message should be the summary when no README exists"
    );
}
/// Test that reset_with_summary handles Agent Configuration in addition to README
#[test]
fn test_reset_with_summary_preserves_agent_configuration() {
    let mut context = ContextWindow::new(10000);

    // System prompt first, as the agent would set it up.
    context.add_message(Message::new(
        MessageRole::System,
        "You are G3, an AI programming agent...".to_string(),
    ));

    // An "Agent Configuration" system message takes the README slot.
    context.add_message(Message::new(
        MessageRole::System,
        "# Agent Configuration\n\nSpecial instructions for this project.".to_string(),
    ));

    // One user message so there is history to compact away.
    context.add_message(Message::new(MessageRole::User, "Task: Do something".to_string()));

    // Compact down to a summary.
    context.reset_with_summary("Did something.".to_string(), None);

    // The Agent Configuration message must survive in position one.
    assert!(
        context.conversation_history[1].content.contains("Agent Configuration"),
        "Second message should be the Agent Configuration"
    );
}

View File

@@ -0,0 +1,263 @@
//! Tests for verifying system message loading with README content
//!
//! This test verifies that when a README is present, the system message
//! is correctly loaded and structured in the context window.
use g3_core::ContextWindow;
use g3_providers::{Message, MessageRole};
/// Test that system prompt is always the first message
#[test]
fn test_system_prompt_is_first_message() {
    let mut window = ContextWindow::new(10000);
    // Agent initialization always seeds the context with the system prompt.
    let prompt = "You are G3, an AI programming agent of the same skill level...";
    window.add_message(Message::new(MessageRole::System, prompt.to_owned()));
    // History must be non-empty and begin with that System message.
    assert!(!window.conversation_history.is_empty());
    let head = &window.conversation_history[0];
    assert!(
        matches!(head.role, MessageRole::System),
        "First message should be a System message"
    );
    assert!(
        head.content.contains("You are G3"),
        "First message should contain the system prompt"
    );
}
/// Test that README is added as the second system message after the system prompt
#[test]
fn test_readme_is_second_message_after_system_prompt() {
    let mut window = ContextWindow::new(10000);
    // 1) System prompt, as agent initialization does it.
    let prompt = "You are G3, an AI programming agent of the same skill level...";
    window.add_message(Message::new(MessageRole::System, prompt.to_owned()));
    // 2) README as a second System message (mirrors Agent::new_with_readme).
    let readme = "📚 Project README (from README.md):\n\n# My Project\n\nThis is a test project.";
    window.add_message(Message::new(MessageRole::System, readme.to_owned()));
    // Exactly two messages should be present.
    assert_eq!(window.conversation_history.len(), 2);
    // Slot 0: the system prompt.
    let head = &window.conversation_history[0];
    assert!(
        matches!(head.role, MessageRole::System),
        "First message should be a System message"
    );
    assert!(
        head.content.contains("You are G3"),
        "First message should contain the system prompt"
    );
    // Slot 1: the README.
    let next = &window.conversation_history[1];
    assert!(
        matches!(next.role, MessageRole::System),
        "Second message should be a System message"
    );
    assert!(
        next.content.contains("Project README"),
        "Second message should contain the README content"
    );
    assert!(
        next.content.contains("My Project"),
        "Second message should contain the actual README content"
    );
}
/// Test that system prompt and README are separate messages (not combined)
#[test]
fn test_system_prompt_and_readme_are_separate() {
    let mut window = ContextWindow::new(10000);
    // Add prompt and README as two distinct System messages.
    let prompt = "You are G3, an AI programming agent...";
    window.add_message(Message::new(MessageRole::System, prompt.to_owned()));
    let readme = "📚 Project README (from README.md):\n\n# Test Project";
    window.add_message(Message::new(MessageRole::System, readme.to_owned()));
    // They must remain two separate history entries.
    assert_eq!(window.conversation_history.len(), 2);
    // Slot 0 carries no README text...
    assert!(
        !window.conversation_history[0].content.contains("Project README"),
        "System prompt should not contain README content"
    );
    // ...and slot 1 carries no system-prompt text.
    assert!(
        !window.conversation_history[1].content.contains("You are G3"),
        "README message should not contain system prompt"
    );
}
/// Test that TODO is added as third message after system prompt and README
#[test]
fn test_todo_is_third_message_after_readme() {
    let mut window = ContextWindow::new(10000);
    // Replay the agent's initialization order: prompt, README, then TODO.
    let prompt = "You are G3, an AI programming agent...";
    window.add_message(Message::new(MessageRole::System, prompt.to_owned()));
    let readme = "📚 Project README (from README.md):\n\n# Test Project";
    window.add_message(Message::new(MessageRole::System, readme.to_owned()));
    let todo = "📋 Existing TODO list (from todo.g3.md):\n\n- [ ] Task 1\n- [x] Task 2";
    window.add_message(Message::new(MessageRole::System, todo.to_owned()));
    // All three should be present, in insertion order.
    assert_eq!(window.conversation_history.len(), 3);
    assert!(
        window.conversation_history[0].content.contains("You are G3"),
        "First message should be system prompt"
    );
    assert!(
        window.conversation_history[1].content.contains("Project README"),
        "Second message should be README"
    );
    assert!(
        window.conversation_history[2].content.contains("TODO list"),
        "Third message should be TODO"
    );
}
/// Test that AGENTS.md content is combined with README in the same message
#[test]
fn test_agents_and_readme_combined() {
    let mut window = ContextWindow::new(10000);
    let prompt = "You are G3, an AI programming agent...";
    window.add_message(Message::new(MessageRole::System, prompt.to_owned()));
    // g3-cli merges AGENTS.md and README.md into a single System message.
    let merged = "# Agent Configuration\n\nSpecial instructions.\n\n# Project README\n\nProject description.";
    window.add_message(Message::new(MessageRole::System, merged.to_owned()));
    // Still only two messages total.
    assert_eq!(window.conversation_history.len(), 2);
    // Both documents must be present in the one combined message.
    let combined = &window.conversation_history[1];
    assert!(
        combined.content.contains("Agent Configuration"),
        "Combined message should contain AGENTS.md content"
    );
    assert!(
        combined.content.contains("Project README"),
        "Combined message should contain README content"
    );
}
/// Test that user messages come after system messages
#[test]
fn test_user_messages_after_system_messages() {
    let mut window = ContextWindow::new(10000);
    // Two System messages first (prompt + README)...
    let prompt = "You are G3, an AI programming agent...";
    window.add_message(Message::new(MessageRole::System, prompt.to_owned()));
    let readme = "📚 Project README (from README.md):\n\n# Test Project";
    window.add_message(Message::new(MessageRole::System, readme.to_owned()));
    // ...then the first user message.
    window.add_message(Message::new(
        MessageRole::User,
        "Please help me with this task.".to_owned(),
    ));
    // Roles must appear in System, System, User order.
    assert_eq!(window.conversation_history.len(), 3);
    assert!(matches!(window.conversation_history[0].role, MessageRole::System));
    assert!(matches!(window.conversation_history[1].role, MessageRole::System));
    assert!(matches!(window.conversation_history[2].role, MessageRole::User));
}
/// Test that empty README content is not added
#[test]
fn test_empty_readme_not_added() {
    let mut window = ContextWindow::new(10000);
    let prompt = "You are G3, an AI programming agent...";
    window.add_message(Message::new(MessageRole::System, prompt.to_owned()));
    // A whitespace-only README should be rejected by add_message's
    // empty-content check and never reach the history.
    let empty_readme = " "; // whitespace only
    window.add_message(Message::new(MessageRole::System, empty_readme.to_owned()));
    assert_eq!(
        window.conversation_history.len(),
        1,
        "Empty README should not be added to conversation history"
    );
}
/// Test the reload_readme detection logic
#[test]
fn test_readme_detection_for_reload() {
    let mut window = ContextWindow::new(10000);
    let prompt = "You are G3, an AI programming agent...";
    window.add_message(Message::new(MessageRole::System, prompt.to_owned()));
    // README carrying one of the marker headings the reload logic looks for.
    let readme = "# Project README\n\nThis is the project description.";
    window.add_message(Message::new(MessageRole::System, readme.to_owned()));
    // Same detection predicate used by reload_readme: a System message at
    // index 1 containing either marker heading.
    let has_readme = window.conversation_history.get(1).map_or(false, |m| {
        matches!(m.role, MessageRole::System)
            && (m.content.contains("Project README")
                || m.content.contains("Agent Configuration"))
    });
    assert!(has_readme, "Should detect README at index 1");
}
/// Test that README detection fails when no README is present
#[test]
fn test_readme_detection_without_readme() {
    let mut window = ContextWindow::new(10000);
    let prompt = "You are G3, an AI programming agent...";
    window.add_message(Message::new(MessageRole::System, prompt.to_owned()));
    // Go straight to a user message; no README is ever added.
    window.add_message(Message::new(MessageRole::User, "Hello".to_owned()));
    // The detection predicate must come back false for index 1,
    // because that slot holds a User message, not a README.
    let has_readme = window.conversation_history.get(1).map_or(false, |m| {
        matches!(m.role, MessageRole::System)
            && (m.content.contains("Project README")
                || m.content.contains("Agent Configuration"))
    });
    assert!(!has_readme, "Should not detect README when none exists");
}

View File

@@ -0,0 +1,78 @@
//! Tests for TODO completion detection and file deletion behavior
/// Helper to check if all TODOs are complete (same logic as in lib.rs)
fn all_todos_complete(content: &str) -> bool {
    // An item is still open when a line, after trimming, begins with "- [ ]".
    let any_open = content
        .lines()
        .map(str::trim)
        .any(|line| line.starts_with("- [ ]"));
    // Complete only when nothing is open AND at least one checked item exists
    // (so content with no checkboxes at all is not treated as "done").
    let any_done = content.contains("- [x]") || content.contains("- [X]");
    !any_open && any_done
}
#[test]
fn test_all_complete_lowercase() {
    // Every item checked with lowercase [x] counts as complete.
    assert!(all_todos_complete("# Test\n\n- [x] Done 1\n- [x] Done 2"));
}
#[test]
fn test_all_complete_uppercase() {
    // Uppercase [X] checkmarks are accepted too.
    assert!(all_todos_complete("# Test\n\n- [X] Done 1\n- [X] Done 2"));
}
#[test]
fn test_all_complete_mixed_case() {
    // A mix of [x] and [X] checkmarks still counts as all complete.
    assert!(all_todos_complete("# Test\n\n- [x] Done 1\n- [X] Done 2"));
}
#[test]
fn test_has_incomplete() {
    // One remaining "- [ ]" item means the list is not done.
    assert!(!all_todos_complete("# Test\n\n- [x] Done 1\n- [ ] Not done"));
}
#[test]
fn test_all_incomplete() {
    // Nothing checked at all: definitely not complete.
    assert!(!all_todos_complete("# Test\n\n- [ ] Not done 1\n- [ ] Not done 2"));
}
#[test]
fn test_no_checkboxes() {
    // Content without any checkbox items must not report "complete".
    assert!(!all_todos_complete("# Just a header\n\nSome text without checkboxes"));
}
#[test]
fn test_nested_complete() {
    // Checked children under a checked parent: whole tree is complete.
    assert!(all_todos_complete("# Test\n\n- [x] Parent\n - [x] Child 1\n - [x] Child 2"));
}
#[test]
fn test_nested_incomplete() {
    // A single unchecked child makes the whole tree incomplete.
    assert!(!all_todos_complete("# Test\n\n- [x] Parent\n - [x] Child 1\n - [ ] Child 2"));
}
#[test]
fn test_indented_incomplete() {
    // Indented incomplete items should still be detected, because the
    // helper trims each line before checking the "- [ ]" prefix.
    assert!(!all_todos_complete("# Test\n\n- [x] Done\n - [ ] Indented incomplete"));
}
#[test]
fn test_empty_content() {
    // Empty input: nothing is checked, so it is not "complete".
    assert!(!all_todos_complete(""));
}
#[test]
fn test_whitespace_only() {
    // Whitespace-only input behaves like empty input: not complete.
    assert!(!all_todos_complete(" \n\n "));
}

View File

@@ -1,103 +1,170 @@
use g3_core::ContextWindow; use g3_core::ContextWindow;
use g3_providers::Usage; use g3_providers::{Message, MessageRole, Usage};
/// Test that used_tokens is tracked via add_message, not update_usage_from_response.
/// This is critical for the 80% summarization threshold to work correctly.
#[test] #[test]
fn test_token_accumulation() { fn test_used_tokens_tracked_via_messages() {
let mut window = ContextWindow::new(10000); let mut window = ContextWindow::new(10000);
// First API call: 100 prompt + 50 completion = 150 total // Add a user message - this should update used_tokens
let usage1 = Usage { let user_msg = Message::new(MessageRole::User, "Hello, how are you?".to_string());
window.add_message(user_msg);
// used_tokens should be non-zero after adding a message
assert!(window.used_tokens > 0, "used_tokens should increase after add_message");
let tokens_after_user_msg = window.used_tokens;
// Add an assistant message
let assistant_msg = Message::new(MessageRole::Assistant, "I'm doing well, thank you!".to_string());
window.add_message(assistant_msg);
// used_tokens should increase further
assert!(window.used_tokens > tokens_after_user_msg, "used_tokens should increase after adding assistant message");
}
/// Test that update_usage_from_response only updates cumulative_tokens, not used_tokens.
/// This prevents double-counting which was causing the 80% threshold to be reached at 200%+.
#[test]
fn test_update_usage_only_affects_cumulative() {
let mut window = ContextWindow::new(10000);
// Initial state
assert_eq!(window.used_tokens, 0);
assert_eq!(window.cumulative_tokens, 0);
// Simulate API response with usage data
let usage = Usage {
prompt_tokens: 100, prompt_tokens: 100,
completion_tokens: 50, completion_tokens: 50,
total_tokens: 150, total_tokens: 150,
}; };
window.update_usage_from_response(&usage1); window.update_usage_from_response(&usage);
assert_eq!(window.used_tokens, 150, "First call should have 150 tokens");
assert_eq!(window.cumulative_tokens, 150, "Cumulative should be 150");
// Second API call: 200 prompt + 75 completion = 275 total // used_tokens should NOT change - it's tracked via add_message
assert_eq!(window.used_tokens, 0, "used_tokens should not be updated by update_usage_from_response");
// cumulative_tokens SHOULD be updated for API usage tracking
assert_eq!(window.cumulative_tokens, 150, "cumulative_tokens should track total API usage");
// Another API call
let usage2 = Usage { let usage2 = Usage {
prompt_tokens: 200, prompt_tokens: 200,
completion_tokens: 75, completion_tokens: 75,
total_tokens: 275, total_tokens: 275,
}; };
window.update_usage_from_response(&usage2); window.update_usage_from_response(&usage2);
assert_eq!(
window.used_tokens, 425,
"Second call should accumulate to 425 tokens"
);
assert_eq!(window.cumulative_tokens, 425, "Cumulative should be 425");
// Third API call with SMALLER token count: 50 prompt + 25 completion = 75 total // used_tokens still unchanged
let usage3 = Usage { assert_eq!(window.used_tokens, 0, "used_tokens should remain unchanged");
prompt_tokens: 50,
completion_tokens: 25,
total_tokens: 75,
};
window.update_usage_from_response(&usage3);
assert_eq!(
window.used_tokens, 500,
"Third call should accumulate to 500 tokens"
);
assert_eq!(window.cumulative_tokens, 500, "Cumulative should be 500");
// Verify tokens never decrease // cumulative_tokens accumulates
assert!( assert_eq!(window.cumulative_tokens, 425, "cumulative_tokens should accumulate");
window.used_tokens >= 425,
"Token count should never decrease!"
);
} }
/// Test that add_streaming_tokens only updates cumulative_tokens.
/// The assistant message will be added via add_message which tracks used_tokens.
#[test] #[test]
fn test_add_streaming_tokens() { fn test_add_streaming_tokens_only_affects_cumulative() {
let mut window = ContextWindow::new(10000); let mut window = ContextWindow::new(10000);
// Add some streaming tokens // Add streaming tokens (fallback when no usage data available)
window.add_streaming_tokens(100); window.add_streaming_tokens(100);
assert_eq!(window.used_tokens, 100);
assert_eq!(window.cumulative_tokens, 100);
// Add more // used_tokens should NOT change
assert_eq!(window.used_tokens, 0, "used_tokens should not be updated by add_streaming_tokens");
// cumulative_tokens SHOULD be updated
assert_eq!(window.cumulative_tokens, 100, "cumulative_tokens should be updated");
// Add more streaming tokens
window.add_streaming_tokens(50); window.add_streaming_tokens(50);
assert_eq!(window.used_tokens, 150); assert_eq!(window.used_tokens, 0);
assert_eq!(window.cumulative_tokens, 150); assert_eq!(window.cumulative_tokens, 150);
// Now update from provider response
let usage = Usage {
prompt_tokens: 80,
completion_tokens: 40,
total_tokens: 120,
};
window.update_usage_from_response(&usage);
// Should ADD to existing, not replace
assert_eq!(window.used_tokens, 270, "Should add 120 to existing 150");
assert_eq!(window.cumulative_tokens, 270);
} }
/// Test percentage calculation is based on used_tokens (actual context content).
#[test] #[test]
fn test_percentage_calculation() { fn test_percentage_based_on_used_tokens() {
let mut window = ContextWindow::new(1000); let mut window = ContextWindow::new(1000);
// Add tokens via provider response // Initially 0%
assert_eq!(window.percentage_used(), 0.0);
assert_eq!(window.remaining_tokens(), 1000);
// Add messages to increase used_tokens
// A message with ~100 chars should be roughly 25-30 tokens
let msg = Message::new(MessageRole::User, "x".repeat(400)); // ~100 tokens estimated
window.add_message(msg);
// Percentage should be based on used_tokens
let percentage = window.percentage_used();
assert!(percentage > 0.0, "percentage should be > 0 after adding message");
assert!(percentage < 100.0, "percentage should be < 100");
// remaining_tokens should decrease
assert!(window.remaining_tokens() < 1000, "remaining tokens should decrease");
}
/// Test that the 80% summarization threshold works correctly.
/// This was the original bug - used_tokens was being double/triple counted.
#[test]
fn test_should_summarize_threshold() {
let mut window = ContextWindow::new(1000);
// Add messages until we approach 80%
// Each message of ~320 chars is roughly 80 tokens (at 4 chars/token)
for _ in 0..9 {
let msg = Message::new(MessageRole::User, "x".repeat(320));
window.add_message(msg);
}
// Should be around 720 tokens (72%) - not yet at threshold
// Note: actual token count depends on estimation algorithm
let percentage = window.percentage_used();
println!("After 9 messages: {}% used ({} tokens)", percentage, window.used_tokens);
// Add one more message to push over 80%
let msg = Message::new(MessageRole::User, "x".repeat(320));
window.add_message(msg);
let percentage_after = window.percentage_used();
println!("After 10 messages: {}% used ({} tokens)", percentage_after, window.used_tokens);
// Now should_summarize should return true if we're at 80%+
if percentage_after >= 80.0 {
assert!(window.should_summarize(), "should_summarize should be true at 80%+");
}
}
/// Test that cumulative_tokens and used_tokens are independent.
#[test]
fn test_cumulative_vs_used_independence() {
let mut window = ContextWindow::new(10000);
// Add a message (affects used_tokens)
let msg = Message::new(MessageRole::User, "Hello world".to_string());
window.add_message(msg);
let used_after_msg = window.used_tokens;
let cumulative_after_msg = window.cumulative_tokens;
// Both should be equal at this point (message adds to both)
assert_eq!(used_after_msg, cumulative_after_msg);
// Now simulate API response (only affects cumulative_tokens)
let usage = Usage { let usage = Usage {
prompt_tokens: 150, prompt_tokens: 500,
completion_tokens: 100, completion_tokens: 200,
total_tokens: 250, total_tokens: 700,
}; };
window.update_usage_from_response(&usage); window.update_usage_from_response(&usage);
assert_eq!(window.percentage_used(), 25.0); // used_tokens unchanged
assert_eq!(window.remaining_tokens(), 750); assert_eq!(window.used_tokens, used_after_msg, "used_tokens should not change from API response");
// Add more tokens // cumulative_tokens increased
let usage2 = Usage { assert_eq!(window.cumulative_tokens, cumulative_after_msg + 700, "cumulative_tokens should increase");
prompt_tokens: 300,
completion_tokens: 200,
total_tokens: 500,
};
window.update_usage_from_response(&usage2);
assert_eq!(window.percentage_used(), 75.0); // They should now be different
assert_eq!(window.remaining_tokens(), 250); assert!(window.cumulative_tokens > window.used_tokens, "cumulative should be greater than used");
} }

View File

@@ -81,6 +81,9 @@ impl UiWriter for MockUiWriter {
.push(format!("CHOICE: {} Options: {:?}", message, options)); .push(format!("CHOICE: {} Options: {:?}", message, options));
self.choice_responses.lock().unwrap().pop().unwrap_or(0) self.choice_responses.lock().unwrap().pop().unwrap_or(0)
} }
fn print_final_output(&self, summary: &str) {
self.output.lock().unwrap().push(format!("FINAL: {}", summary));
}
} }
#[tokio::test] #[tokio::test]

View File

@@ -26,6 +26,7 @@
//! Some(0.1), //! Some(0.1),
//! None, // cache_config //! None, // cache_config
//! None, // enable_1m_context //! None, // enable_1m_context
//! None, // thinking_budget_tokens
//! )?; //! )?;
//! //!
//! // Create a completion request //! // Create a completion request
@@ -63,6 +64,7 @@
//! None, //! None,
//! None, // cache_config //! None, // cache_config
//! None, // enable_1m_context //! None, // enable_1m_context
//! None, // thinking_budget_tokens
//! )?; //! )?;
//! //!
//! let request = CompletionRequest { //! let request = CompletionRequest {
@@ -103,7 +105,7 @@ use serde::{Deserialize, Serialize};
use std::time::Duration; use std::time::Duration;
use tokio::sync::mpsc; use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream; use tokio_stream::wrappers::ReceiverStream;
use tracing::{debug, error, warn}; use tracing::{debug, error};
use crate::{ use crate::{
CompletionChunk, CompletionRequest, CompletionResponse, CompletionStream, LLMProvider, Message, CompletionChunk, CompletionRequest, CompletionResponse, CompletionStream, LLMProvider, Message,
@@ -122,6 +124,7 @@ pub struct AnthropicProvider {
temperature: f32, temperature: f32,
cache_config: Option<String>, cache_config: Option<String>,
enable_1m_context: bool, enable_1m_context: bool,
thinking_budget_tokens: Option<u32>,
} }
impl AnthropicProvider { impl AnthropicProvider {
@@ -132,6 +135,7 @@ impl AnthropicProvider {
temperature: Option<f32>, temperature: Option<f32>,
cache_config: Option<String>, cache_config: Option<String>,
enable_1m_context: Option<bool>, enable_1m_context: Option<bool>,
thinking_budget_tokens: Option<u32>,
) -> Result<Self> { ) -> Result<Self> {
let client = Client::builder() let client = Client::builder()
.timeout(Duration::from_secs(300)) .timeout(Duration::from_secs(300))
@@ -150,6 +154,7 @@ impl AnthropicProvider {
temperature: temperature.unwrap_or(0.1), temperature: temperature.unwrap_or(0.1),
cache_config, cache_config,
enable_1m_context: enable_1m_context.unwrap_or(false), enable_1m_context: enable_1m_context.unwrap_or(false),
thinking_budget_tokens,
}) })
} }
@@ -223,11 +228,13 @@ impl AnthropicProvider {
for message in messages { for message in messages {
match message.role { match message.role {
MessageRole::System => { MessageRole::System => {
if system_message.is_some() { if let Some(existing) = system_message {
warn!("Multiple system messages found, using the last one"); // Concatenate system messages instead of replacing
} system_message = Some(format!("{}\n\n{}", existing, message.content));
} else {
system_message = Some(message.content.clone()); system_message = Some(message.content.clone());
} }
}
MessageRole::User => { MessageRole::User => {
anthropic_messages.push(AnthropicMessage { anthropic_messages.push(AnthropicMessage {
role: "user".to_string(), role: "user".to_string(),
@@ -277,6 +284,11 @@ impl AnthropicProvider {
// Convert tools if provided // Convert tools if provided
let anthropic_tools = tools.map(|t| self.convert_tools(t)); let anthropic_tools = tools.map(|t| self.convert_tools(t));
// Add thinking configuration if budget_tokens is set
let thinking = self.thinking_budget_tokens.map(|budget| {
ThinkingConfig::enabled(budget)
});
let request = AnthropicRequest { let request = AnthropicRequest {
model: self.model.clone(), model: self.model.clone(),
max_tokens, max_tokens,
@@ -285,6 +297,7 @@ impl AnthropicProvider {
system, system,
tools: anthropic_tools, tools: anthropic_tools,
stream: streaming, stream: streaming,
thinking,
}; };
// Ensure the conversation starts with a user message // Ensure the conversation starts with a user message
@@ -775,6 +788,19 @@ impl LLMProvider for AnthropicProvider {
// Anthropic API request/response structures // Anthropic API request/response structures
#[derive(Debug, Serialize)]
struct ThinkingConfig {
#[serde(rename = "type")]
thinking_type: String,
budget_tokens: u32,
}
impl ThinkingConfig {
fn enabled(budget_tokens: u32) -> Self {
Self { thinking_type: "enabled".to_string(), budget_tokens }
}
}
#[derive(Debug, Serialize)] #[derive(Debug, Serialize)]
struct AnthropicRequest { struct AnthropicRequest {
model: String, model: String,
@@ -786,6 +812,8 @@ struct AnthropicRequest {
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
tools: Option<Vec<AnthropicTool>>, tools: Option<Vec<AnthropicTool>>,
stream: bool, stream: bool,
#[serde(skip_serializing_if = "Option::is_none")]
thinking: Option<ThinkingConfig>,
} }
#[derive(Debug, Serialize)] #[derive(Debug, Serialize)]
@@ -884,7 +912,7 @@ mod tests {
#[test] #[test]
fn test_message_conversion() { fn test_message_conversion() {
let provider = let provider =
AnthropicProvider::new("test-key".to_string(), None, None, None, None, None).unwrap(); AnthropicProvider::new("test-key".to_string(), None, None, None, None, None, None).unwrap();
let messages = vec![ let messages = vec![
Message::new( Message::new(
@@ -912,6 +940,7 @@ mod tests {
Some(0.5), Some(0.5),
None, None,
None, None,
None,
) )
.unwrap(); .unwrap();
@@ -932,7 +961,7 @@ mod tests {
#[test] #[test]
fn test_tool_conversion() { fn test_tool_conversion() {
let provider = let provider =
AnthropicProvider::new("test-key".to_string(), None, None, None, None, None).unwrap(); AnthropicProvider::new("test-key".to_string(), None, None, None, None, None, None).unwrap();
let tools = vec![Tool { let tools = vec![Tool {
name: "get_weather".to_string(), name: "get_weather".to_string(),
@@ -965,7 +994,7 @@ mod tests {
#[test] #[test]
fn test_cache_control_serialization() { fn test_cache_control_serialization() {
let provider = let provider =
AnthropicProvider::new("test-key".to_string(), None, None, None, None, None).unwrap(); AnthropicProvider::new("test-key".to_string(), None, None, None, None, None, None).unwrap();
// Test message WITHOUT cache_control // Test message WITHOUT cache_control
let messages_without = vec![Message::new(MessageRole::User, "Hello".to_string())]; let messages_without = vec![Message::new(MessageRole::User, "Hello".to_string())];
@@ -1007,4 +1036,46 @@ mod tests {
"JSON should not contain 'cache_control' field or null values when not configured" "JSON should not contain 'cache_control' field or null values when not configured"
); );
} }
#[test]
fn test_thinking_parameter_serialization() {
// Test WITHOUT thinking parameter
let provider_without = AnthropicProvider::new(
"test-key".to_string(),
Some("claude-sonnet-4-5".to_string()),
Some(1000),
Some(0.5),
None,
None,
None, // No thinking budget
)
.unwrap();
let messages = vec![Message::new(MessageRole::User, "Test message".to_string())];
let request_without = provider_without
.create_request_body(&messages, None, false, 1000, 0.5)
.unwrap();
let json_without = serde_json::to_string(&request_without).unwrap();
assert!(!json_without.contains("thinking"), "JSON should not contain 'thinking' field when not configured");
// Test WITH thinking parameter
let provider_with = AnthropicProvider::new(
"test-key".to_string(),
Some("claude-sonnet-4-5".to_string()),
Some(1000),
Some(0.5),
None,
None,
Some(10000), // With thinking budget
)
.unwrap();
let request_with = provider_with
.create_request_body(&messages, None, false, 1000, 0.5)
.unwrap();
let json_with = serde_json::to_string(&request_with).unwrap();
assert!(json_with.contains("thinking"), "JSON should contain 'thinking' field when configured");
assert!(json_with.contains("\"type\":\"enabled\""), "JSON should contain type: enabled");
assert!(json_with.contains("\"budget_tokens\":10000"), "JSON should contain budget_tokens: 10000");
}
} }