Compare commits

...

30 Commits

Author SHA1 Message Date
Jochen
467e300ec2 reorder system prompt 2025-11-26 09:30:26 +11:00
Jochen
f501751bdf Merge pull request #30 from dhanji/fix_tests
Fix tests & add code coverage tool
2025-11-25 10:18:18 +11:00
Jochen
a96a15d1fc add code coverage command 2025-11-21 14:38:58 +11:00
Jochen
24dc7ad642 fix build target 2025-11-21 14:07:31 +11:00
Jochen
a097c3abef first cut 2025-11-21 13:56:36 +11:00
Jochen
34e55050b3 Merge pull request #28 from dhanji/jochen_force_todo_check_at_start
check for stale TODO at startup of autonomous
2025-11-21 12:41:45 +11:00
Jochen
551a577ee1 changed user choice for TODO stale check
user can ignore, mark stale or quit.
2025-11-21 12:35:14 +11:00
Jochen
84718223bc remove minor comment 2025-11-21 12:26:41 +11:00
Jochen
28a83d2dcf check for stale TODOs
on by default, can be disabled
2025-11-21 12:09:01 +11:00
Jochen
0ce905dc74 Merge pull request #26 from dhanji/jochen_log_tool_calls__with_tool_logs
log tool calls, allow multiple calls (optional)
2025-11-21 11:07:23 +11:00
Jochen
9f0d5add1e remove redundant SYSTEM_NATIVE_TOOL_CALLS_MULTIPLE 2025-11-21 11:04:14 +11:00
Jochen
be6c6bfca4 fix ref to system prompt 2025-11-21 10:49:39 +11:00
Jochen
94a41c5c34 don't write warning to console 2025-11-21 10:49:27 +11:00
Jochen
09dbad2d68 allow multiple tool calls, log warnings if there are duplicate calls.
controlled via a flag to the agent config:
allow_multiple_tool_calls = true
2025-11-21 10:49:15 +11:00
Jochen
ffbf410b17 log tool calls 2025-11-21 10:49:02 +11:00
Jochen
c6f3f12b71 Merge pull request #27 from dhanji/jochen_tool_tail
useful shell command for tailing tool logs
2025-11-20 13:31:09 +11:00
Dhanji Prasanna
14c8d066c9 ensure system prompt is always added first 2025-11-20 08:45:03 +11:00
Jochen
e556f06b15 useful command for tailing tool logs 2025-11-19 21:02:42 +11:00
Jochen
b6e226df67 Merge pull request #23 from dhanji/jochen-add-code-instructions
system prompt now includes code style guide
2025-11-19 16:25:20 +11:00
Dhanji R. Prasanna
5b46922047 Merge pull request #25 from dhanji/fix_max_tokens
fix bad max_tokens and context_window logic
2025-11-19 15:55:34 +11:00
Jochen
1069664e16 fix bad max_tokens and context_window logic
for non-databricks code
2025-11-19 13:51:16 +11:00
Dhanji R. Prasanna
725f54b99b Merge pull request #24 from dhanji/jochen_cache_control
Add cache control for Anthropic (won't work via Databricks)
2025-11-19 13:39:09 +11:00
Dhanji R. Prasanna
325aab6b0e Merge pull request #22 from dhanji/micn/console-detection
patching console for detecting g3
2025-11-19 13:37:22 +11:00
Jochen
3f21bdc7b2 fix tests 2025-11-19 12:42:37 +11:00
Jochen
9bffd8b1bf cache_control removed from databricks 2025-11-19 12:15:49 +11:00
Jochen
bfee8040e9 regression tests added 2025-11-19 11:32:14 +11:00
Jochen
a150ba6a55 adds ttl to cache control 2025-11-18 23:23:49 +11:00
Jochen
296bf5a449 adds cache_control 2025-11-18 22:38:52 +11:00
Michael Neale
8d8ddbe4b9 live reloading of detected things 2025-11-14 16:31:46 +11:00
Michael Neale
0466405d87 don't detect console, better process pickup 2025-11-13 18:46:55 +11:00
39 changed files with 2159 additions and 347 deletions

9
Cargo.lock generated
View File

@@ -1365,11 +1365,13 @@ dependencies = [
"dirs 5.0.1",
"g3-config",
"g3-core",
"hex",
"indicatif",
"ratatui",
"rustyline",
"serde",
"serde_json",
"sha2",
"termimad",
"tokio",
"tokio-util",
@@ -1409,6 +1411,7 @@ dependencies = [
"config",
"dirs 5.0.1",
"serde",
"serde_json",
"shellexpand",
"tempfile",
"thiserror 1.0.69",
@@ -1652,6 +1655,12 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
[[package]]
name = "hex"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
[[package]]
name = "home"
version = "0.5.9"

View File

@@ -11,14 +11,27 @@ model = "databricks-claude-sonnet-4"
max_tokens = 4096
temperature = 0.1
use_oauth = true
# cache_config = "ephemeral" # Optional: Enable prompt caching for Claude models
# Options: "ephemeral", "5minute", "1hour"
# Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
# The cache control will be automatically applied to:
# - The system prompt at the start of each session
# - Assistant responses after every 10 tool calls
# - 5minute costs $3/mtok, more details below
# https://docs.claude.com/en/docs/build-with-claude/prompt-caching#pricing
[providers.anthropic]
api_key = "your-anthropic-api-key"
model = "claude-3-haiku-20240307" # Using a faster model for player
model = "claude-sonnet-4-5"
max_tokens = 4096
temperature = 0.3 # Slightly higher temperature for more creative implementations
# cache_config = "ephemeral" # Optional: Enable prompt caching
# Options: "ephemeral", "5minute", "1hour"
# Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
# enable_1m_context = true # optional, more expensive
[agent]
fallback_default_max_tokens = 8192
enable_streaming = true
timeout_seconds = 60
timeout_seconds = 60
allow_multiple_tool_calls = true # Enable multiple tool calls, will usually only work with Anthropic

View File

@@ -15,6 +15,17 @@ max_tokens = 4096 # Per-request output limit (how many tokens the model can gen
temperature = 0.1
use_oauth = true
[providers.anthropic]
api_key = "your-anthropic-api-key"
model = "claude-sonnet-4-5"
max_tokens = 4096
temperature = 0.3 # Slightly higher temperature for more creative implementations
# cache_config = "ephemeral" # Optional: Enable prompt caching
# Options: "ephemeral", "5minute", "1hour"
# Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
# enable_1m_context = true # optional, more expensive
# Multiple OpenAI-compatible providers can be configured with custom names
# Each provider gets its own section under [providers.openai_compatible.<name>]
# [providers.openai_compatible.openrouter]
@@ -46,6 +57,7 @@ timeout_seconds = 60
# Retry configuration for recoverable errors (timeouts, rate limits, etc.)
max_retry_attempts = 3 # Default mode retry attempts
autonomous_max_retry_attempts = 6 # Autonomous mode retry attempts (higher for long-running tasks)
allow_multiple_tool_calls = true # Enable multiple tool calls
[computer_control]
enabled = false # Set to true to enable computer control (requires OS permissions)

View File

@@ -17,6 +17,8 @@ serde_json = { workspace = true }
rustyline = "17.0.1"
dirs = "5.0"
tokio-util = "0.7"
sha2 = "0.10"
hex = "0.4"
indicatif = "0.17"
chrono = { version = "0.4", features = ["serde"] }
crossterm = "0.29.0"

View File

@@ -164,6 +164,7 @@ use rustyline::error::ReadlineError;
use rustyline::DefaultEditor;
use std::path::Path;
use std::path::PathBuf;
use sha2::{Digest, Sha256};
use tokio_util::sync::CancellationToken;
use tracing::{error, info};
@@ -1660,6 +1661,17 @@ async fn run_autonomous(
} else {
output.print("📋 Requirements loaded from requirements.md");
}
// Calculate SHA256 of requirements
let mut hasher = Sha256::new();
hasher.update(requirements.as_bytes());
let requirements_sha = hex::encode(hasher.finalize());
output.print(&format!("🔒 Requirements SHA256: {}", requirements_sha));
// Pass SHA to agent for staleness checking
agent.set_requirements_sha(requirements_sha.clone());
output.print("🔄 Starting coach-player feedback loop...");
// Check if implementation files already exist
@@ -1686,11 +1698,14 @@ async fn run_autonomous(
turn, max_turns
));
// Surface provider info for player agent
agent.print_provider_banner("Player");
// Player mode: implement requirements (with coach feedback if available)
let player_prompt = if coach_feedback.is_empty() {
format!(
"You are G3 in implementation mode. Read and implement the following requirements:\n\n{}\n\nImplement this step by step, creating all necessary files and code.",
requirements
"You are G3 in implementation mode. Read and implement the following requirements:\n\n{}\n\nRequirements SHA256: {}\n\nImplement this step by step, creating all necessary files and code.",
requirements, requirements_sha
)
} else {
format!(
@@ -1879,6 +1894,9 @@ async fn run_autonomous(
let mut coach_agent =
Agent::new_autonomous_with_readme_and_quiet(coach_config, ui_writer, None, quiet).await?;
// Surface provider info for coach agent
coach_agent.print_provider_banner("Coach");
// Ensure coach agent is also in the workspace directory
project.enter_workspace()?;

View File

@@ -91,4 +91,18 @@ impl UiWriter for MachineUiWriter {
fn wants_full_output(&self) -> bool {
true // Machine mode wants complete, untruncated output
}
fn prompt_user_yes_no(&self, message: &str) -> bool {
// In machine mode, we can't interactively prompt, so we log the request and return true
// to allow automation to proceed.
println!("PROMPT_USER_YES_NO: {}", message);
true
}
fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize {
println!("PROMPT_USER_CHOICE: {}", message);
println!("OPTIONS: {:?}", options);
// Default to first option (index 0) for automation
0
}
}

View File

@@ -343,5 +343,40 @@ impl UiWriter for ConsoleUiWriter {
fn flush(&self) {
let _ = io::stdout().flush();
}
fn prompt_user_yes_no(&self, message: &str) -> bool {
print!("{} [y/N] ", message);
let _ = io::stdout().flush();
let mut input = String::new();
if io::stdin().read_line(&mut input).is_ok() {
let trimmed = input.trim().to_lowercase();
trimmed == "y" || trimmed == "yes"
} else {
false
}
}
fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize {
println!("{} ", message);
for (i, option) in options.iter().enumerate() {
println!(" [{}] {}", i + 1, option);
}
print!("Select an option (1-{}): ", options.len());
let _ = io::stdout().flush();
loop {
let mut input = String::new();
if io::stdin().read_line(&mut input).is_ok() {
if let Ok(choice) = input.trim().parse::<usize>() {
if choice > 0 && choice <= options.len() {
return choice - 1;
}
}
}
print!("Invalid choice. Please select (1-{}): ", options.len());
let _ = io::stdout().flush();
}
}
}

View File

@@ -36,11 +36,20 @@ fn main() {
// Copy the dylib to the output directory so it can be found at runtime
let target_dir = manifest_dir.parent().unwrap().parent().unwrap().join("target");
let profile = env::var("PROFILE").unwrap_or_else(|_| "debug".to_string());
let output_dir = target_dir.join(&profile);
// Determine the actual target directory (could be llvm-cov-target or regular target)
let target_dir_name = env::var("CARGO_TARGET_DIR")
.unwrap_or_else(|_| target_dir.to_string_lossy().to_string());
let actual_target_dir = PathBuf::from(&target_dir_name);
let output_dir = actual_target_dir.join(&profile);
let dylib_src = lib_path.join("libVisionBridge.dylib");
let dylib_dst = output_dir.join("libVisionBridge.dylib");
// Create output directory if it doesn't exist
std::fs::create_dir_all(&output_dir)
.expect(&format!("Failed to create output directory {}", output_dir.display()));
std::fs::copy(&dylib_src, &dylib_dst)
.expect(&format!("Failed to copy dylib from {} to {}", dylib_src.display(), dylib_dst.display()));

View File

@@ -15,3 +15,4 @@ dirs = "5.0"
[dev-dependencies]
tempfile = "3.8"
serde_json = { workspace = true }

View File

@@ -40,6 +40,8 @@ pub struct AnthropicConfig {
pub model: String,
pub max_tokens: Option<u32>,
pub temperature: Option<f32>,
pub cache_config: Option<String>, // "ephemeral", "5minute", "1hour", or None to disable
pub enable_1m_context: Option<bool>, // Enable 1m context window (costs extra)
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -68,10 +70,17 @@ pub struct AgentConfig {
pub max_context_length: Option<u32>,
pub fallback_default_max_tokens: usize,
pub enable_streaming: bool,
pub allow_multiple_tool_calls: bool,
pub timeout_seconds: u64,
pub auto_compact: bool,
pub max_retry_attempts: u32,
pub autonomous_max_retry_attempts: u32,
#[serde(default = "default_check_todo_staleness")]
pub check_todo_staleness: bool,
}
fn default_check_todo_staleness() -> bool {
true
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -143,10 +152,12 @@ impl Default for Config {
max_context_length: None,
fallback_default_max_tokens: 8192,
enable_streaming: true,
allow_multiple_tool_calls: false,
timeout_seconds: 60,
auto_compact: true,
max_retry_attempts: 3,
autonomous_max_retry_attempts: 6,
check_todo_staleness: true,
},
computer_control: ComputerControlConfig::default(),
webdriver: WebDriverConfig::default(),
@@ -263,10 +274,12 @@ impl Config {
max_context_length: None,
fallback_default_max_tokens: 8192,
enable_streaming: true,
allow_multiple_tool_calls: false,
timeout_seconds: 60,
auto_compact: true,
max_retry_attempts: 3,
autonomous_max_retry_attempts: 6,
check_todo_staleness: true,
},
computer_control: ComputerControlConfig::default(),
webdriver: WebDriverConfig::default(),

View File

@@ -0,0 +1,40 @@
#[cfg(test)]
mod test_multiple_tool_calls {
use g3_config::{Config, AgentConfig};
#[test]
fn test_config_has_multiple_tool_calls_field() {
let config = Config::default();
// Test that the field exists and defaults to false
assert_eq!(config.agent.allow_multiple_tool_calls, false);
// Test that we can create a config with the field set to true
let mut custom_config = Config::default();
custom_config.agent.allow_multiple_tool_calls = true;
assert_eq!(custom_config.agent.allow_multiple_tool_calls, true);
}
#[test]
fn test_agent_config_serialization() {
let agent_config = AgentConfig {
max_context_length: Some(100000),
fallback_default_max_tokens: 8192,
enable_streaming: true,
allow_multiple_tool_calls: true,
timeout_seconds: 60,
auto_compact: true,
max_retry_attempts: 3,
autonomous_max_retry_attempts: 6,
check_todo_staleness: true,
};
// Test serialization
let json = serde_json::to_string(&agent_config).unwrap();
assert!(json.contains("\"allow_multiple_tool_calls\":true"));
// Test deserialization
let deserialized: AgentConfig = serde_json::from_str(&json).unwrap();
assert_eq!(deserialized.allow_multiple_tool_calls, true);
}
}

View File

@@ -6,6 +6,9 @@ authors = ["G3 Team"]
description = "Web console for monitoring and managing g3 instances"
license = "MIT"
[lib]
path = "src/lib.rs"
[[bin]]
name = "g3-console"
path = "src/main.rs"

View File

@@ -0,0 +1,5 @@
pub mod api;
pub mod logs;
pub mod models;
pub mod process;
pub mod launch;

View File

@@ -0,0 +1,256 @@
use crate::models::{InstanceStats, TurnInfo};
use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::fs;
use std::path::Path;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LogEntry {
pub timestamp: Option<DateTime<Utc>>,
pub role: Option<String>,
pub content: Option<String>,
pub tool_calls: Option<Vec<Value>>,
pub raw: Value,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatMessage {
pub role: String,
pub content: String,
pub timestamp: Option<DateTime<Utc>>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
pub name: String,
pub parameters: Value,
pub result: Option<String>,
pub timestamp: Option<DateTime<Utc>>,
}
pub struct LogParser;
impl LogParser {
/// Parse logs from a workspace directory
pub fn parse_logs(workspace: &Path) -> Result<Vec<LogEntry>> {
let logs_dir = workspace.join("logs");
if !logs_dir.exists() {
return Ok(Vec::new());
}
let mut entries = Vec::new();
// Read all JSON log files
for entry in fs::read_dir(&logs_dir).context("Failed to read logs directory")? {
let entry = entry?;
let path = entry.path();
if path.extension().and_then(|s| s.to_str()) == Some("json") {
if let Ok(content) = fs::read_to_string(&path) {
if let Ok(json) = serde_json::from_str::<Value>(&content) {
// Try to parse as a log session
if let Some(messages) = json.get("messages").and_then(|m| m.as_array()) {
for msg in messages {
entries.push(LogEntry {
timestamp: msg.get("timestamp")
.and_then(|t| t.as_str())
.and_then(|s| DateTime::parse_from_rfc3339(s).ok())
.map(|dt| dt.with_timezone(&Utc)),
role: msg.get("role")
.and_then(|r| r.as_str())
.map(String::from),
content: msg.get("content")
.and_then(|c| c.as_str())
.map(String::from),
tool_calls: msg.get("tool_calls")
.and_then(|tc| tc.as_array())
.map(|arr| arr.clone()),
raw: msg.clone(),
});
}
}
}
}
}
}
// Sort by timestamp
entries.sort_by(|a, b| {
match (&a.timestamp, &b.timestamp) {
(Some(t1), Some(t2)) => t1.cmp(t2),
(Some(_), None) => std::cmp::Ordering::Less,
(None, Some(_)) => std::cmp::Ordering::Greater,
(None, None) => std::cmp::Ordering::Equal,
}
});
Ok(entries)
}
/// Extract chat messages from log entries
pub fn extract_chat_messages(entries: &[LogEntry]) -> Vec<ChatMessage> {
entries
.iter()
.filter_map(|entry| {
let role = entry.role.clone()?;
let content = entry.content.clone()?;
Some(ChatMessage {
role,
content,
timestamp: entry.timestamp,
})
})
.collect()
}
/// Extract tool calls from log entries
pub fn extract_tool_calls(entries: &[LogEntry]) -> Vec<ToolCall> {
let mut tool_calls = Vec::new();
for entry in entries {
if let Some(calls) = &entry.tool_calls {
for call in calls {
if let Some(name) = call.get("name").and_then(|n| n.as_str()) {
tool_calls.push(ToolCall {
name: name.to_string(),
parameters: call.get("parameters")
.cloned()
.unwrap_or(Value::Object(serde_json::Map::new())),
result: call.get("result")
.and_then(|r| r.as_str())
.map(String::from),
timestamp: entry.timestamp,
});
}
}
}
}
tool_calls
}
}
pub struct StatsAggregator;
impl StatsAggregator {
/// Aggregate statistics from log entries
pub fn aggregate_stats(
entries: &[LogEntry],
start_time: DateTime<Utc>,
is_ensemble: bool,
) -> InstanceStats {
let total_tokens = Self::count_tokens(entries);
let tool_calls = Self::count_tool_calls(entries);
let errors = Self::count_errors(entries);
let duration_secs = if let Some(last_entry) = entries.last() {
if let Some(last_time) = last_entry.timestamp {
(last_time - start_time).num_seconds().max(0) as u64
} else {
(Utc::now() - start_time).num_seconds().max(0) as u64
}
} else {
(Utc::now() - start_time).num_seconds().max(0) as u64
};
let turns = if is_ensemble {
Some(Self::extract_turns(entries))
} else {
None
};
InstanceStats {
total_tokens,
tool_calls,
errors,
duration_secs,
turns,
}
}
/// Get the latest message content from log entries
pub fn get_latest_message(entries: &[LogEntry]) -> Option<String> {
entries
.iter()
.rev()
.find(|entry| entry.role.as_deref() == Some("assistant"))
.and_then(|entry| entry.content.clone())
.or_else(|| {
entries
.iter()
.rev()
.find(|entry| entry.content.is_some())
.and_then(|entry| entry.content.clone())
})
}
fn count_tokens(entries: &[LogEntry]) -> u64 {
// Try to extract token counts from metadata
entries
.iter()
.filter_map(|entry| {
entry.raw.get("usage")
.and_then(|u| u.get("total_tokens"))
.and_then(|t| t.as_u64())
})
.sum()
}
fn count_tool_calls(entries: &[LogEntry]) -> u64 {
entries
.iter()
.filter_map(|entry| entry.tool_calls.as_ref())
.map(|calls| calls.len() as u64)
.sum()
}
fn count_errors(entries: &[LogEntry]) -> u64 {
entries
.iter()
.filter(|entry| {
entry.raw.get("error").is_some()
|| entry.content.as_ref().map(|c| c.to_lowercase().contains("error")).unwrap_or(false)
})
.count() as u64
}
fn extract_turns(entries: &[LogEntry]) -> Vec<TurnInfo> {
// Simple implementation: group consecutive assistant messages as turns
let mut turns = Vec::new();
let mut current_turn_start: Option<DateTime<Utc>> = None;
let mut turn_count = 0;
for entry in entries {
if entry.role.as_deref() == Some("assistant") {
if current_turn_start.is_none() {
current_turn_start = entry.timestamp;
turn_count += 1;
}
} else if entry.role.as_deref() == Some("user") {
if let Some(start) = current_turn_start {
if let Some(end) = entry.timestamp {
let duration = (end - start).num_seconds().max(0) as u64;
turns.push(TurnInfo {
agent: format!("agent-{}", turn_count),
duration_secs: duration,
status: "completed".to_string(),
color: Self::get_turn_color(turn_count),
});
}
current_turn_start = None;
}
}
}
turns
}
fn get_turn_color(turn_number: usize) -> String {
let colors = vec!["blue", "green", "purple", "orange", "pink", "teal"];
colors[turn_number % colors.len()].to_string()
}
}

View File

@@ -1,8 +1,6 @@
mod api;
mod logs;
mod models;
mod process;
mod launch;
use g3_console::api;
use g3_console::process;
use g3_console::launch;
use api::control::{kill_instance, launch_instance, restart_instance};
use api::instances::{get_instance, get_file_content, list_instances};

View File

@@ -3,7 +3,7 @@ use anyhow::Result;
use chrono::{DateTime, Utc};
use std::path::PathBuf;
use sysinfo::{System, Pid, Process};
use tracing::{debug, warn};
use tracing::{debug, info, warn};
pub struct ProcessDetector {
system: System,
@@ -17,7 +17,11 @@ impl ProcessDetector {
}
pub fn detect_instances(&mut self) -> Result<Vec<Instance>> {
self.system.refresh_processes();
info!("Scanning for g3 processes...");
// Refresh all processes to ensure we catch newly started ones
// Using refresh_all() instead of just refresh_processes() to ensure
// we get complete information about new processes
self.system.refresh_all();
let mut instances = Vec::new();
// Find all g3 processes
@@ -33,7 +37,7 @@ impl ProcessDetector {
}
}
debug!("Detected {} g3 instances", instances.len());
info!("Detected {} g3 instances", instances.len());
Ok(instances)
}
@@ -45,24 +49,27 @@ impl ProcessDetector {
) -> Option<Instance> {
let cmd_str = cmd.join(" ");
// Exclude g3-console itself
if cmd_str.contains("g3-console") {
return None;
}
// Check if this is a g3 binary (more comprehensive check)
let is_g3_binary = cmd.get(0).map(|s| {
s.ends_with("g3") || s.ends_with("/g3") || s.contains("/target/release/g3") || s.contains("/target/debug/g3")
(s.ends_with("g3") || s.ends_with("/g3") || s.contains("/target/release/g3") || s.contains("/target/debug/g3"))
&& !s.contains("g3-") // Exclude other g3-* binaries
}).unwrap_or(false);
// Check if this is cargo run with g3
let is_cargo_run = cmd.get(0).map(|s| s.contains("cargo")).unwrap_or(false) && cmd.iter().any(|s| s == "run");
// Check if this is cargo run with g3 (not g3-console or other variants)
let is_cargo_run = cmd.get(0).map(|s| s.contains("cargo")).unwrap_or(false)
&& cmd.iter().any(|s| s == "run")
&& !cmd_str.contains("g3-console");
// Also check if any part of the command line contains g3-related patterns
let has_g3_pattern = cmd_str.contains("g3 ")
|| cmd_str.contains("/g3 ")
|| cmd_str.contains("g3-")
|| cmd_str.ends_with("g3")
|| cmd_str.contains("--workspace") // g3-specific flag
|| cmd_str.contains("--autonomous"); // g3-specific flag
// Also check if command line has g3-specific flags
let has_g3_flags = cmd_str.contains("--workspace") || cmd_str.contains("--autonomous");
// Accept if it's a g3 binary, cargo run with g3 patterns, or has g3-specific flags
let is_g3_process = is_g3_binary || (is_cargo_run && has_g3_pattern) || has_g3_pattern;
// Accept if it's a g3 binary or cargo run with g3, and has typical g3 patterns
let is_g3_process = is_g3_binary || (is_cargo_run && has_g3_flags);
if !is_g3_process {
return None;
@@ -165,7 +172,7 @@ impl ProcessDetector {
}
pub fn get_process_status(&mut self, pid: u32) -> Option<InstanceStatus> {
self.system.refresh_processes();
self.system.refresh_all();
let sysinfo_pid = Pid::from_u32(pid);
if self.system.process(sysinfo_pid).is_some() {

View File

@@ -15,7 +15,7 @@
<div id="app">
<header class="header">
<div class="header-content">
<h1 class="header-title">G3 Console</h1>
<h1 class="header-title">G3 Console <span id="live-indicator" class="live-indicator" title="Scanning for processes every 3 seconds">● LIVE</span></h1>
<div class="header-actions">
<button id="new-run-btn" class="btn btn-primary">+ New Run</button>
<button id="theme-toggle" class="btn btn-secondary">🌙</button>

View File

@@ -6,6 +6,7 @@ const router = {
currentInstanceId: null,
initialized: false,
renderInProgress: false,
REFRESH_INTERVAL_MS: 3000, // Refresh every 3 seconds for live updates
init() {
console.log('[Router] init() called');
@@ -84,6 +85,9 @@ const router = {
this.renderInProgress = true;
try {
// Flash live indicator
this.flashLiveIndicator();
// Check if we already have a container for instances
let instancesList = container.querySelector('.instances-list');
const isInitialLoad = !instancesList;
@@ -167,11 +171,11 @@ const router = {
// Schedule next refresh only if still on home route
if (this.currentRoute === '/' || this.currentRoute === '') {
console.log('[Router] Scheduling auto-refresh in 5 seconds');
console.log(`[Router] Scheduling auto-refresh in ${this.REFRESH_INTERVAL_MS}ms`);
this.refreshTimeout = setTimeout(() => {
console.log('[Router] Auto-refresh triggered');
this.renderHome(container);
}, 5000);
}, this.REFRESH_INTERVAL_MS);
}
} catch (error) {
console.error('[Router] Error in renderHome:', error);
@@ -187,12 +191,26 @@ const router = {
}
},
flashLiveIndicator() {
const indicator = document.getElementById('live-indicator');
if (indicator) {
indicator.style.animation = 'none';
// Force reflow
void indicator.offsetWidth;
indicator.style.animation = null;
indicator.style.opacity = '1';
}
},
async renderDetail(container, id) {
console.log('[Router] renderDetail called for', id);
this.currentInstanceId = id;
try {
// Flash live indicator
this.flashLiveIndicator();
// Check if we already have a detail view for this instance
let detailView = container.querySelector('.detail-view');
const isInitialLoad = !detailView || detailView.getAttribute('data-instance-id') !== id;

View File

@@ -64,6 +64,22 @@ body {
color: var(--text-primary);
}
.live-indicator {
font-size: 0.625rem; /* 75% of 0.833rem */
font-weight: 600;
color: var(--success);
margin-left: 0.75rem;
display: inline-flex;
align-items: center;
gap: 0.25rem;
animation: pulse 2s ease-in-out infinite;
}
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.5; }
}
.header-actions {
display: flex;
gap: 1rem;

View File

@@ -48,7 +48,7 @@ pub async fn another_async(x: i32) -> Result<(), ()> {
println!("{}\n", "=".repeat(80));
let mut parser = Parser::new();
let language: Language = tree_sitter_rust::language().into();
let language: Language = tree_sitter_rust::LANGUAGE.into();
parser.set_language(&language)?;
let tree = parser.parse(source_code, None).unwrap();

View File

@@ -46,7 +46,7 @@ class MyClass:
println!("{}\n", "=".repeat(80));
let mut parser = Parser::new();
let language: Language = tree_sitter_python::language().into();
let language: Language = tree_sitter_python::LANGUAGE.into();
parser.set_language(&language)?;
let tree = parser.parse(source_code, None).unwrap();

View File

@@ -1,6 +1,7 @@
//! Test Python async query
use tree_sitter::{Parser, Query, QueryCursor, Language};
use streaming_iterator::StreamingIterator;
fn main() -> anyhow::Result<()> {
let source_code = r#"
@@ -12,7 +13,7 @@ async def async_function():
"#;
let mut parser = Parser::new();
let language: Language = tree_sitter_python::language().into();
let language: Language = tree_sitter_python::LANGUAGE.into();
parser.set_language(&language)?;
let tree = parser.parse(source_code, None).unwrap();

View File

@@ -3,6 +3,7 @@ pub mod error_handling;
pub mod project;
pub mod task_result;
pub mod ui_writer;
pub use task_result::TaskResult;
#[cfg(test)]
@@ -25,15 +26,19 @@ use anyhow::Result;
use g3_computer_control::WebDriverController;
use g3_config::Config;
use g3_execution::CodeExecutor;
use g3_providers::{CompletionRequest, Message, MessageRole, ProviderRegistry, Tool};
use g3_providers::{CacheControl, CompletionRequest, Message, MessageRole, ProviderRegistry, Tool};
use chrono::Local;
#[allow(unused_imports)]
use regex::Regex;
use serde::{Deserialize, Serialize};
use serde_json::json;
use std::fs::OpenOptions;
use std::io::Write;
use std::sync::{Mutex, OnceLock};
use std::time::{Duration, Instant};
use tokio_util::sync::CancellationToken;
use tracing::{debug, error, info, warn};
use prompts::{SYSTEM_PROMPT_FOR_NON_NATIVE_TOOL_USE, SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE};
use prompts::{SYSTEM_PROMPT_FOR_NON_NATIVE_TOOL_USE, get_system_prompt_for_native};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
@@ -426,18 +431,12 @@ Format this as a detailed but concise summary that can be used to resume the con
self.used_tokens = 0;
// Add the summary as a system message
let summary_message = Message {
role: MessageRole::System,
content: format!("Previous conversation summary:\n\n{}", summary),
};
let summary_message = Message::new(MessageRole::System, format!("Previous conversation summary:\n\n{}", summary));
self.add_message(summary_message);
// Add the latest user message if provided
if let Some(user_msg) = latest_user_message {
self.add_message(Message {
role: MessageRole::User,
content: user_msg,
});
self.add_message(Message::new(MessageRole::User, user_msg));
}
let new_chars: usize = self
@@ -759,6 +758,8 @@ pub struct Agent<W: UiWriter> {
safaridriver_process: std::sync::Arc<tokio::sync::RwLock<Option<tokio::process::Child>>>,
macax_controller:
std::sync::Arc<tokio::sync::RwLock<Option<g3_computer_control::MacAxController>>>,
tool_call_count: usize,
requirements_sha: Option<String>,
}
impl<W: UiWriter> Agent<W> {
@@ -901,6 +902,8 @@ impl<W: UiWriter> Agent<W> {
Some(anthropic_config.model.clone()),
anthropic_config.max_tokens,
anthropic_config.temperature,
anthropic_config.cache_config.clone(),
anthropic_config.enable_1m_context,
)?;
providers.register(anthropic_provider);
}
@@ -942,15 +945,36 @@ impl<W: UiWriter> Agent<W> {
debug!("Default provider set successfully");
// Determine context window size based on active provider
let context_length = Self::get_configured_context_length(&config, &providers)?;
let mut context_warnings = Vec::new();
let context_length =
Self::get_configured_context_length(&config, &providers, &mut context_warnings)?;
let mut context_window = ContextWindow::new(context_length);
// If README content is provided, add it as the first system message
// Surface any context warnings to the user via UI
for warning in context_warnings {
ui_writer.print_context_status(&format!("⚠️ {}", warning));
}
// Add system prompt as the FIRST message (before README)
// This ensures the agent always has proper tool usage instructions
let provider = providers.get(None)?;
let provider_has_native_tool_calling = provider.has_native_tool_calling();
let _ = provider; // Drop provider reference to avoid borrowing issues
let system_prompt = if provider_has_native_tool_calling {
// For native tool calling providers, use a more explicit system prompt
get_system_prompt_for_native(config.agent.allow_multiple_tool_calls)
} else {
// For non-native providers (embedded models), use JSON format instructions
SYSTEM_PROMPT_FOR_NON_NATIVE_TOOL_USE.to_string()
};
let system_message = Message::new(MessageRole::System, system_prompt);
context_window.add_message(system_message);
// If README content is provided, add it as a second system message (after the main system prompt)
if let Some(readme) = readme_content {
let readme_message = Message {
role: MessageRole::System,
content: readme,
};
let readme_message = Message::new(MessageRole::System, readme);
context_window.add_message(readme_message);
}
@@ -1006,27 +1030,119 @@ impl<W: UiWriter> Agent<W> {
None
}))
},
tool_call_count: 0,
requirements_sha: None,
})
}
fn get_configured_context_length(config: &Config, providers: &ProviderRegistry) -> Result<u32> {
/// Validate that the system prompt is the first message in the conversation history.
/// This is a critical invariant that must be maintained for proper agent operation.
///
/// # Panics
/// Panics if:
/// - The conversation history is empty
/// - The first message is not a System message
/// - The first message doesn't contain the system prompt markers
fn validate_system_prompt_is_first(&self) {
if self.context_window.conversation_history.is_empty() {
panic!(
"FATAL: Conversation history is empty. System prompt must be the first message."
);
}
let first_message = &self.context_window.conversation_history[0];
if !matches!(first_message.role, MessageRole::System) {
panic!(
"FATAL: First message is not a System message. Found: {:?}",
first_message.role
);
}
if !first_message.content.contains("You are G3") {
panic!("FATAL: First system message does not contain the system prompt. This likely means the README was added before the system prompt.");
}
}
/// Convert cache config string to CacheControl enum
fn parse_cache_control(cache_config: &str) -> Option<CacheControl> {
match cache_config {
"ephemeral" => Some(CacheControl::ephemeral()),
"5minute" => Some(CacheControl::five_minute()),
"1hour" => Some(CacheControl::one_hour()),
_ => {
warn!("Invalid cache_config value: '{}'. Valid values are: ephemeral, 5minute, 1hour", cache_config);
None
}
}
}
/// Get the configured max_tokens for a provider from top-level config
fn provider_max_tokens(config: &Config, provider_name: &str) -> Option<u32> {
match provider_name {
"anthropic" => config.providers.anthropic.as_ref()?.max_tokens,
"openai" => config.providers.openai.as_ref()?.max_tokens,
"databricks" => config.providers.databricks.as_ref()?.max_tokens,
"embedded" => config.providers.embedded.as_ref()?.max_tokens,
_ => None,
}
}
/// Resolve the max_tokens to use for a given provider, applying fallbacks
fn resolve_max_tokens(&self, provider_name: &str) -> u32 {
match provider_name {
"databricks" => Self::provider_max_tokens(&self.config, "databricks")
.or(Some(self.config.agent.fallback_default_max_tokens as u32))
.unwrap_or(32000),
other => Self::provider_max_tokens(&self.config, other)
.or(Some(self.config.agent.fallback_default_max_tokens as u32))
.unwrap_or(16000),
}
}
/// Print provider diagnostics through the UiWriter for visibility
pub fn print_provider_banner(&self, role_label: &str) {
if let Ok((provider_name, model)) = self.get_provider_info() {
let max_tokens = self.resolve_max_tokens(&provider_name);
let context_len = self.context_window.total_tokens;
let mut details = vec![
format!("provider={}", provider_name),
format!("model={}", model),
format!("max_tokens={}", max_tokens),
format!("context_window_length={}", context_len),
];
if let Ok(provider) = self.providers.get(None) {
details.push(format!(
"native_tools={}",
if provider.has_native_tool_calling() {
"yes"
} else {
"no"
}
));
if provider.supports_cache_control() {
details.push("cache_control=yes".to_string());
}
}
self.ui_writer
.print_context_status(&format!("{}: {}", role_label, details.join(", ")));
}
}
fn get_configured_context_length(
config: &Config,
providers: &ProviderRegistry,
warnings: &mut Vec<String>,
) -> Result<u32> {
// First, check if there's a global max_context_length override in agent config
if let Some(max_context_length) = config.agent.max_context_length {
debug!("Using configured agent.max_context_length: {}", max_context_length);
return Ok(max_context_length);
}
// Get the configured max_tokens for the current provider
fn get_provider_max_tokens(config: &Config, provider_name: &str) -> Option<u32> {
match provider_name {
"anthropic" => config.providers.anthropic.as_ref()?.max_tokens,
"openai" => config.providers.openai.as_ref()?.max_tokens,
"databricks" => config.providers.databricks.as_ref()?.max_tokens,
"embedded" => config.providers.embedded.as_ref()?.max_tokens,
_ => None,
}
}
// Get the active provider to determine context length
let provider = providers.get(None)?;
let provider_name = provider.name();
@@ -1053,25 +1169,45 @@ impl<W: UiWriter> Agent<W> {
}
"openai" => {
// gpt-5 has 400k window
get_provider_max_tokens(config, "openai").unwrap_or(400000)
if let Some(max_tokens) = Self::provider_max_tokens(config, "openai") {
warnings.push(format!(
"Context length falling back to max_tokens ({}) for provider=openai",
max_tokens
));
max_tokens
} else {
400000
}
}
"anthropic" => {
// Claude models have large context windows
// Use configured max_tokens or fall back to default
get_provider_max_tokens(config, "anthropic").unwrap_or(200000)
if let Some(max_tokens) = Self::provider_max_tokens(config, "anthropic") {
warnings.push(format!(
"Context length falling back to max_tokens ({}) for provider=anthropic",
max_tokens
));
max_tokens
} else {
200000
}
}
"databricks" => {
// Databricks models have varying context windows depending on the model
// Use configured max_tokens or fall back to model-specific defaults
get_provider_max_tokens(config, "databricks").unwrap_or_else(|| {
if model_name.contains("claude") {
200000 // Claude models on Databricks have large context windows
} else if model_name.contains("llama") || model_name.contains("dbrx") {
32768 // DBRX supports 32k context
} else {
16384 // Conservative default for other Databricks models
}
})
if let Some(max_tokens) = Self::provider_max_tokens(config, "databricks") {
warnings.push(format!(
"Context length falling back to max_tokens ({}) for provider=databricks",
max_tokens
));
max_tokens
} else if model_name.contains("claude") {
200000 // Claude models on Databricks have large context windows
} else if model_name.contains("llama") || model_name.contains("dbrx") {
32768 // DBRX supports 32k context
} else {
16384 // Conservative default for other Databricks models
}
}
_ => config.agent.fallback_default_max_tokens as u32,
};
@@ -1084,6 +1220,63 @@ impl<W: UiWriter> Agent<W> {
Ok(context_length)
}
fn tool_log_handle() -> Option<&'static Mutex<std::fs::File>> {
static TOOL_LOG: OnceLock<Option<Mutex<std::fs::File>>> = OnceLock::new();
TOOL_LOG
.get_or_init(|| {
if let Err(e) = std::fs::create_dir_all("logs") {
error!("Failed to create logs directory for tool log: {}", e);
return None;
}
let ts = Local::now().format("%Y%m%d_%H%M%S").to_string();
let path = format!("logs/tool_calls_{}.log", ts);
match OpenOptions::new()
.create(true)
.append(true)
.open(&path)
{
Ok(file) => Some(Mutex::new(file)),
Err(e) => {
error!("Failed to open tool log file {}: {}", path, e);
None
}
}
})
.as_ref()
}
fn log_tool_call(&self, tool_call: &ToolCall, response: &str) {
if let Some(handle) = Self::tool_log_handle() {
let timestamp = Local::now().format("%Y-%m-%d %H:%M:%S").to_string();
let args_str = serde_json::to_string(&tool_call.args)
.unwrap_or_else(|_| "<unserializable>".to_string());
fn sanitize(s: &str) -> String {
s.replace('\n', "\\n")
}
fn truncate(s: &str, limit: usize) -> String {
s.chars().take(limit).collect()
}
let args_snippet = truncate(&sanitize(&args_str), 80);
let response_snippet = truncate(&sanitize(response), 80);
let tool_field = format!("{:<15}", tool_call.tool);
let line = format!(
"{} {} {} 🟩 {}\n",
timestamp, tool_field, args_snippet, response_snippet
);
if let Ok(mut file) = handle.lock() {
let _ = file.write_all(line.as_bytes());
let _ = file.flush();
}
}
}
pub fn get_provider_info(&self) -> Result<(String, String)> {
let provider = self.providers.get(None)?;
Ok((provider.name().to_string(), provider.model().to_string()))
@@ -1171,7 +1364,7 @@ impl<W: UiWriter> Agent<W> {
async fn execute_single_task(
&mut self,
description: &str,
show_prompt: bool,
_show_prompt: bool,
_show_code: bool,
show_timing: bool,
cancellation_token: CancellationToken,
@@ -1179,40 +1372,17 @@ impl<W: UiWriter> Agent<W> {
// Reset the JSON tool call filter state at the start of each new task
// This prevents the filter from staying in suppression mode between user interactions
fixed_filter_json::reset_fixed_json_tool_state();
// Validate that the system prompt is the first message (critical invariant)
self.validate_system_prompt_is_first();
// Generate session ID based on the initial prompt if this is a new session
if self.session_id.is_none() {
self.session_id = Some(self.generate_session_id(description));
}
// Only add system message if this is the first interaction (empty conversation history)
if self.context_window.conversation_history.is_empty() {
let provider = self.providers.get(None)?;
let system_prompt = if provider.has_native_tool_calling() {
// For native tool calling providers, use a more explicit system prompt
SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string()
} else {
// For non-native providers (embedded models), use JSON format instructions
SYSTEM_PROMPT_FOR_NON_NATIVE_TOOL_USE.to_string()
};
if show_prompt {
self.ui_writer.print_system_prompt(&system_prompt);
}
// Add system message to context window
let system_message = Message {
role: MessageRole::System,
content: system_prompt,
};
self.context_window.add_message(system_message);
}
// Add user message to context window
let user_message = Message {
role: MessageRole::User,
content: format!("Task: {}", description),
};
let user_message = Message::new(MessageRole::User, format!("Task: {}", description));
self.context_window.add_message(user_message);
// Use the complete conversation history for the request
@@ -1220,6 +1390,9 @@ impl<W: UiWriter> Agent<W> {
// Check if provider supports native tool calling and add tools if so
let provider = self.providers.get(None)?;
let provider_name = provider.name().to_string();
let _has_native_tool_calling = provider.has_native_tool_calling();
let _supports_cache_control = provider.supports_cache_control();
let tools = if provider.has_native_tool_calling() {
Some(Self::create_tool_definitions(
self.config.webdriver.enabled,
@@ -1229,18 +1402,10 @@ impl<W: UiWriter> Agent<W> {
} else {
None
};
let _ = provider; // Drop the provider reference to avoid borrowing issues
// Get max_tokens from provider configuration
let max_tokens = match provider.name() {
"databricks" => {
// Use the model's maximum limit for Databricks to allow large file generation
Some(32000)
}
_ => {
// Default for other providers
Some(16000)
}
};
// Get max_tokens from provider configuration, falling back to sensible defaults
let max_tokens = Some(self.resolve_max_tokens(&provider_name));
let request = CompletionRequest {
messages,
@@ -1286,9 +1451,23 @@ impl<W: UiWriter> Agent<W> {
// Add assistant response to context window only if not empty
// This prevents the "Skipping empty message" warning when only tools were executed
if !response_content.trim().is_empty() {
let assistant_message = Message {
role: MessageRole::Assistant,
content: response_content.clone(),
let assistant_message = {
// Check if we should use cache control (every 10 tool calls)
if self.tool_call_count > 0 && self.tool_call_count % 10 == 0 {
let provider = self.providers.get(None)?;
if let Some(cache_config) = match provider.name() {
"anthropic" => self.config.providers.anthropic.as_ref()
.and_then(|c| c.cache_config.as_ref())
.and_then(|config| Self::parse_cache_control(config)),
_ => None,
} {
Message::with_cache_control_validated(MessageRole::Assistant, response_content.clone(), cache_config, provider)
} else {
Message::new(MessageRole::Assistant, response_content.clone())
}
} else {
Message::new(MessageRole::Assistant, response_content.clone())
}
};
self.context_window.add_message(assistant_message);
} else {
@@ -1491,17 +1670,11 @@ impl<W: UiWriter> Agent<W> {
.join("\n\n");
let summary_messages = vec![
Message {
role: MessageRole::System,
content: "You are a helpful assistant that creates concise summaries.".to_string(),
},
Message {
role: MessageRole::User,
content: format!(
Message::new(MessageRole::System, "You are a helpful assistant that creates concise summaries.".to_string()),
Message::new(MessageRole::User, format!(
"Based on this conversation history, {}\n\nConversation:\n{}",
summary_prompt, conversation_text
),
},
)),
];
let provider = self.providers.get(None)?;
@@ -1589,17 +1762,21 @@ impl<W: UiWriter> Agent<W> {
pub fn reload_readme(&mut self) -> Result<bool> {
info!("Manual README reload triggered");
// Check if the first message in conversation history is a system message with README content
// Check if the second message in conversation history is a system message with README content
// (The first message should always be the system prompt)
let has_readme = self
.context_window
.conversation_history
.first()
.get(1) // Check the SECOND message (index 1)
.map(|m| {
matches!(m.role, MessageRole::System)
&& (m.content.contains("Project README")
|| m.content.contains("Agent Configuration"))
})
.unwrap_or(false);
// Validate that the system prompt is still first
self.validate_system_prompt_is_first();
if !has_readme {
return Ok(false);
@@ -1623,8 +1800,8 @@ impl<W: UiWriter> Agent<W> {
}
if found_any {
// Replace the first message with the new content
if let Some(first_msg) = self.context_window.conversation_history.first_mut() {
// Replace the second message (README) with the new content
if let Some(first_msg) = self.context_window.conversation_history.get_mut(1) {
first_msg.content = combined_content;
info!("README content reloaded successfully");
Ok(true)
@@ -1804,6 +1981,10 @@ impl<W: UiWriter> Agent<W> {
&self.config
}
pub fn set_requirements_sha(&mut self, sha: String) {
self.requirements_sha = Some(sha);
}
async fn stream_completion(
&mut self,
request: CompletionRequest,
@@ -1977,6 +2158,15 @@ impl<W: UiWriter> Agent<W> {
"required": ["content"]
}),
},
Tool {
name: "code_coverage".to_string(),
description: "Generate a code coverage report for the entire workspace using cargo llvm-cov. This runs all tests with coverage instrumentation and returns a summary of coverage statistics. Requires llvm-tools-preview and cargo-llvm-cov to be installed (they will be auto-installed if missing).".to_string(),
input_schema: json!({
"type": "object",
"properties": {},
"required": []
}),
},
];
// Add code_search tool
@@ -2484,18 +2674,11 @@ impl<W: UiWriter> Agent<W> {
.join("\n\n");
let summary_messages = vec![
Message {
role: MessageRole::System,
content: "You are a helpful assistant that creates concise summaries."
.to_string(),
},
Message {
role: MessageRole::User,
content: format!(
Message::new(MessageRole::System, "You are a helpful assistant that creates concise summaries.".to_string()),
Message::new(MessageRole::User, format!(
"Based on this conversation history, {}\n\nConversation:\n{}",
summary_prompt, conversation_text
),
},
)),
];
let provider = self.providers.get(None)?;
@@ -2622,8 +2805,12 @@ impl<W: UiWriter> Agent<W> {
tokio::time::sleep(tokio::time::Duration::from_millis(50)).await;
}
let provider = self.providers.get(None)?;
debug!("Got provider: {}", provider.name());
// Get provider info for logging, then drop it to avoid borrow issues
let (provider_name, provider_model) = {
let provider = self.providers.get(None)?;
(provider.name().to_string(), provider.model().to_string())
};
debug!("Got provider: {}", provider_name);
// Create error context for detailed logging
let last_prompt = request
@@ -2636,8 +2823,8 @@ impl<W: UiWriter> Agent<W> {
let error_context = ErrorContext::new(
"stream_completion".to_string(),
provider.name().to_string(),
provider.model().to_string(),
provider_name.clone(),
provider_model.clone(),
last_prompt,
self.session_id.clone(),
self.context_window.used_tokens,
@@ -2650,8 +2837,8 @@ impl<W: UiWriter> Agent<W> {
// Log initial request details
debug!("Starting stream with provider={}, model={}, messages={}, tools={}, max_tokens={:?}",
provider.name(),
provider.model(),
provider_name,
provider_model,
request.messages.len(),
request.tools.is_some(),
request.max_tokens
@@ -2741,10 +2928,125 @@ impl<W: UiWriter> Agent<W> {
// Process chunk with the new parser
let completed_tools = parser.process_chunk(&chunk);
// Handle completed tool calls
if let Some(tool_call) = completed_tools.into_iter().next() {
// Handle completed tool calls - process all if multiple calls enabled
let tools_to_process: Vec<ToolCall> = if self.config.agent.allow_multiple_tool_calls {
completed_tools
} else {
// Original behavior - only take the first tool
completed_tools.into_iter().take(1).collect()
};
// Helper function to check if two tool calls are duplicates
let are_duplicates = |tc1: &ToolCall, tc2: &ToolCall| -> bool {
tc1.tool == tc2.tool && tc1.args == tc2.args
};
// De-duplicate tool calls and track duplicates
let mut seen_in_chunk: Vec<ToolCall> = Vec::new();
let mut deduplicated_tools: Vec<(ToolCall, Option<String>)> = Vec::new();
for tool_call in tools_to_process {
let mut duplicate_type = None;
// Check for duplicates in current chunk
if seen_in_chunk.iter().any(|tc| are_duplicates(tc, &tool_call)) {
duplicate_type = Some("DUP IN CHUNK".to_string());
} else {
// Check for duplicate against previous message in history
// Look at the last assistant message that contains tool calls
let mut found_in_prev = false;
for msg in self.context_window.conversation_history.iter().rev() {
if matches!(msg.role, MessageRole::Assistant) {
// Try to parse tool calls from the message content
if msg.content.contains(r#"\"tool\""#) {
// Simple JSON extraction for tool calls
let content = &msg.content;
let mut start_idx = 0;
while let Some(tool_start) = content[start_idx..].find(r#"{\"tool\""#) {
let tool_start = start_idx + tool_start;
// Find the end of this JSON object
let mut brace_count = 0;
let mut in_string = false;
let mut escape_next = false;
let mut end_idx = tool_start;
for (i, ch) in content[tool_start..].char_indices() {
if escape_next {
escape_next = false;
continue;
}
if ch == '\\' && in_string {
escape_next = true;
continue;
}
if ch == '"' && !escape_next {
in_string = !in_string;
}
if !in_string {
if ch == '{' {
brace_count += 1;
} else if ch == '}' {
brace_count -= 1;
if brace_count == 0 {
end_idx = tool_start + i + 1;
break;
}
}
}
}
if end_idx > tool_start {
let tool_json = &content[tool_start..end_idx];
if let Ok(prev_tool) = serde_json::from_str::<ToolCall>(tool_json) {
if are_duplicates(&prev_tool, &tool_call) {
found_in_prev = true;
break;
}
}
}
start_idx = end_idx;
}
}
// Only check the most recent assistant message
break;
}
}
if found_in_prev {
duplicate_type = Some("DUP IN MSG".to_string());
}
}
// Add to seen list if not a duplicate in chunk
if duplicate_type.as_ref().map_or(true, |s| s != "DUP IN CHUNK") {
seen_in_chunk.push(tool_call.clone());
}
deduplicated_tools.push((tool_call, duplicate_type));
}
// Process each tool call
for (tool_call, duplicate_type) in deduplicated_tools {
debug!("Processing completed tool call: {:?}", tool_call);
// If it's a duplicate, log it and return a warning
if let Some(dup_type) = &duplicate_type {
// Log the duplicate with red prefix
let prefixed_tool_name = format!("🟥 {} {}", tool_call.tool, dup_type);
let warning_msg = format!(
"⚠️ Duplicate tool call detected ({}): Skipping execution of {} with args {}",
dup_type,
tool_call.tool,
serde_json::to_string(&tool_call.args).unwrap_or_else(|_| "<unserializable>".to_string())
);
// Log to tool log with red prefix
let mut modified_tool_call = tool_call.clone();
modified_tool_call.tool = prefixed_tool_name;
self.log_tool_call(&modified_tool_call, &warning_msg);
continue; // Skip execution of duplicate
}
// Check if we should auto-compact at 90% BEFORE executing the tool
// We need to do this before any borrows of self
if self.auto_compact && self.context_window.percentage_used() >= 90.0 {
@@ -2981,29 +3283,20 @@ impl<W: UiWriter> Agent<W> {
// Add the tool call and result to the context window using RAW unfiltered content
// This ensures the log file contains the true raw content including JSON tool calls
let tool_message = if !raw_content_for_log.trim().is_empty() {
Message {
role: MessageRole::Assistant,
content: format!(
Message::new(MessageRole::Assistant, format!(
"{}\n\n{{\"tool\": \"{}\", \"args\": {}}}",
raw_content_for_log.trim(),
tool_call.tool,
tool_call.args
),
}
))
} else {
// No text content before tool call, just include the tool call
Message {
role: MessageRole::Assistant,
content: format!(
Message::new(MessageRole::Assistant, format!(
"{{\"tool\": \"{}\", \"args\": {}}}",
tool_call.tool, tool_call.args
),
}
};
let result_message = Message {
role: MessageRole::User,
content: format!("Tool result: {}", tool_result),
))
};
let result_message = Message::new(MessageRole::User, format!("Tool result: {}", tool_result));
self.context_window.add_message(tool_message);
self.context_window.add_message(result_message);
@@ -3012,7 +3305,8 @@ impl<W: UiWriter> Agent<W> {
request.messages = self.context_window.conversation_history.clone();
// Ensure tools are included for native providers in subsequent iterations
if provider.has_native_tool_calling() {
let provider_for_tools = self.providers.get(None)?;
if provider_for_tools.has_native_tool_calling() {
request.tools = Some(Self::create_tool_definitions(
self.config.webdriver.enabled,
self.config.macax.enabled,
@@ -3041,7 +3335,16 @@ impl<W: UiWriter> Agent<W> {
current_response.clear();
// Reset response_started flag for next iteration
response_started = false;
break; // Break out of current stream to start a new one
// For single tool mode, break immediately
if !self.config.agent.allow_multiple_tool_calls {
break; // Break out of current stream to start a new one
}
} // End of for loop processing each tool call
// If we processed any tools in multiple mode, break out to start new stream
if tool_executed && self.config.agent.allow_multiple_tool_calls {
break;
}
// If no tool calls were completed, continue streaming normally
@@ -3124,8 +3427,8 @@ impl<W: UiWriter> Agent<W> {
error!("Iteration: {}/{}", iteration_count, MAX_ITERATIONS);
error!(
"Provider: {} (model: {})",
provider.name(),
provider.model()
provider_name,
provider_model
);
error!("Chunks received: {}", chunks_received);
error!("Parser state:");
@@ -3343,9 +3646,23 @@ impl<W: UiWriter> Agent<W> {
.replace("<</SYS>>", "");
if !raw_clean.trim().is_empty() {
let assistant_message = Message {
role: MessageRole::Assistant,
content: raw_clean,
let assistant_message = {
// Check if we should use cache control (every 10 tool calls)
if self.tool_call_count > 0 && self.tool_call_count % 10 == 0 {
let provider = self.providers.get(None)?;
if let Some(cache_config) = match provider.name() {
"anthropic" => self.config.providers.anthropic.as_ref()
.and_then(|c| c.cache_config.as_ref())
.and_then(|config| Self::parse_cache_control(config)),
_ => None,
} {
Message::with_cache_control_validated(MessageRole::Assistant, raw_clean, cache_config, provider)
} else {
Message::new(MessageRole::Assistant, raw_clean)
}
} else {
Message::new(MessageRole::Assistant, raw_clean)
}
};
self.context_window.add_message(assistant_message);
}
@@ -3387,7 +3704,20 @@ impl<W: UiWriter> Agent<W> {
Ok(TaskResult::new(final_response, self.context_window.clone()))
}
pub async fn execute_tool(&self, tool_call: &ToolCall) -> Result<String> {
pub async fn execute_tool(&mut self, tool_call: &ToolCall) -> Result<String> {
// Increment tool call count
self.tool_call_count += 1;
let result = self.execute_tool_inner(tool_call).await;
let log_str = match &result {
Ok(s) => s.clone(),
Err(e) => format!("ERROR: {}", e),
};
self.log_tool_call(tool_call, &log_str);
result
}
async fn execute_tool_inner(&mut self, tool_call: &ToolCall) -> Result<String> {
debug!("=== EXECUTING TOOL ===");
debug!("Tool name: {}", tool_call.tool);
debug!("Tool args (raw): {:?}", tool_call.args);
@@ -3935,6 +4265,56 @@ impl<W: UiWriter> Agent<W> {
let mut todo = self.todo_content.write().await;
*todo = content.clone();
// Check for staleness if enabled and we have a requirements SHA
if self.config.agent.check_todo_staleness {
if let Some(req_sha) = &self.requirements_sha {
// Parse the first line for the SHA header
if let Some(first_line) = content.lines().next() {
if first_line.starts_with("{{Based on the requirements file with SHA256:") {
let parts: Vec<&str> = first_line.split("SHA256:").collect();
if parts.len() > 1 {
let todo_sha = parts[1].trim().trim_end_matches("}}").trim();
if todo_sha != req_sha {
let warning = format!(
"⚠️ TODO list is stale! It was generated from a different requirements file.\nExpected SHA: {}\nFound SHA: {}",
req_sha, todo_sha
);
self.ui_writer.print_context_status(&warning);
// Beep 6 times
print!("\x07\x07\x07\x07\x07\x07");
let _ = std::io::stdout().flush();
let options = ["Ignore and Continue", "Mark as Stale", "Quit Application"];
let choice = self.ui_writer.prompt_user_choice("Requirements have changed! What would you like to do?", &options);
match choice {
0 => {
// Ignore and Continue
self.ui_writer.print_context_status("⚠️ Ignoring staleness warning.");
}
1 => {
// Mark as Stale
// We return a message to the agent so it knows to regenerate/fix it.
return Ok("⚠️ TODO list is stale (requirements changed). Please regenerate the TODO list to match the new requirements.".to_string());
}
2 => {
// Quit Application
self.ui_writer.print_context_status("❌ Quitting application as requested.");
std::process::exit(0);
}
_ => unreachable!(),
}
}
}
} else {
// Header missing, but we have a SHA. Warn the user?
// For now, maybe just proceed... assuming it's an old TODO.
}
}
}
}
if content.trim().is_empty() {
Ok("📝 TODO list is empty".to_string())
} else {
@@ -3981,6 +4361,46 @@ impl<W: UiWriter> Agent<W> {
Ok("❌ Missing content argument".to_string())
}
}
"code_coverage" => {
debug!("Processing code_coverage tool call");
self.ui_writer.print_context_status("🔍 Generating code coverage report...");
// Ensure coverage tools are installed
match g3_execution::ensure_coverage_tools_installed() {
Ok(already_installed) => {
if !already_installed {
self.ui_writer.print_context_status("✅ Coverage tools installed successfully");
}
}
Err(e) => {
return Ok(format!("❌ Failed to install coverage tools: {}", e));
}
}
// Run cargo llvm-cov --workspace
let output = std::process::Command::new("cargo")
.args(&["llvm-cov", "--workspace"])
.current_dir(std::env::current_dir()?)
.output()?;
if output.status.success() {
let stdout = String::from_utf8_lossy(&output.stdout);
let stderr = String::from_utf8_lossy(&output.stderr);
// Combine output
let mut result = String::from("✅ Code coverage report generated successfully\n\n");
result.push_str("## Coverage Summary\n");
result.push_str(&stdout);
if !stderr.is_empty() {
result.push_str("\n## Warnings\n");
result.push_str(&stderr);
}
Ok(result)
} else {
let stderr = String::from_utf8_lossy(&output.stderr);
Ok(format!("❌ Failed to generate coverage report:\n{}", stderr))
}
}
"webdriver_start" => {
debug!("Processing webdriver_start tool call");
@@ -5371,6 +5791,16 @@ mod integration_tests {
// Implement Drop to clean up safaridriver process
impl<W: UiWriter> Drop for Agent<W> {
fn drop(&mut self) {
// Validate system prompt invariant on drop (agent exit)
// This catches any bugs where the conversation history was corrupted during execution
if !self.context_window.conversation_history.is_empty() {
if let Err(e) = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
self.validate_system_prompt_is_first();
})) {
eprintln!("\n⚠️ FATAL ERROR ON EXIT: System prompt validation failed: {:?}", e);
}
}
// Try to kill safaridriver process if it's still running
// We need to use try_lock since we can't await in Drop
if let Ok(mut process_guard) = self.safaridriver_process.try_write() {

View File

@@ -71,9 +71,13 @@ Every multi-step task follows this pattern:
1. **Start**: Call todo_read, then todo_write to create your plan
2. **During**: Execute steps, then todo_read and todo_write to mark progress
3. **End**: Call todo_read to verify all items complete
Note: todo_write replaces the entire todo.g3.md file, so always read first to preserve content. TODO lists persist across g3 sessions in the workspace directory.
IMPORTANT: If you are provided with a SHA256 hash of the requirements file, you MUST include it as the very first line of the todo.g3.md file in the following format:
`{{Based on the requirements file with SHA256: <SHA>}}`
This ensures the TODO list is tracked against the specific version of requirements it was generated from.
## Examples
**Example 1: Feature Implementation**
@@ -185,7 +189,25 @@ Do not explain what you're going to do - just do it by calling the tools.
";
pub const SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE: &'static str =
concatcp!(CODING_STYLE, SYSTEM_NATIVE_TOOL_CALLS);
concatcp!(SYSTEM_NATIVE_TOOL_CALLS, CODING_STYLE);
/// Generate system prompt based on whether multiple tool calls are allowed
pub fn get_system_prompt_for_native(allow_multiple: bool) -> String {
if allow_multiple {
// Replace the "ONE tool" instruction with multiple tools instruction
let base = SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string();
base.replace(
"2. Call the appropriate tool with the required parameters",
"2. Call the appropriate tool(s) with the required parameters - you may call multiple tools in parallel when appropriate.
<use_parallel_tool_calls>
For maximum efficiency, whenever you perform multiple independent operations, invoke all relevant tools simultaneously rather than sequentially. Prioritize calling tools in parallel whenever possible. For example, when reading 3 files, run 3 tool calls in parallel to read all 3 files into context at the same time. When running multiple read-only commands like `ls` or `list_dir`, always run all of the commands in parallel. Err on the side of maximizing parallel tool calls rather than running too many tools sequentially.
</use_parallel_tool_calls>
"
)
} else {
SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string()
}
}
const SYSTEM_NON_NATIVE_TOOL_USE: &'static str =
"You are G3, a general-purpose AI agent. Your goal is to analyze and solve problems by writing code.
@@ -285,6 +307,10 @@ Every multi-step task follows this pattern:
Note: todo_write replaces the entire list, so always read first to preserve content.
IMPORTANT: If you are provided with a SHA256 hash of the requirements file, you MUST include it as the very first line of the todo.g3.md file in the following format:
`{{Based on the requirements file with SHA256: <SHA>}}`
This ensures the TODO list is tracked against the specific version of requirements it was generated from.
## Examples
**Example 1: Feature Implementation**
@@ -345,4 +371,4 @@ If you can complete it with 1-2 tool calls, skip TODO.
";
pub const SYSTEM_PROMPT_FOR_NON_NATIVE_TOOL_USE: &'static str =
concatcp!(CODING_STYLE, SYSTEM_NON_NATIVE_TOOL_USE);
concatcp!(SYSTEM_NON_NATIVE_TOOL_USE, CODING_STYLE);

View File

@@ -6,14 +6,10 @@ use std::sync::Arc;
fn test_task_result_basic_functionality() {
// Create a context window with some messages
let mut context = ContextWindow::new(10000);
context.add_message(Message {
role: MessageRole::User,
content: "Test message 1".to_string(),
});
context.add_message(Message {
role: MessageRole::Assistant,
content: "Response 1".to_string(),
});
context.add_message(Message::new(MessageRole::User, "Test message 1".to_string())
);
context.add_message(Message::new(MessageRole::Assistant, "Response 1".to_string())
);
// Create a TaskResult
let response = "This is the response\n\nFinal output block".to_string();
@@ -100,10 +96,7 @@ fn test_context_window_preservation() {
// Add some messages
for i in 0..5 {
context.add_message(Message {
role: if i % 2 == 0 { MessageRole::User } else { MessageRole::Assistant },
content: format!("Message {}", i),
});
context.add_message(Message::new(if i % 2 == 0 { MessageRole::User } else { MessageRole::Assistant }, format!("Message {}", i)));
}
// Create TaskResult

View File

@@ -56,6 +56,13 @@ pub trait UiWriter: Send + Sync {
/// Returns true if this UI writer wants full, untruncated output
/// Default is false (truncate for human readability)
fn wants_full_output(&self) -> bool { false }
/// Prompt the user for a yes/no confirmation
fn prompt_user_yes_no(&self, message: &str) -> bool;
/// Prompt the user to choose from a list of options
/// Returns the index of the selected option
fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize;
}
/// A no-op implementation for when UI output is not needed
@@ -80,4 +87,6 @@ impl UiWriter for NullUiWriter {
fn notify_sse_received(&self) {}
fn flush(&self) {}
fn wants_full_output(&self) -> bool { false }
fn prompt_user_yes_no(&self, _message: &str) -> bool { true }
fn prompt_user_choice(&self, _message: &str, _options: &[&str]) -> usize { 0 }
}

View File

@@ -551,6 +551,7 @@ async fn test_cpp_search() {
}
#[tokio::test]
#[ignore]
async fn test_kotlin_search() {
let request = CodeSearchRequest {
searches: vec![SearchSpec {

View File

@@ -46,10 +46,10 @@ fn test_thin_context_basic() {
// Add some messages to the first third
for i in 0..9 {
if i % 2 == 0 {
context.add_message(Message {
role: MessageRole::Assistant,
content: format!("Assistant message {}", i),
});
context.add_message(Message::new(
MessageRole::Assistant,
format!("Assistant message {}", i),
));
} else {
// Add tool results with varying sizes
let content = if i == 1 {
@@ -63,10 +63,10 @@ fn test_thin_context_basic() {
format!("Tool result: small result {}", i)
};
context.add_message(Message {
role: MessageRole::User,
context.add_message(Message::new(
MessageRole::User,
content,
});
));
}
}
@@ -98,10 +98,10 @@ fn test_thin_write_file_tool_calls() {
let mut context = ContextWindow::new(10000);
// Add some messages including a write_file tool call with large content
context.add_message(Message {
role: MessageRole::User,
content: "Please create a large file".to_string(),
});
context.add_message(Message::new(
MessageRole::User,
"Please create a large file".to_string(),
));
// Add an assistant message with a write_file tool call containing large content
let large_content = "x".repeat(1500);
@@ -109,22 +109,22 @@ fn test_thin_write_file_tool_calls() {
r#"{{"tool": "write_file", "args": {{"file_path": "test.txt", "content": "{}"}}}}"#,
large_content
);
context.add_message(Message {
role: MessageRole::Assistant,
content: format!("I'll create that file.\n\n{}", tool_call_json),
});
context.add_message(Message::new(
MessageRole::Assistant,
format!("I'll create that file.\n\n{}", tool_call_json),
));
context.add_message(Message {
role: MessageRole::User,
content: "Tool result: ✅ Successfully wrote 1500 lines".to_string(),
});
context.add_message(Message::new(
MessageRole::User,
"Tool result: ✅ Successfully wrote 1500 lines".to_string(),
));
// Add more messages to ensure we have enough for "first third" logic
for i in 0..6 {
context.add_message(Message {
role: MessageRole::Assistant,
content: format!("Response {}", i),
});
context.add_message(Message::new(
MessageRole::Assistant,
format!("Response {}", i),
));
}
// Trigger thinning at 50%
@@ -154,10 +154,10 @@ fn test_thin_str_replace_tool_calls() {
let mut context = ContextWindow::new(10000);
// Add some messages including a str_replace tool call with large diff
context.add_message(Message {
role: MessageRole::User,
content: "Please update the file".to_string(),
});
context.add_message(Message::new(
MessageRole::User,
"Please update the file".to_string(),
));
// Add an assistant message with a str_replace tool call containing large diff
let large_diff = format!("--- old\n{}\n+++ new\n{}", "-old line\n".repeat(100), "+new line\n".repeat(100));
@@ -165,22 +165,22 @@ fn test_thin_str_replace_tool_calls() {
r#"{{"tool": "str_replace", "args": {{"file_path": "test.txt", "diff": "{}"}}}}"#,
large_diff.replace('\n', "\\n")
);
context.add_message(Message {
role: MessageRole::Assistant,
content: format!("I'll update that file.\n\n{}", tool_call_json),
});
context.add_message(Message::new(
MessageRole::Assistant,
format!("I'll update that file.\n\n{}", tool_call_json),
));
context.add_message(Message {
role: MessageRole::User,
content: "Tool result: ✅ applied unified diff".to_string(),
});
context.add_message(Message::new(
MessageRole::User,
"Tool result: ✅ applied unified diff".to_string(),
));
// Add more messages to ensure we have enough for "first third" logic
for i in 0..6 {
context.add_message(Message {
role: MessageRole::Assistant,
content: format!("Response {}", i),
});
context.add_message(Message::new(
MessageRole::Assistant,
format!("Response {}", i),
));
}
// Trigger thinning at 50%
@@ -212,10 +212,10 @@ fn test_thin_context_no_large_results() {
// Add only small messages
for i in 0..9 {
context.add_message(Message {
role: MessageRole::User,
content: format!("Tool result: small {}", i),
});
context.add_message(Message::new(
MessageRole::User,
format!("Tool result: small {}", i),
));
}
context.used_tokens = 5000;
@@ -244,7 +244,7 @@ fn test_thin_context_only_affects_first_third() {
MessageRole::Assistant
};
context.add_message(Message { role, content });
context.add_message(Message::new(role, content));
}
context.used_tokens = 5000;

View File

@@ -8,27 +8,18 @@ fn test_todo_read_results_not_thinned() {
let mut context = ContextWindow::new(10000);
// Add a todo_read tool call
context.add_message(Message {
role: MessageRole::Assistant,
content: r#"{"tool": "todo_read", "args": {}}"#.to_string(),
});
context.add_message(Message::new(MessageRole::Assistant, r#"{"tool": "todo_read", "args": {}}"#.to_string()));
// Add a large TODO result (> 500 chars)
let large_todo_result = format!(
"Tool result: 📝 TODO list:\n{}",
"- [ ] Task with long description\n".repeat(50)
);
context.add_message(Message {
role: MessageRole::User,
content: large_todo_result.clone(),
});
context.add_message(Message::new(MessageRole::User, large_todo_result.clone()));
// Add more messages to ensure we have enough for "first third" logic
for i in 0..6 {
context.add_message(Message {
role: MessageRole::Assistant,
content: format!("Response {}", i),
});
context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)))
}
// Trigger thinning at 50%
@@ -65,27 +56,18 @@ fn test_todo_write_results_not_thinned() {
// Add a todo_write tool call
let large_content = "- [ ] Task\n".repeat(100);
context.add_message(Message {
role: MessageRole::Assistant,
content: format!(r#"{{"tool": "todo_write", "args": {{"content": "{}"}}}}"#, large_content),
});
context.add_message(Message::new(MessageRole::Assistant, format!(r#"{{"tool": "todo_write", "args": {{"content": "{}"}}}}"#, large_content)));
// Add a large TODO write result
let large_todo_result = format!(
"Tool result: ✅ TODO list updated ({} chars) and saved to todo.g3.md",
large_content.len()
);
context.add_message(Message {
role: MessageRole::User,
content: large_todo_result.clone(),
});
context.add_message(Message::new(MessageRole::User, large_todo_result.clone()));
// Add more messages
for i in 0..6 {
context.add_message(Message {
role: MessageRole::Assistant,
content: format!("Response {}", i),
});
context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)))
}
// Trigger thinning at 50%
@@ -119,24 +101,15 @@ fn test_non_todo_results_still_thinned() {
let mut context = ContextWindow::new(10000);
// Add a non-TODO tool call (e.g., read_file)
context.add_message(Message {
role: MessageRole::Assistant,
content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string(),
});
context.add_message(Message::new(MessageRole::Assistant, r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string()));
// Add a large read_file result (> 500 chars)
let large_result = format!("Tool result: {}", "x".repeat(1500));
context.add_message(Message {
role: MessageRole::User,
content: large_result,
});
context.add_message(Message::new(MessageRole::User, large_result));
// Add more messages
for i in 0..6 {
context.add_message(Message {
role: MessageRole::Assistant,
content: format!("Response {}", i),
});
context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)))
}
// Trigger thinning at 50%
@@ -172,27 +145,18 @@ fn test_todo_read_with_spaces_in_tool_name() {
let mut context = ContextWindow::new(10000);
// Add a todo_read tool call with spaces (JSON formatting variation)
context.add_message(Message {
role: MessageRole::Assistant,
content: r#"{"tool": "todo_read", "args": {}}"#.to_string(),
});
context.add_message(Message::new(MessageRole::Assistant, r#"{"tool": "todo_read", "args": {}}"#.to_string()));
// Add a large TODO result
let large_todo_result = format!(
"Tool result: 📝 TODO list:\n{}",
"- [ ] Task\n".repeat(50)
);
context.add_message(Message {
role: MessageRole::User,
content: large_todo_result.clone(),
});
context.add_message(Message::new(MessageRole::User, large_todo_result.clone()));
// Add more messages
for i in 0..6 {
context.add_message(Message {
role: MessageRole::Assistant,
content: format!("Response {}", i),
});
context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)))
}
// Trigger thinning

View File

@@ -27,7 +27,7 @@ fn get_todo_path(temp_dir: &TempDir) -> PathBuf {
#[serial]
async fn test_todo_write_creates_file() {
let temp_dir = TempDir::new().unwrap();
let agent = create_test_agent_in_dir(&temp_dir).await;
let mut agent = create_test_agent_in_dir(&temp_dir).await;
let todo_path = get_todo_path(&temp_dir);
// Initially, todo.g3.md should not exist
@@ -67,7 +67,7 @@ async fn test_todo_read_from_file() {
fs::write(&todo_path, test_content).unwrap();
// Create agent (should load from file)
let agent = create_test_agent_in_dir(&temp_dir).await;
let mut agent = create_test_agent_in_dir(&temp_dir).await;
// Create a tool call to read TODO
let tool_call = g3_core::ToolCall {
@@ -88,7 +88,7 @@ async fn test_todo_read_from_file() {
#[serial]
async fn test_todo_read_empty_file() {
let temp_dir = TempDir::new().unwrap();
let agent = create_test_agent_in_dir(&temp_dir).await;
let mut agent = create_test_agent_in_dir(&temp_dir).await;
// Create a tool call to read TODO (file doesn't exist)
let tool_call = g3_core::ToolCall {
@@ -111,7 +111,7 @@ async fn test_todo_persistence_across_agents() {
// Agent 1: Write TODO
{
let agent = create_test_agent_in_dir(&temp_dir).await;
let mut agent = create_test_agent_in_dir(&temp_dir).await;
let tool_call = g3_core::ToolCall {
tool: "todo_write".to_string(),
args: serde_json::json!({
@@ -126,7 +126,7 @@ async fn test_todo_persistence_across_agents() {
// Agent 2: Read TODO (new agent instance)
{
let agent = create_test_agent_in_dir(&temp_dir).await;
let mut agent = create_test_agent_in_dir(&temp_dir).await;
let tool_call = g3_core::ToolCall {
tool: "todo_read".to_string(),
args: serde_json::json!({}),
@@ -143,7 +143,7 @@ async fn test_todo_persistence_across_agents() {
#[serial]
async fn test_todo_update_preserves_file() {
let temp_dir = TempDir::new().unwrap();
let agent = create_test_agent_in_dir(&temp_dir).await;
let mut agent = create_test_agent_in_dir(&temp_dir).await;
let todo_path = get_todo_path(&temp_dir);
// Write initial TODO
@@ -173,7 +173,7 @@ async fn test_todo_update_preserves_file() {
#[serial]
async fn test_todo_handles_large_content() {
let temp_dir = TempDir::new().unwrap();
let agent = create_test_agent_in_dir(&temp_dir).await;
let mut agent = create_test_agent_in_dir(&temp_dir).await;
let todo_path = get_todo_path(&temp_dir);
// Create a large TODO (but under the 50k limit)
@@ -202,7 +202,7 @@ async fn test_todo_handles_large_content() {
#[serial]
async fn test_todo_respects_size_limit() {
let temp_dir = TempDir::new().unwrap();
let agent = create_test_agent_in_dir(&temp_dir).await;
let mut agent = create_test_agent_in_dir(&temp_dir).await;
// Create content that exceeds the default 50k limit
let huge_content = "x".repeat(60_000);
@@ -232,7 +232,7 @@ async fn test_todo_agent_initialization_loads_file() {
fs::write(&todo_path, initial_content).unwrap();
// Create agent - should load the file during initialization
let agent = create_test_agent_in_dir(&temp_dir).await;
let mut agent = create_test_agent_in_dir(&temp_dir).await;
// Read TODO - should return the pre-existing content
let tool_call = g3_core::ToolCall {
@@ -248,7 +248,7 @@ async fn test_todo_agent_initialization_loads_file() {
#[serial]
async fn test_todo_handles_unicode_content() {
let temp_dir = TempDir::new().unwrap();
let agent = create_test_agent_in_dir(&temp_dir).await;
let mut agent = create_test_agent_in_dir(&temp_dir).await;
let todo_path = get_todo_path(&temp_dir);
// Create TODO with unicode characters
@@ -283,7 +283,7 @@ async fn test_todo_handles_unicode_content() {
#[serial]
async fn test_todo_empty_content_creates_empty_file() {
let temp_dir = TempDir::new().unwrap();
let agent = create_test_agent_in_dir(&temp_dir).await;
let mut agent = create_test_agent_in_dir(&temp_dir).await;
let todo_path = get_todo_path(&temp_dir);
// Write empty TODO
@@ -306,7 +306,7 @@ async fn test_todo_empty_content_creates_empty_file() {
#[serial]
async fn test_todo_whitespace_only_content() {
let temp_dir = TempDir::new().unwrap();
let agent = create_test_agent_in_dir(&temp_dir).await;
let mut agent = create_test_agent_in_dir(&temp_dir).await;
// Write whitespace-only TODO
let tool_call = g3_core::ToolCall {

View File

@@ -0,0 +1,193 @@
use g3_core::{Agent, ToolCall};
use g3_core::ui_writer::UiWriter;
use g3_config::Config;
use std::sync::{Arc, Mutex};
use tempfile::TempDir;
use serial_test::serial;
// Mock UI Writer for testing
#[derive(Clone)]
struct MockUiWriter {
output: Arc<Mutex<Vec<String>>>,
prompt_responses: Arc<Mutex<Vec<bool>>>,
choice_responses: Arc<Mutex<Vec<usize>>>,
}
impl MockUiWriter {
fn new() -> Self {
Self {
output: Arc::new(Mutex::new(Vec::new())),
prompt_responses: Arc::new(Mutex::new(Vec::new())),
choice_responses: Arc::new(Mutex::new(Vec::new())),
}
}
fn set_prompt_response(&self, response: bool) {
self.prompt_responses.lock().unwrap().push(response);
}
fn set_choice_response(&self, response: usize) {
self.choice_responses.lock().unwrap().push(response);
}
fn get_output(&self) -> Vec<String> {
self.output.lock().unwrap().clone()
}
}
impl UiWriter for MockUiWriter {
fn print(&self, message: &str) {
self.output.lock().unwrap().push(message.to_string());
}
fn println(&self, message: &str) {
self.output.lock().unwrap().push(message.to_string());
}
fn print_inline(&self, message: &str) {
self.output.lock().unwrap().push(message.to_string());
}
fn print_system_prompt(&self, _prompt: &str) {}
fn print_context_status(&self, message: &str) {
self.output.lock().unwrap().push(format!("STATUS: {}", message));
}
fn print_context_thinning(&self, _message: &str) {}
fn print_tool_header(&self, _tool_name: &str) {}
fn print_tool_arg(&self, _key: &str, _value: &str) {}
fn print_tool_output_header(&self) {}
fn update_tool_output_line(&self, _line: &str) {}
fn print_tool_output_line(&self, _line: &str) {}
fn print_tool_output_summary(&self, _hidden_count: usize) {}
fn print_tool_timing(&self, _duration_str: &str) {}
fn print_agent_prompt(&self) {}
fn print_agent_response(&self, _content: &str) {}
fn notify_sse_received(&self) {}
fn flush(&self) {}
fn wants_full_output(&self) -> bool { false }
fn prompt_user_yes_no(&self, message: &str) -> bool {
self.output.lock().unwrap().push(format!("PROMPT: {}", message));
self.prompt_responses.lock().unwrap().pop().unwrap_or(true)
}
fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize {
self.output.lock().unwrap().push(format!("CHOICE: {} Options: {:?}", message, options));
self.choice_responses.lock().unwrap().pop().unwrap_or(0)
}
}
#[tokio::test]
#[serial]
async fn test_todo_staleness_check_matching_sha() {
let temp_dir = TempDir::new().unwrap();
let todo_path = temp_dir.path().join("todo.g3.md");
std::env::set_current_dir(&temp_dir).unwrap();
let sha = "abc123hash";
let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha);
std::fs::write(&todo_path, content).unwrap();
let mut config = Config::default();
config.agent.check_todo_staleness = true;
let ui_writer = MockUiWriter::new();
let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
agent.set_requirements_sha(sha.to_string());
let tool_call = ToolCall {
tool: "todo_read".to_string(),
args: serde_json::json!({}),
};
let result = agent.execute_tool(&tool_call).await.unwrap();
assert!(result.contains("📝 TODO list:"));
assert!(!result.contains("⚠️ TODO list is stale"));
}
#[tokio::test]
#[serial]
async fn test_todo_staleness_check_mismatch_sha_ignore() {
let temp_dir = TempDir::new().unwrap();
let todo_path = temp_dir.path().join("todo.g3.md");
std::env::set_current_dir(&temp_dir).unwrap();
let sha_file = "old_sha";
let sha_req = "new_sha";
let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha_file);
std::fs::write(&todo_path, content).unwrap();
let mut config = Config::default();
config.agent.check_todo_staleness = true;
let ui_writer = MockUiWriter::new();
ui_writer.set_choice_response(0); // Ignore
let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
agent.set_requirements_sha(sha_req.to_string());
let tool_call = ToolCall {
tool: "todo_read".to_string(),
args: serde_json::json!({}),
};
let result = agent.execute_tool(&tool_call).await.unwrap();
assert!(result.contains("📝 TODO list:"));
}
#[tokio::test]
#[serial]
async fn test_todo_staleness_check_mismatch_sha_mark_stale() {
let temp_dir = TempDir::new().unwrap();
let todo_path = temp_dir.path().join("todo.g3.md");
std::env::set_current_dir(&temp_dir).unwrap();
let sha_file = "old_sha";
let sha_req = "new_sha";
let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha_file);
std::fs::write(&todo_path, content).unwrap();
let mut config = Config::default();
config.agent.check_todo_staleness = true;
let ui_writer = MockUiWriter::new();
ui_writer.set_choice_response(1); // Mark as Stale
let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
agent.set_requirements_sha(sha_req.to_string());
let tool_call = ToolCall {
tool: "todo_read".to_string(),
args: serde_json::json!({}),
};
let result = agent.execute_tool(&tool_call).await.unwrap();
assert!(result.contains("⚠️ TODO list is stale"));
assert!(result.contains("Please regenerate"));
}
// Note: We cannot easily test "Quit" (index 2) because it calls std::process::exit(0)
// which would kill the test runner. We skip that test case here.
#[tokio::test]
#[serial]
async fn test_todo_staleness_check_disabled() {
let temp_dir = TempDir::new().unwrap();
let todo_path = temp_dir.path().join("todo.g3.md");
std::env::set_current_dir(&temp_dir).unwrap();
let sha_file = "old_sha";
let sha_req = "new_sha";
let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha_file);
std::fs::write(&todo_path, content).unwrap();
let mut config = Config::default();
config.agent.check_todo_staleness = false;
let ui_writer = MockUiWriter::new();
let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
agent.set_requirements_sha(sha_req.to_string());
let tool_call = ToolCall {
tool: "todo_read".to_string(),
args: serde_json::json!({}),
};
let result = agent.execute_tool(&tool_call).await.unwrap();
assert!(result.contains("📝 TODO list:"));
}

View File

@@ -0,0 +1,13 @@
use g3_execution::ensure_coverage_tools_installed;
fn main() -> anyhow::Result<()> {
// Ensure coverage tools are installed
let already_installed = ensure_coverage_tools_installed()?;
if already_installed {
println!("All coverage tools are already installed!");
} else {
println!("Coverage tools have been installed successfully!");
}
Ok(())
}

View File

@@ -330,3 +330,87 @@ impl CodeExecutor {
})
}
}
/// Check if rustup component llvm-tools-preview is installed
pub fn is_llvm_tools_installed() -> Result<bool> {
let output = Command::new("rustup")
.args(&["component", "list", "--installed"])
.output()?;
let installed = String::from_utf8_lossy(&output.stdout)
.lines()
.any(|line| line.trim() == "llvm-tools-preview" || line.starts_with("llvm-tools"));
Ok(installed)
}
/// Check if cargo-llvm-cov is installed
pub fn is_cargo_llvm_cov_installed() -> Result<bool> {
let output = Command::new("cargo")
.args(&["--list"])
.output()?;
let installed = String::from_utf8_lossy(&output.stdout)
.lines()
.any(|line| line.trim().starts_with("llvm-cov"));
Ok(installed)
}
/// Install llvm-tools-preview via rustup
pub fn install_llvm_tools() -> Result<()> {
info!("Installing llvm-tools-preview...");
let output = Command::new("rustup")
.args(&["component", "add", "llvm-tools-preview"])
.output()?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("Failed to install llvm-tools-preview: {}", stderr);
}
info!("✅ llvm-tools-preview installed successfully");
Ok(())
}
/// Install cargo-llvm-cov via cargo install
pub fn install_cargo_llvm_cov() -> Result<()> {
info!("Installing cargo-llvm-cov... (this may take a few minutes)");
let output = Command::new("cargo")
.args(&["install", "cargo-llvm-cov"])
.output()?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("Failed to install cargo-llvm-cov: {}", stderr);
}
info!("✅ cargo-llvm-cov installed successfully");
Ok(())
}
/// Ensure both llvm-tools-preview and cargo-llvm-cov are installed
/// Returns Ok(true) if tools were already installed, Ok(false) if they were installed by this function
pub fn ensure_coverage_tools_installed() -> Result<bool> {
let mut already_installed = true;
// Check and install llvm-tools-preview
if !is_llvm_tools_installed()? {
info!("llvm-tools-preview not found, installing...");
install_llvm_tools()?;
already_installed = false;
} else {
info!("✅ llvm-tools-preview is already installed");
}
// Check and install cargo-llvm-cov
if !is_cargo_llvm_cov_installed()? {
info!("cargo-llvm-cov not found, installing...");
install_cargo_llvm_cov()?;
already_installed = false;
} else {
info!("✅ cargo-llvm-cov is already installed");
}
Ok(already_installed)
}

View File

@@ -21,22 +21,18 @@
//! // Create the provider with your API key
//! let provider = AnthropicProvider::new(
//! "your-api-key".to_string(),
//! Some("claude-3-5-sonnet-20241022".to_string()), // Optional: defaults to claude-3-5-sonnet-20241022
//! Some(4096), // Optional: max tokens
//! Some(0.1), // Optional: temperature
//! Some("claude-3-5-sonnet-20241022".to_string()),
//! Some(4096),
//! Some(0.1),
//! None, // cache_config
//! None, // enable_1m_context
//! )?;
//!
//! // Create a completion request
//! let request = CompletionRequest {
//! messages: vec![
//! Message {
//! role: MessageRole::System,
//! content: "You are a helpful assistant.".to_string(),
//! },
//! Message {
//! role: MessageRole::User,
//! content: "Hello! How are you?".to_string(),
//! },
//! Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
//! Message::new(MessageRole::User, "Hello! How are you?".to_string()),
//! ],
//! max_tokens: Some(1000),
//! temperature: Some(0.7),
@@ -62,15 +58,16 @@
//! async fn main() -> anyhow::Result<()> {
//! let provider = AnthropicProvider::new(
//! "your-api-key".to_string(),
//! None, None, None,
//! None,
//! None,
//! None,
//! None, // cache_config
//! None, // enable_1m_context
//! )?;
//!
//! let request = CompletionRequest {
//! messages: vec![
//! Message {
//! role: MessageRole::User,
//! content: "Write a short story about a robot.".to_string(),
//! },
//! Message::new(MessageRole::User, "Write a short story about a robot.".to_string()),
//! ],
//! max_tokens: Some(1000),
//! temperature: Some(0.7),
@@ -123,6 +120,8 @@ pub struct AnthropicProvider {
model: String,
max_tokens: u32,
temperature: f32,
cache_config: Option<String>,
enable_1m_context: bool,
}
impl AnthropicProvider {
@@ -131,6 +130,8 @@ impl AnthropicProvider {
model: Option<String>,
max_tokens: Option<u32>,
temperature: Option<f32>,
cache_config: Option<String>,
enable_1m_context: Option<bool>,
) -> Result<Self> {
let client = Client::builder()
.timeout(Duration::from_secs(300))
@@ -147,6 +148,8 @@ impl AnthropicProvider {
model,
max_tokens: max_tokens.unwrap_or(4096),
temperature: temperature.unwrap_or(0.1),
cache_config,
enable_1m_context: enable_1m_context.unwrap_or(false),
})
}
@@ -156,9 +159,12 @@ impl AnthropicProvider {
.post(ANTHROPIC_API_URL)
.header("x-api-key", &self.api_key)
.header("anthropic-version", ANTHROPIC_VERSION)
// Anthropic beta 1m context window. Enable if needed. It costs extra, so check first.
// .header("anthropic-beta", "context-1m-2025-08-07")
.header("content-type", "application/json");
if self.enable_1m_context {
builder = builder.header("anthropic-beta", "context-1m-2025-08-07");
}
if streaming {
builder = builder.header("accept", "text/event-stream");
}
@@ -166,6 +172,11 @@ impl AnthropicProvider {
builder
}
fn convert_cache_control(cache_control: &crate::CacheControl) -> crate::CacheControl {
// Anthropic uses the same format, so just clone it
cache_control.clone()
}
fn convert_tools(&self, tools: &[Tool]) -> Vec<AnthropicTool> {
tools
.iter()
@@ -214,6 +225,8 @@ impl AnthropicProvider {
role: "user".to_string(),
content: vec![AnthropicContent::Text {
text: message.content.clone(),
cache_control: message.cache_control.as_ref()
.map(Self::convert_cache_control),
}],
});
}
@@ -222,6 +235,8 @@ impl AnthropicProvider {
role: "assistant".to_string(),
content: vec![AnthropicContent::Text {
text: message.content.clone(),
cache_control: message.cache_control.as_ref()
.map(Self::convert_cache_control),
}],
});
}
@@ -564,7 +579,7 @@ impl LLMProvider for AnthropicProvider {
.content
.iter()
.filter_map(|c| match c {
AnthropicContent::Text { text } => Some(text.as_str()),
AnthropicContent::Text { text, .. } => Some(text.as_str()),
_ => None,
})
.collect::<Vec<_>>()
@@ -658,6 +673,11 @@ impl LLMProvider for AnthropicProvider {
// Claude models support native tool calling
true
}
fn supports_cache_control(&self) -> bool {
// Anthropic supports cache control
true
}
}
// Anthropic API request/response structures
@@ -701,7 +721,11 @@ struct AnthropicMessage {
#[serde(tag = "type")]
enum AnthropicContent {
#[serde(rename = "text")]
Text { text: String },
Text {
text: String,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<crate::CacheControl>,
},
#[serde(rename = "tool_use")]
ToolUse {
id: String,
@@ -771,21 +795,14 @@ mod tests {
None,
None,
None,
None,
None,
).unwrap();
let messages = vec![
Message {
role: MessageRole::System,
content: "You are a helpful assistant.".to_string(),
},
Message {
role: MessageRole::User,
content: "Hello!".to_string(),
},
Message {
role: MessageRole::Assistant,
content: "Hi there!".to_string(),
},
Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
Message::new(MessageRole::User, "Hello!".to_string()),
Message::new(MessageRole::Assistant, "Hi there!".to_string()),
];
let (system, anthropic_messages) = provider.convert_messages(&messages).unwrap();
@@ -803,14 +820,11 @@ mod tests {
Some("claude-3-haiku-20240307".to_string()),
Some(1000),
Some(0.5),
None,
None,
).unwrap();
let messages = vec![
Message {
role: MessageRole::User,
content: "Test message".to_string(),
},
];
let messages = vec![Message::new(MessageRole::User, "Test message".to_string())];
let request_body = provider
.create_request_body(&messages, None, false, 1000, 0.5)
@@ -831,6 +845,8 @@ mod tests {
None,
None,
None,
None,
None,
).unwrap();
let tools = vec![
@@ -859,4 +875,48 @@ mod tests {
assert!(anthropic_tools[0].input_schema.required.is_some());
assert_eq!(anthropic_tools[0].input_schema.required.as_ref().unwrap()[0], "location");
}
#[test]
fn test_cache_control_serialization() {
let provider = AnthropicProvider::new(
"test-key".to_string(),
None,
None,
None,
None,
None,
).unwrap();
// Test message WITHOUT cache_control
let messages_without = vec![Message::new(MessageRole::User, "Hello".to_string())];
let (_, anthropic_messages_without) = provider.convert_messages(&messages_without).unwrap();
let json_without = serde_json::to_string(&anthropic_messages_without).unwrap();
println!("Anthropic JSON without cache_control: {}", json_without);
// Check if cache_control appears in the JSON
if json_without.contains("cache_control") {
println!("WARNING: JSON contains 'cache_control' field when not configured!");
assert!(!json_without.contains("\"cache_control\":null"),
"JSON should not contain 'cache_control: null'");
}
// Test message WITH cache_control
let messages_with = vec![Message::with_cache_control(
MessageRole::User,
"Hello".to_string(),
crate::CacheControl::ephemeral(),
)];
let (_, anthropic_messages_with) = provider.convert_messages(&messages_with).unwrap();
let json_with = serde_json::to_string(&anthropic_messages_with).unwrap();
println!("Anthropic JSON with cache_control: {}", json_with);
assert!(json_with.contains("cache_control"),
"JSON should contain 'cache_control' field when configured");
assert!(json_with.contains("ephemeral"),
"JSON should contain 'ephemeral' type");
// The key assertion: when cache_control is None, it should not appear in JSON
assert!(!json_without.contains("cache_control") || !json_without.contains("null"),
"JSON should not contain 'cache_control' field or null values when not configured");
}
}

View File

@@ -39,10 +39,7 @@
//! // Create a completion request
//! let request = CompletionRequest {
//! messages: vec![
//! Message {
//! role: MessageRole::User,
//! content: "Hello! How are you?".to_string(),
//! },
//! Message::new(MessageRole::User, "Hello! How are you?".to_string()),
//! ],
//! max_tokens: Some(1000),
//! temperature: Some(0.7),
@@ -251,9 +248,12 @@ impl DatabricksProvider {
MessageRole::Assistant => "assistant",
};
// Always use simple string format (Databricks doesn't support cache_control)
let content = serde_json::Value::String(message.content.clone());
databricks_messages.push(DatabricksMessage {
role: role.to_string(),
content: Some(message.content.clone()),
content: Some(content),
tool_calls: None, // Only used in responses, not requests
});
}
@@ -864,8 +864,22 @@ impl LLMProvider for DatabricksProvider {
let content = databricks_response
.choices
.first()
.and_then(|choice| choice.message.content.as_ref())
.cloned()
.and_then(|choice| {
choice.message.content.as_ref().map(|c| {
// Handle both string and array formats
if let Some(s) = c.as_str() {
s.to_string()
} else if let Some(arr) = c.as_array() {
// Extract text from content blocks
arr.iter()
.filter_map(|block| block.get("text").and_then(|t| t.as_str()))
.collect::<Vec<_>>()
.join("")
} else {
String::new()
}
})
})
.unwrap_or_default();
// Check if there are tool calls in the response
@@ -1037,6 +1051,10 @@ impl LLMProvider for DatabricksProvider {
// This includes Claude, Llama, DBRX, and most other models on the platform
true
}
fn supports_cache_control(&self) -> bool {
false
}
}
// Databricks API request/response structures
@@ -1067,7 +1085,8 @@ struct DatabricksFunction {
#[derive(Debug, Serialize, Deserialize)]
struct DatabricksMessage {
role: String,
content: Option<String>, // Make content optional since tool calls might not have content
#[serde(skip_serializing_if = "Option::is_none")]
content: Option<serde_json::Value>, // Can be string or array of content blocks
#[serde(skip_serializing_if = "Option::is_none")]
tool_calls: Option<Vec<DatabricksToolCall>>, // Add tool_calls field for responses
}
@@ -1154,18 +1173,9 @@ mod tests {
.unwrap();
let messages = vec![
Message {
role: MessageRole::System,
content: "You are a helpful assistant.".to_string(),
},
Message {
role: MessageRole::User,
content: "Hello!".to_string(),
},
Message {
role: MessageRole::Assistant,
content: "Hi there!".to_string(),
},
Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
Message::new(MessageRole::User, "Hello!".to_string()),
Message::new(MessageRole::Assistant, "Hi there!".to_string()),
];
let databricks_messages = provider.convert_messages(&messages).unwrap();
@@ -1187,10 +1197,7 @@ mod tests {
)
.unwrap();
let messages = vec![Message {
role: MessageRole::User,
content: "Test message".to_string(),
}];
let messages = vec![Message::new(MessageRole::User, "Test message".to_string())];
let request_body = provider
.create_request_body(&messages, None, false, 1000, 0.5)
@@ -1273,4 +1280,62 @@ mod tests {
assert!(llama_provider.has_native_tool_calling());
assert!(dbrx_provider.has_native_tool_calling());
}
#[test]
fn test_cache_control_serialization() {
let provider = DatabricksProvider::from_token(
"https://test.databricks.com".to_string(),
"test-token".to_string(),
"databricks-claude-sonnet-4".to_string(),
None,
None,
)
.unwrap();
// Test message WITHOUT cache_control
let messages_without = vec![Message::new(MessageRole::User, "Hello".to_string())];
let databricks_messages_without = provider.convert_messages(&messages_without).unwrap();
let json_without = serde_json::to_string(&databricks_messages_without).unwrap();
println!("JSON without cache_control: {}", json_without);
assert!(!json_without.contains("cache_control"),
"JSON should not contain 'cache_control' field when not configured");
// Test message WITH cache_control - should still NOT include it (Databricks doesn't support it)
let messages_with = vec![Message::with_cache_control(
MessageRole::User,
"Hello".to_string(),
crate::CacheControl::ephemeral(),
)];
let databricks_messages_with = provider.convert_messages(&messages_with).unwrap();
let json_with = serde_json::to_string(&databricks_messages_with).unwrap();
println!("JSON with cache_control: {}", json_with);
assert!(!json_with.contains("cache_control"),
"JSON should NOT contain 'cache_control' field - Databricks doesn't support it");
}
#[test]
fn test_databricks_does_not_support_cache_control() {
let claude_provider = DatabricksProvider::from_token(
"https://test.databricks.com".to_string(),
"test-token".to_string(),
"databricks-claude-sonnet-4".to_string(),
None,
None,
)
.unwrap();
let llama_provider = DatabricksProvider::from_token(
"https://test.databricks.com".to_string(),
"test-token".to_string(),
"databricks-meta-llama-3-3-70b-instruct".to_string(),
None,
None,
)
.unwrap();
assert!(!claude_provider.supports_cache_control(), "Databricks should not support cache_control even for Claude models");
assert!(!llama_provider.supports_cache_control(), "Databricks should not support cache_control for Llama models");
}
}

View File

@@ -21,6 +21,11 @@ pub trait LLMProvider: Send + Sync {
fn has_native_tool_calling(&self) -> bool {
false
}
/// Check if the provider supports cache control
fn supports_cache_control(&self) -> bool {
false
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -32,10 +37,40 @@ pub struct CompletionRequest {
pub tools: Option<Vec<Tool>>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheControl {
#[serde(rename = "type")]
pub cache_type: CacheType,
#[serde(skip_serializing_if = "Option::is_none")]
pub ttl: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum CacheType {
Ephemeral,
}
impl CacheControl {
pub fn ephemeral() -> Self {
Self { cache_type: CacheType::Ephemeral, ttl: None }
}
pub fn five_minute() -> Self {
Self { cache_type: CacheType::Ephemeral, ttl: Some("5m".to_string()) }
}
pub fn one_hour() -> Self {
Self { cache_type: CacheType::Ephemeral, ttl: Some("1h".to_string()) }
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Message {
pub role: MessageRole,
pub content: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub cache_control: Option<CacheControl>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -95,6 +130,45 @@ pub use databricks::DatabricksProvider;
pub use embedded::EmbeddedProvider;
pub use openai::OpenAIProvider;
impl Message {
/// Create a new message with optional cache control
pub fn new(role: MessageRole, content: String) -> Self {
Self {
role,
content,
cache_control: None,
}
}
/// Create a new message with cache control
pub fn with_cache_control(role: MessageRole, content: String, cache_control: CacheControl) -> Self {
Self {
role,
content,
cache_control: Some(cache_control),
}
}
/// Create a message with cache control, with provider validation
pub fn with_cache_control_validated(
role: MessageRole,
content: String,
cache_control: CacheControl,
provider: &dyn LLMProvider
) -> Self {
if !provider.supports_cache_control() {
tracing::warn!(
"Cache control requested for provider '{}' which does not support it. \
Cache control is only supported by Anthropic and Anthropic via Databricks.",
provider.name()
);
return Self::new(role, content);
}
Self::with_cache_control(role, content, cache_control)
}
}
/// Provider registry for managing multiple LLM providers
pub struct ProviderRegistry {
providers: HashMap<String, Box<dyn LLMProvider>>,
@@ -144,3 +218,68 @@ impl Default for ProviderRegistry {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_message_serialization_without_cache_control() {
let msg = Message::new(MessageRole::User, "Hello".to_string());
let json = serde_json::to_string(&msg).unwrap();
println!("Message JSON without cache_control: {}", json);
assert!(!json.contains("cache_control"),
"JSON should not contain 'cache_control' field when not configured");
}
#[test]
fn test_message_serialization_with_cache_control() {
let msg = Message::with_cache_control(
MessageRole::User,
"Hello".to_string(),
CacheControl::ephemeral(),
);
let json = serde_json::to_string(&msg).unwrap();
println!("Message JSON with cache_control: {}", json);
assert!(json.contains("cache_control"),
"JSON should contain 'cache_control' field when configured");
assert!(json.contains("ephemeral"),
"JSON should contain 'ephemeral' value");
assert!(json.contains("\"type\":"),
"JSON should contain 'type' field in cache_control");
assert!(!json.contains("null"),
"JSON should not contain null values");
}
#[test]
fn test_cache_control_five_minute_serialization() {
let msg = Message::with_cache_control(
MessageRole::User,
"Hello".to_string(),
CacheControl::five_minute(),
);
let json = serde_json::to_string(&msg).unwrap();
println!("Message JSON with 5-minute cache_control: {}", json);
assert!(json.contains("cache_control"), "JSON should contain 'cache_control' field");
assert!(json.contains("ephemeral"), "JSON should contain 'ephemeral' type");
assert!(json.contains("\"ttl\":\"5m\""), "JSON should contain ttl field with 5m value");
}
#[test]
fn test_cache_control_one_hour_serialization() {
let msg = Message::with_cache_control(
MessageRole::User,
"Hello".to_string(),
CacheControl::one_hour(),
);
let json = serde_json::to_string(&msg).unwrap();
println!("Message JSON with 1-hour cache_control: {}", json);
assert!(json.contains("cache_control"), "JSON should contain 'cache_control' field");
assert!(json.contains("ephemeral"), "JSON should contain 'ephemeral' type");
assert!(json.contains("\"ttl\":\"1h\""), "JSON should contain ttl field with 1h value");
}
}

View File

@@ -0,0 +1,131 @@
//! Regression test for cache_control serialization bug
//!
//! This test verifies that cache_control is NOT serialized in the wrong format.
//! The bug was that it serialized as:
//! - `system.0.cache_control.ephemeral.ttl` (WRONG)
//!
//! It should serialize as:
//! - `"cache_control": {"type": "ephemeral"}` for ephemeral
//! - `"cache_control": {"type": "ephemeral", "ttl": "5m"}` for 5minute
//! - `"cache_control": {"type": "ephemeral", "ttl": "1h"}` for 1hour
use g3_providers::{CacheControl, Message, MessageRole};
#[test]
fn test_no_wrong_serialization_format() {
// Test ephemeral
let msg = Message::with_cache_control(
MessageRole::System,
"Test".to_string(),
CacheControl::ephemeral(),
);
let json = serde_json::to_string(&msg).unwrap();
println!("Ephemeral message JSON: {}", json);
// Should NOT contain the wrong format
assert!(!json.contains("system.0.cache_control"),
"JSON should not contain 'system.0.cache_control' path");
assert!(!json.contains("cache_control.ephemeral"),
"JSON should not contain 'cache_control.ephemeral' path");
// Should contain the correct format
assert!(json.contains(r#""cache_control":{"type":"ephemeral"}"#),
"JSON should contain correct cache_control format");
}
#[test]
fn test_five_minute_no_wrong_format() {
let msg = Message::with_cache_control(
MessageRole::System,
"Test".to_string(),
CacheControl::five_minute(),
);
let json = serde_json::to_string(&msg).unwrap();
println!("5-minute message JSON: {}", json);
// Should NOT contain the wrong format
assert!(!json.contains("system.0.cache_control"),
"JSON should not contain 'system.0.cache_control' path");
assert!(!json.contains("cache_control.ephemeral.ttl"),
"JSON should not contain 'cache_control.ephemeral.ttl' path");
// Should contain the correct format with ttl as a direct field
assert!(json.contains(r#""type":"ephemeral""#),
"JSON should contain type field");
assert!(json.contains(r#""ttl":"5m""#),
"JSON should contain ttl field with value 5m");
}
#[test]
fn test_one_hour_no_wrong_format() {
let msg = Message::with_cache_control(
MessageRole::System,
"Test".to_string(),
CacheControl::one_hour(),
);
let json = serde_json::to_string(&msg).unwrap();
println!("1-hour message JSON: {}", json);
// Should NOT contain the wrong format
assert!(!json.contains("system.0.cache_control"),
"JSON should not contain 'system.0.cache_control' path");
assert!(!json.contains("cache_control.ephemeral.ttl"),
"JSON should not contain 'cache_control.ephemeral.ttl' path");
// Should contain the correct format with ttl as a direct field
assert!(json.contains(r#""type":"ephemeral""#),
"JSON should contain type field");
assert!(json.contains(r#""ttl":"1h""#),
"JSON should contain ttl field with value 1h");
}
#[test]
fn test_cache_control_structure_is_flat() {
// Verify that the cache_control object has a flat structure
// with 'type' and optional 'ttl' at the same level
let cache_control = CacheControl::five_minute();
let json_value = serde_json::to_value(&cache_control).unwrap();
println!("Cache control as JSON value: {}", serde_json::to_string_pretty(&json_value).unwrap());
let obj = json_value.as_object().expect("Should be an object");
// Should have exactly 2 keys at the top level
assert_eq!(obj.len(), 2, "Cache control should have exactly 2 top-level fields");
// Both 'type' and 'ttl' should be at the same level
assert!(obj.contains_key("type"), "Should have 'type' field");
assert!(obj.contains_key("ttl"), "Should have 'ttl' field");
// 'type' should be a string, not an object
assert!(obj["type"].is_string(), "'type' should be a string value");
// 'ttl' should be a string, not nested
assert!(obj["ttl"].is_string(), "'ttl' should be a string value");
}
#[test]
fn test_ephemeral_cache_control_structure() {
let cache_control = CacheControl::ephemeral();
let json_value = serde_json::to_value(&cache_control).unwrap();
println!("Ephemeral cache control as JSON value: {}", serde_json::to_string_pretty(&json_value).unwrap());
let obj = json_value.as_object().expect("Should be an object");
// Should have exactly 1 key (only 'type', no 'ttl')
assert_eq!(obj.len(), 1, "Ephemeral cache control should have exactly 1 top-level field");
// Should have 'type' field
assert!(obj.contains_key("type"), "Should have 'type' field");
// Should NOT have 'ttl' field
assert!(!obj.contains_key("ttl"), "Ephemeral should not have 'ttl' field");
// 'type' should be a string with value "ephemeral"
assert_eq!(obj["type"].as_str().unwrap(), "ephemeral");
}

View File

@@ -0,0 +1,164 @@
//! Integration tests for cache_control feature
//!
//! These tests verify that cache_control is correctly serialized in messages
//! for both Anthropic and Databricks providers.
use g3_providers::{CacheControl, Message, MessageRole};
use serde_json::json;
#[test]
fn test_ephemeral_cache_control_serialization() {
let cache_control = CacheControl::ephemeral();
let json = serde_json::to_value(&cache_control).unwrap();
println!("Ephemeral cache_control JSON: {}", serde_json::to_string(&json).unwrap());
assert_eq!(json, json!({
"type": "ephemeral"
}));
// Verify no ttl field is present
assert!(!json.as_object().unwrap().contains_key("ttl"));
}
#[test]
fn test_five_minute_cache_control_serialization() {
let cache_control = CacheControl::five_minute();
let json = serde_json::to_value(&cache_control).unwrap();
println!("5-minute cache_control JSON: {}", serde_json::to_string(&json).unwrap());
assert_eq!(json, json!({
"type": "ephemeral",
"ttl": "5m"
}));
}
#[test]
fn test_one_hour_cache_control_serialization() {
let cache_control = CacheControl::one_hour();
let json = serde_json::to_value(&cache_control).unwrap();
println!("1-hour cache_control JSON: {}", serde_json::to_string(&json).unwrap());
assert_eq!(json, json!({
"type": "ephemeral",
"ttl": "1h"
}));
}
#[test]
fn test_message_with_ephemeral_cache_control() {
let msg = Message::with_cache_control(
MessageRole::System,
"System prompt".to_string(),
CacheControl::ephemeral(),
);
let json = serde_json::to_value(&msg).unwrap();
println!("Message with ephemeral cache_control: {}", serde_json::to_string(&json).unwrap());
let cache_control = json.get("cache_control").expect("cache_control field should exist");
assert_eq!(cache_control.get("type").unwrap(), "ephemeral");
assert!(!cache_control.as_object().unwrap().contains_key("ttl"));
}
#[test]
fn test_message_with_five_minute_cache_control() {
let msg = Message::with_cache_control(
MessageRole::System,
"System prompt".to_string(),
CacheControl::five_minute(),
);
let json = serde_json::to_value(&msg).unwrap();
println!("Message with 5-minute cache_control: {}", serde_json::to_string(&json).unwrap());
let cache_control = json.get("cache_control").expect("cache_control field should exist");
assert_eq!(cache_control.get("type").unwrap(), "ephemeral");
assert_eq!(cache_control.get("ttl").unwrap(), "5m");
}
#[test]
fn test_message_with_one_hour_cache_control() {
let msg = Message::with_cache_control(
MessageRole::System,
"System prompt".to_string(),
CacheControl::one_hour(),
);
let json = serde_json::to_value(&msg).unwrap();
println!("Message with 1-hour cache_control: {}", serde_json::to_string(&json).unwrap());
let cache_control = json.get("cache_control").expect("cache_control field should exist");
assert_eq!(cache_control.get("type").unwrap(), "ephemeral");
assert_eq!(cache_control.get("ttl").unwrap(), "1h");
}
#[test]
fn test_message_without_cache_control() {
let msg = Message::new(MessageRole::User, "Hello".to_string());
let json = serde_json::to_value(&msg).unwrap();
println!("Message without cache_control: {}", serde_json::to_string(&json).unwrap());
// cache_control field should not be present when not set
assert!(!json.as_object().unwrap().contains_key("cache_control"));
}
#[test]
fn test_cache_control_json_format_ephemeral() {
let cache_control = CacheControl::ephemeral();
let json_str = serde_json::to_string(&cache_control).unwrap();
println!("Ephemeral JSON string: {}", json_str);
// Verify exact JSON format
assert_eq!(json_str, r#"{"type":"ephemeral"}"#);
}
#[test]
fn test_cache_control_json_format_five_minute() {
let cache_control = CacheControl::five_minute();
let json_str = serde_json::to_string(&cache_control).unwrap();
println!("5-minute JSON string: {}", json_str);
// Verify exact JSON format
assert_eq!(json_str, r#"{"type":"ephemeral","ttl":"5m"}"#);
}
#[test]
fn test_cache_control_json_format_one_hour() {
let cache_control = CacheControl::one_hour();
let json_str = serde_json::to_string(&cache_control).unwrap();
println!("1-hour JSON string: {}", json_str);
// Verify exact JSON format
assert_eq!(json_str, r#"{"type":"ephemeral","ttl":"1h"}"#);
}
#[test]
fn test_deserialization_ephemeral() {
let json_str = r#"{"type":"ephemeral"}"#;
let cache_control: CacheControl = serde_json::from_str(json_str).unwrap();
assert_eq!(cache_control.ttl, None);
}
#[test]
fn test_deserialization_five_minute() {
let json_str = r#"{"type":"ephemeral","ttl":"5m"}"#;
let cache_control: CacheControl = serde_json::from_str(json_str).unwrap();
assert_eq!(cache_control.ttl, Some("5m".to_string()));
}
#[test]
fn test_deserialization_one_hour() {
let json_str = r#"{"type":"ephemeral","ttl":"1h"}"#;
let cache_control: CacheControl = serde_json::from_str(json_str).unwrap();
assert_eq!(cache_control.ttl, Some("1h".to_string()));
}

70
tail_tool_logs.sh Executable file
View File

@@ -0,0 +1,70 @@
#!/bin/bash
# Useful tool for tailing tool_calls files. It picks up whatever the latest is and does tail -f
if [[ -n "$G3_WORKSPACE" ]]; then
TARGET_DIR="$G3_WORKSPACE/logs"
else
TARGET_DIR="$HOME/tmp/workspace/logs"
fi
if [[ ! -d "$TARGET_DIR" ]]; then
echo "Error: Directory '$TARGET_DIR' does not exist."
exit 1
fi
cd "$TARGET_DIR" || exit 1
echo "Monitoring directory '$TARGET_DIR' for newest 'tool_calls*' file..."
# Variables to keep track of the current state
CURRENT_PID=""
CURRENT_FILE=""
# Cleanup function: Kill the background tail process when this script is stopped (Ctrl+C)
cleanup() {
echo ""
echo "Stopping monitor..."
if [[ -n "$CURRENT_PID" ]]; then
kill "$CURRENT_PID" 2>/dev/null
fi
exit 0
}
# Register the cleanup function for SIGINT (Ctrl+C) and SIGTERM
trap cleanup SIGINT SIGTERM
while true; do
# Find the newest file matching the pattern using ls -t (sort by time)
# 2>/dev/null suppresses errors if no files are found
NEWEST_FILE=$(ls -t tool_calls* 2>/dev/null | head -n 1)
# If a file was found AND it is different from the one we are currently watching
if [[ -n "$NEWEST_FILE" && "$NEWEST_FILE" != "$CURRENT_FILE" ]]; then
# If we were already watching a file, kill the old tail process
if [[ -n "$CURRENT_PID" ]]; then
kill "$CURRENT_PID" 2>/dev/null
fi
echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
echo ">>> Switched to new file: $NEWEST_FILE"
echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
# Start tail in the background (&)
tail -f "$NEWEST_FILE" &
# Capture the Process ID ($!) of the tail command we just launched
CURRENT_PID=$!
# Update the tracker variable
CURRENT_FILE="$NEWEST_FILE"
fi
# Wait 1 second before checking again
sleep 1
done