Compare commits

5 commits: dhanji/g3-... jochen_cac...

| Author | SHA1 | Date |
| --- | --- | --- |
|  | 3f21bdc7b2 |  |
|  | 9bffd8b1bf |  |
|  | bfee8040e9 |  |
|  | a150ba6a55 |  |
|  | 296bf5a449 |  |
Cargo.lock (generated): 18 changes
@@ -1345,7 +1345,6 @@ dependencies = [
 "dirs 5.0.1",
 "g3-config",
 "g3-core",
-"g3-ensembles",
 "indicatif",
 "ratatui",
 "rustyline",
@@ -1463,23 +1462,6 @@ dependencies = [
 "walkdir",
 ]
-
-[[package]]
-name = "g3-ensembles"
-version = "0.1.0"
-dependencies = [
-"anyhow",
-"chrono",
-"clap",
-"g3-config",
-"g3-core",
-"serde",
-"serde_json",
-"tempfile",
-"tokio",
-"tracing",
-"uuid",
-]

 [[package]]
 name = "g3-execution"
 version = "0.1.0"
@@ -6,8 +6,7 @@ members = [
 "crates/g3-config",
 "crates/g3-execution",
 "crates/g3-computer-control",
-"crates/g3-console",
-"crates/g3-ensembles"
+"crates/g3-console"
 ]
 resolver = "2"

@@ -96,7 +96,6 @@ These commands give you fine-grained control over context management, allowing y
 - Window listing and identification
 - **Code Search**: Embedded tree-sitter for syntax-aware code search (Rust, Python, JavaScript, TypeScript, Go, Java, C, C++) - see [Code Search Guide](docs/CODE_SEARCH.md)
 - **Final Output**: Formatted result presentation
-- **Flock Mode**: Parallel multi-agent development for large projects - see [Flock Mode Guide](docs/FLOCK_MODE.md)

 ### Provider Flexibility
 - Support for multiple LLM providers through a unified interface
@@ -130,7 +129,6 @@ G3 is designed for:
 - API integration and testing
 - Documentation generation
 - Complex multi-step workflows
-- Parallel development of modular architectures
 - Desktop application automation and testing

 ## Getting Started
@@ -11,12 +11,23 @@ model = "databricks-claude-sonnet-4"
 max_tokens = 4096
 temperature = 0.1
 use_oauth = true
+# cache_config = "ephemeral" # Optional: Enable prompt caching for Claude models
+# Options: "ephemeral", "5minute", "1hour"
+# Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
+# The cache control will be automatically applied to:
+# - The system prompt at the start of each session
+# - Assistant responses after every 10 tool calls
+# - 5minute costs $3/mtok, more details below
+# https://docs.claude.com/en/docs/build-with-claude/prompt-caching#pricing

 [providers.anthropic]
 api_key = "your-anthropic-api-key"
 model = "claude-3-haiku-20240307" # Using a faster model for player
 max_tokens = 4096
 temperature = 0.3 # Slightly higher temperature for more creative implementations
+# cache_config = "ephemeral" # Optional: Enable prompt caching
+# Options: "ephemeral", "5minute", "1hour"
+# Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.

 [agent]
 fallback_default_max_tokens = 8192
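The commented-out `cache_config` keys above are enabled by uncommenting one value per provider. As a minimal sketch of how such a key round-trips into an optional field (assuming the `serde` and `toml` crates; the struct below is a simplified stand-in, not the actual g3-config type), a missing or commented-out key simply deserializes to `None`:

```rust
use serde::Deserialize;

// Simplified stand-in for a provider config section; only the fields needed here.
#[derive(Debug, Deserialize)]
struct ProviderConfig {
    model: String,
    max_tokens: Option<u32>,
    cache_config: Option<String>, // stays None while the key is commented out
}

fn main() {
    // Key present: the caching mode is picked up.
    let enabled: ProviderConfig = toml::from_str(
        "model = \"claude-3-haiku-20240307\"\nmax_tokens = 4096\ncache_config = \"ephemeral\"",
    )
    .expect("valid TOML");
    assert_eq!(enabled.cache_config.as_deref(), Some("ephemeral"));

    // Key absent (or commented out): caching stays disabled.
    let disabled: ProviderConfig =
        toml::from_str("model = \"claude-3-haiku-20240307\"").expect("valid TOML");
    assert!(disabled.cache_config.is_none());
}
```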
@@ -14,6 +14,15 @@ max_tokens = 4096 # Per-request output limit (how many tokens the model can gene
 # Note: This is different from max_context_length (total conversation history size)
 temperature = 0.1
 use_oauth = true
+# cache_config = "ephemeral" # Optional: Enable prompt caching for Claude models on Databricks
+# Options: "ephemeral", "5minute", "1hour"
+# Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
+# The cache control will be automatically applied to:
+# - The system prompt at the start of each session
+# - Assistant responses after every 10 tool calls
+# - 5minute costs $3/mtok, more details below
+# https://docs.claude.com/en/docs/build-with-claude/prompt-caching#pricing
+

 # Multiple OpenAI-compatible providers can be configured with custom names
 # Each provider gets its own section under [providers.openai_compatible.<name>]
@@ -8,7 +8,6 @@ description = "CLI interface for G3 AI coding agent"
 g3-core = { path = "../g3-core" }
 g3-config = { path = "../g3-config" }
 clap = { workspace = true }
-g3-ensembles = { path = "../g3-ensembles" }
 tokio = { workspace = true }
 anyhow = { workspace = true }
 tracing = { workspace = true }
@@ -246,36 +246,11 @@ pub struct Cli {
 /// Enable WebDriver browser automation tools
 #[arg(long)]
 pub webdriver: bool,

-/// Enable flock mode - parallel multi-agent development
-#[arg(long, requires = "flock_workspace", requires = "segments")]
-pub project: Option<PathBuf>,
-
-/// Flock workspace directory (where segment copies will be created)
-#[arg(long, requires = "project")]
-pub flock_workspace: Option<PathBuf>,
-
-/// Number of segments to partition work into (for flock mode)
-#[arg(long, requires = "project")]
-pub segments: Option<usize>,
-
-/// Maximum turns per segment in flock mode (default: 5)
-#[arg(long, default_value = "5")]
-pub flock_max_turns: usize,
 }

 pub async fn run() -> Result<()> {
 let cli = Cli::parse();

-// Check if flock mode is enabled
-if let (Some(project_dir), Some(flock_workspace), Some(num_segments)) =
-(&cli.project, &cli.flock_workspace, cli.segments) {
-// Run flock mode
-return run_flock_mode(project_dir.clone(), flock_workspace.clone(), num_segments, cli.flock_max_turns).await;
-}
-
-// Otherwise, continue with normal mode
-
 // Only initialize logging if not in retro mode
 if !cli.machine {
 // Initialize logging with filtering
@@ -464,39 +439,6 @@ pub async fn run() -> Result<()> {
 Ok(())
 }

-/// Run flock mode - parallel multi-agent development
-async fn run_flock_mode(
-project_dir: PathBuf,
-flock_workspace: PathBuf,
-num_segments: usize,
-max_turns: usize,
-) -> Result<()> {
-let output = SimpleOutput::new();
-
-output.print("");
-output.print("🦅 G3 FLOCK MODE - Parallel Multi-Agent Development");
-output.print("");
-output.print(&format!("📁 Project: {}", project_dir.display()));
-output.print(&format!("🗂️ Workspace: {}", flock_workspace.display()));
-output.print(&format!("🔢 Segments: {}", num_segments));
-output.print(&format!("🔄 Max Turns per Segment: {}", max_turns));
-output.print("");
-
-// Create flock configuration
-let config = g3_ensembles::FlockConfig::new(project_dir, flock_workspace, num_segments)?
-.with_max_turns(max_turns);
-
-// Create and run flock mode
-let mut flock = g3_ensembles::FlockMode::new(config)?;
-
-match flock.run().await {
-Ok(_) => output.print("\n✅ Flock mode completed successfully"),
-Err(e) => output.print(&format!("\n❌ Flock mode failed: {}", e)),
-}
-
-Ok(())
-}
-
 /// Accumulative autonomous mode: accumulates requirements from user input
 /// and runs autonomous mode after each input
 async fn run_accumulative_mode(
@@ -40,6 +40,8 @@ pub struct AnthropicConfig {
 pub model: String,
 pub max_tokens: Option<u32>,
 pub temperature: Option<f32>,
+pub cache_config: Option<String>, // "ephemeral", "5minute", "1hour", or None to disable
+pub enable_1m_context: Option<bool>, // Enable 1m context window (costs extra)
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -3,6 +3,8 @@ pub mod error_handling;
 pub mod project;
 pub mod task_result;
 pub mod ui_writer;

+use std::process::exit;
 pub use task_result::TaskResult;

 #[cfg(test)]
@@ -23,7 +25,7 @@ use anyhow::Result;
 use g3_computer_control::WebDriverController;
 use g3_config::Config;
 use g3_execution::CodeExecutor;
-use g3_providers::{CompletionRequest, Message, MessageRole, ProviderRegistry, Tool};
+use g3_providers::{CacheControl, CompletionRequest, Message, MessageRole, ProviderRegistry, Tool};
 #[allow(unused_imports)]
 use regex::Regex;
 use serde::{Deserialize, Serialize};
@@ -423,18 +425,12 @@ Format this as a detailed but concise summary that can be used to resume the con
 self.used_tokens = 0;

 // Add the summary as a system message
-let summary_message = Message {
-role: MessageRole::System,
-content: format!("Previous conversation summary:\n\n{}", summary),
-};
+let summary_message = Message::new(MessageRole::System, format!("Previous conversation summary:\n\n{}", summary));
 self.add_message(summary_message);

 // Add the latest user message if provided
 if let Some(user_msg) = latest_user_message {
-self.add_message(Message {
-role: MessageRole::User,
-content: user_msg,
-});
+self.add_message(Message::new(MessageRole::User, user_msg));
 }

 let new_chars: usize = self
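This hunk is the first of many in this change that replace struct-literal construction of `Message` with a `Message::new(role, content)` constructor. The constructor itself is not part of the diff; a hypothetical sketch of what it presumably looks like, inferred only from the removed `role` and `content` fields:

```rust
// Hypothetical reconstruction for illustration; the real types live in g3-providers
// and may carry additional fields (e.g. the cache-control data added in this change).
#[derive(Debug, Clone)]
pub enum MessageRole {
    System,
    User,
    Assistant,
}

#[derive(Debug, Clone)]
pub struct Message {
    pub role: MessageRole,
    pub content: String,
}

impl Message {
    pub fn new(role: MessageRole, content: String) -> Self {
        Message { role, content }
    }
}

fn main() {
    let summary = "Previous conversation summary:\n\n...";
    let message = Message::new(MessageRole::System, summary.to_string());
    println!("{:?}", message.role);
}
```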
@@ -756,6 +752,7 @@ pub struct Agent<W: UiWriter> {
 safaridriver_process: std::sync::Arc<tokio::sync::RwLock<Option<tokio::process::Child>>>,
 macax_controller:
 std::sync::Arc<tokio::sync::RwLock<Option<g3_computer_control::MacAxController>>>,
+tool_call_count: usize,
 }

 impl<W: UiWriter> Agent<W> {
@@ -898,6 +895,8 @@ impl<W: UiWriter> Agent<W> {
 Some(anthropic_config.model.clone()),
 anthropic_config.max_tokens,
 anthropic_config.temperature,
+anthropic_config.cache_config.clone(),
+anthropic_config.enable_1m_context,
 )?;
 providers.register(anthropic_provider);
 }
@@ -944,10 +943,7 @@ impl<W: UiWriter> Agent<W> {

 // If README content is provided, add it as the first system message
 if let Some(readme) = readme_content {
-let readme_message = Message {
-role: MessageRole::System,
-content: readme,
-};
+let readme_message = Message::new(MessageRole::System, readme);
 context_window.add_message(readme_message);
 }

@@ -1003,9 +999,23 @@ impl<W: UiWriter> Agent<W> {
 None
 }))
 },
+tool_call_count: 0,
 })
 }

+/// Convert cache config string to CacheControl enum
+fn parse_cache_control(cache_config: &str) -> Option<CacheControl> {
+match cache_config {
+"ephemeral" => Some(CacheControl::ephemeral()),
+"5minute" => Some(CacheControl::five_minute()),
+"1hour" => Some(CacheControl::one_hour()),
+_ => {
+warn!("Invalid cache_config value: '{}'. Valid values are: ephemeral, 5minute, 1hour", cache_config);
+None
+}
+}
+}
+
 fn get_configured_context_length(config: &Config, providers: &ProviderRegistry) -> Result<u32> {
 // First, check if there's a global max_context_length override in agent config
 if let Some(max_context_length) = config.agent.max_context_length {
@@ -1185,7 +1195,11 @@ impl<W: UiWriter> Agent<W> {
 // Only add system message if this is the first interaction (empty conversation history)
 if self.context_window.conversation_history.is_empty() {
 let provider = self.providers.get(None)?;
-let system_prompt = if provider.has_native_tool_calling() {
+let provider_has_native_tool_calling = provider.has_native_tool_calling();
+let provider_name_for_system = provider.name().to_string();
+drop(provider); // Drop provider reference to avoid borrowing issues
+
+let system_prompt = if provider_has_native_tool_calling {
 // For native tool calling providers, use a more explicit system prompt
 "You are G3, an AI programming agent of the same skill level as a seasoned engineer at a major technology company. You analyze given tasks and write code to achieve goals.

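The change above copies the two values it needs out of the provider handle and then drops the handle before building the long system prompt, so later `self.providers.get(None)?` calls do not conflict with an outstanding borrow. A minimal sketch of that copy-then-drop pattern with hypothetical types (the actual return type of `providers.get` is not shown in this diff):

```rust
use std::sync::RwLock;

struct Provider {
    name: String,
    native_tools: bool,
}

// Hypothetical stand-in for the provider registry: a lock whose read guard
// must be released before the registry is used again.
fn run(registry: &RwLock<Provider>) {
    let provider = registry.read().unwrap();
    // Copy out what we need while the guard is alive...
    let has_native_tool_calling = provider.native_tools;
    let provider_name = provider.name.clone();
    drop(provider); // ...then release it to avoid holding the borrow.

    if has_native_tool_calling {
        println!("{provider_name} supports native tool calling");
    }
    // Taking the registry again is now fine.
    let _again = registry.read().unwrap();
}

fn main() {
    let registry = RwLock::new(Provider { name: "anthropic".into(), native_tools: true });
    run(&registry);
}
```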
@@ -1493,18 +1507,26 @@ If you can complete it with 1-2 tool calls, skip TODO.
 }

 // Add system message to context window
-let system_message = Message {
-role: MessageRole::System,
-content: system_prompt,
+let system_message = {
+// Check if we should use cache control for system message
+if let Some(cache_config) = match provider_name_for_system.as_str() {
+"anthropic" => self.config.providers.anthropic.as_ref()
+.and_then(|c| c.cache_config.as_ref())
+.and_then(|config| Self::parse_cache_control(config)),
+_ => None,
+} {
+let provider = self.providers.get(None)?;
+Message::with_cache_control_validated(MessageRole::System, system_prompt, cache_config, provider)
+} else {
+Message::new(MessageRole::System, system_prompt)
+}
 };

 self.context_window.add_message(system_message);
 }

 // Add user message to context window
-let user_message = Message {
-role: MessageRole::User,
-content: format!("Task: {}", description),
-};
+let user_message = Message::new(MessageRole::User, format!("Task: {}", description));
 self.context_window.add_message(user_message);

 // Use the complete conversation history for the request
@@ -1512,6 +1534,9 @@ If you can complete it with 1-2 tool calls, skip TODO.

 // Check if provider supports native tool calling and add tools if so
 let provider = self.providers.get(None)?;
+let provider_name = provider.name().to_string();
+let has_native_tool_calling = provider.has_native_tool_calling();
+let supports_cache_control = provider.supports_cache_control();
 let tools = if provider.has_native_tool_calling() {
 Some(Self::create_tool_definitions(
 self.config.webdriver.enabled,
@@ -1521,9 +1546,10 @@ If you can complete it with 1-2 tool calls, skip TODO.
 } else {
 None
 };
+drop(provider); // Drop the provider reference to avoid borrowing issues

 // Get max_tokens from provider configuration
-let max_tokens = match provider.name() {
+let max_tokens = match provider_name.as_str() {
 "databricks" => {
 // Use the model's maximum limit for Databricks to allow large file generation
 Some(32000)
@@ -1578,9 +1604,23 @@ If you can complete it with 1-2 tool calls, skip TODO.
 // Add assistant response to context window only if not empty
 // This prevents the "Skipping empty message" warning when only tools were executed
 if !response_content.trim().is_empty() {
-let assistant_message = Message {
-role: MessageRole::Assistant,
-content: response_content.clone(),
+let assistant_message = {
+// Check if we should use cache control (every 10 tool calls)
+if self.tool_call_count > 0 && self.tool_call_count % 10 == 0 {
+let provider = self.providers.get(None)?;
+if let Some(cache_config) = match provider.name() {
+"anthropic" => self.config.providers.anthropic.as_ref()
+.and_then(|c| c.cache_config.as_ref())
+.and_then(|config| Self::parse_cache_control(config)),
+_ => None,
+} {
+Message::with_cache_control_validated(MessageRole::Assistant, response_content.clone(), cache_config, provider)
+} else {
+Message::new(MessageRole::Assistant, response_content.clone())
+}
+} else {
+Message::new(MessageRole::Assistant, response_content.clone())
+}
 };
 self.context_window.add_message(assistant_message);
 } else {
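The branch above attaches cache control to an assistant message only when the running tool-call counter is a positive multiple of 10, which matches the "Assistant responses after every 10 tool calls" note added to the config examples. The predicate in isolation (illustrative only):

```rust
// Illustrative cadence check: cache on the 10th, 20th, 30th ... tool call.
fn should_mark_for_cache(tool_call_count: usize) -> bool {
    tool_call_count > 0 && tool_call_count % 10 == 0
}

fn main() {
    assert!(!should_mark_for_cache(0)); // never before any tool call
    assert!(!should_mark_for_cache(9));
    assert!(should_mark_for_cache(10));
    assert!(should_mark_for_cache(20));
}
```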
@@ -1783,17 +1823,11 @@ If you can complete it with 1-2 tool calls, skip TODO.
 .join("\n\n");

 let summary_messages = vec![
-Message {
-role: MessageRole::System,
-content: "You are a helpful assistant that creates concise summaries.".to_string(),
-},
-Message {
-role: MessageRole::User,
-content: format!(
+Message::new(MessageRole::System, "You are a helpful assistant that creates concise summaries.".to_string()),
+Message::new(MessageRole::User, format!(
 "Based on this conversation history, {}\n\nConversation:\n{}",
 summary_prompt, conversation_text
-),
-},
+)),
 ];

 let provider = self.providers.get(None)?;
@@ -2776,18 +2810,11 @@ If you can complete it with 1-2 tool calls, skip TODO.
 .join("\n\n");

 let summary_messages = vec![
-Message {
-role: MessageRole::System,
-content: "You are a helpful assistant that creates concise summaries."
-.to_string(),
-},
-Message {
-role: MessageRole::User,
-content: format!(
+Message::new(MessageRole::System, "You are a helpful assistant that creates concise summaries.".to_string()),
+Message::new(MessageRole::User, format!(
 "Based on this conversation history, {}\n\nConversation:\n{}",
 summary_prompt, conversation_text
-),
-},
+)),
 ];

 let provider = self.providers.get(None)?;
@@ -3273,29 +3300,20 @@ If you can complete it with 1-2 tool calls, skip TODO.
 // Add the tool call and result to the context window using RAW unfiltered content
 // This ensures the log file contains the true raw content including JSON tool calls
 let tool_message = if !raw_content_for_log.trim().is_empty() {
-Message {
-role: MessageRole::Assistant,
-content: format!(
+Message::new(MessageRole::Assistant, format!(
 "{}\n\n{{\"tool\": \"{}\", \"args\": {}}}",
 raw_content_for_log.trim(),
 tool_call.tool,
 tool_call.args
-),
-}
+))
 } else {
 // No text content before tool call, just include the tool call
-Message {
-role: MessageRole::Assistant,
-content: format!(
+Message::new(MessageRole::Assistant, format!(
 "{{\"tool\": \"{}\", \"args\": {}}}",
 tool_call.tool, tool_call.args
-),
-}
-};
-let result_message = Message {
-role: MessageRole::User,
-content: format!("Tool result: {}", tool_result),
+))
 };
+let result_message = Message::new(MessageRole::User, format!("Tool result: {}", tool_result));

 self.context_window.add_message(tool_message);
 self.context_window.add_message(result_message);
@@ -3304,7 +3322,8 @@ If you can complete it with 1-2 tool calls, skip TODO.
 request.messages = self.context_window.conversation_history.clone();

 // Ensure tools are included for native providers in subsequent iterations
-if provider.has_native_tool_calling() {
+let provider_for_tools = self.providers.get(None)?;
+if provider_for_tools.has_native_tool_calling() {
 request.tools = Some(Self::create_tool_definitions(
 self.config.webdriver.enabled,
 self.config.macax.enabled,
@@ -3635,9 +3654,23 @@ If you can complete it with 1-2 tool calls, skip TODO.
 .replace("<</SYS>>", "");

 if !raw_clean.trim().is_empty() {
-let assistant_message = Message {
-role: MessageRole::Assistant,
-content: raw_clean,
+let assistant_message = {
+// Check if we should use cache control (every 10 tool calls)
+if self.tool_call_count > 0 && self.tool_call_count % 10 == 0 {
+let provider = self.providers.get(None)?;
+if let Some(cache_config) = match provider.name() {
+"anthropic" => self.config.providers.anthropic.as_ref()
+.and_then(|c| c.cache_config.as_ref())
+.and_then(|config| Self::parse_cache_control(config)),
+_ => None,
+} {
+Message::with_cache_control_validated(MessageRole::Assistant, raw_clean, cache_config, provider)
+} else {
+Message::new(MessageRole::Assistant, raw_clean)
+}
+} else {
+Message::new(MessageRole::Assistant, raw_clean)
+}
 };
 self.context_window.add_message(assistant_message);
 }
@@ -3679,7 +3712,10 @@ If you can complete it with 1-2 tool calls, skip TODO.
 Ok(TaskResult::new(final_response, self.context_window.clone()))
 }

-pub async fn execute_tool(&self, tool_call: &ToolCall) -> Result<String> {
+pub async fn execute_tool(&mut self, tool_call: &ToolCall) -> Result<String> {
+// Increment tool call count
+self.tool_call_count += 1;
+
 debug!("=== EXECUTING TOOL ===");
 debug!("Tool name: {}", tool_call.tool);
 debug!("Tool args (raw): {:?}", tool_call.args);
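Because `execute_tool` now increments `self.tool_call_count`, its receiver changes from `&self` to `&mut self`, and that is why the test hunks further down switch from `let agent = ...` to `let mut agent = ...`. A minimal sketch of the same requirement with hypothetical types:

```rust
// Hypothetical sketch: a method that bumps a counter needs a mutable receiver,
// so callers in turn need a mutable binding.
struct Agent {
    tool_call_count: usize,
}

impl Agent {
    fn execute_tool(&mut self, tool: &str) -> String {
        self.tool_call_count += 1;
        format!("executed {} (call #{})", tool, self.tool_call_count)
    }
}

fn main() {
    let mut agent = Agent { tool_call_count: 0 }; // `mut` required to call execute_tool
    let result = agent.execute_tool("todo_read");
    assert_eq!(agent.tool_call_count, 1);
    println!("{result}");
}
```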
@@ -6,14 +6,10 @@ use std::sync::Arc;
 fn test_task_result_basic_functionality() {
 // Create a context window with some messages
 let mut context = ContextWindow::new(10000);
-context.add_message(Message {
-role: MessageRole::User,
-content: "Test message 1".to_string(),
-});
-context.add_message(Message {
-role: MessageRole::Assistant,
-content: "Response 1".to_string(),
-});
+context.add_message(Message::new(MessageRole::User, "Test message 1".to_string())
+);
+context.add_message(Message::new(MessageRole::Assistant, "Response 1".to_string())
+);

 // Create a TaskResult
 let response = "This is the response\n\nFinal output block".to_string();
@@ -100,10 +96,7 @@ fn test_context_window_preservation() {

 // Add some messages
 for i in 0..5 {
-context.add_message(Message {
-role: if i % 2 == 0 { MessageRole::User } else { MessageRole::Assistant },
-content: format!("Message {}", i),
-});
+context.add_message(Message::new(if i % 2 == 0 { MessageRole::User } else { MessageRole::Assistant }, format!("Message {}", i)));
 }

 // Create TaskResult
@@ -46,10 +46,10 @@ fn test_thin_context_basic() {
 // Add some messages to the first third
 for i in 0..9 {
 if i % 2 == 0 {
-context.add_message(Message {
-role: MessageRole::Assistant,
-content: format!("Assistant message {}", i),
-});
+context.add_message(Message::new(
+MessageRole::Assistant,
+format!("Assistant message {}", i),
+));
 } else {
 // Add tool results with varying sizes
 let content = if i == 1 {
@@ -63,10 +63,10 @@ fn test_thin_context_basic() {
 format!("Tool result: small result {}", i)
 };

-context.add_message(Message {
-role: MessageRole::User,
-content,
-});
+context.add_message(Message::new(
+MessageRole::User,
+content,
+));
 }
 }

@@ -98,10 +98,10 @@ fn test_thin_write_file_tool_calls() {
 let mut context = ContextWindow::new(10000);

 // Add some messages including a write_file tool call with large content
-context.add_message(Message {
-role: MessageRole::User,
-content: "Please create a large file".to_string(),
-});
+context.add_message(Message::new(
+MessageRole::User,
+"Please create a large file".to_string(),
+));

 // Add an assistant message with a write_file tool call containing large content
 let large_content = "x".repeat(1500);
@@ -109,22 +109,22 @@ fn test_thin_write_file_tool_calls() {
 r#"{{"tool": "write_file", "args": {{"file_path": "test.txt", "content": "{}"}}}}"#,
 large_content
 );
-context.add_message(Message {
-role: MessageRole::Assistant,
-content: format!("I'll create that file.\n\n{}", tool_call_json),
-});
+context.add_message(Message::new(
+MessageRole::Assistant,
+format!("I'll create that file.\n\n{}", tool_call_json),
+));

-context.add_message(Message {
-role: MessageRole::User,
-content: "Tool result: ✅ Successfully wrote 1500 lines".to_string(),
-});
+context.add_message(Message::new(
+MessageRole::User,
+"Tool result: ✅ Successfully wrote 1500 lines".to_string(),
+));

 // Add more messages to ensure we have enough for "first third" logic
 for i in 0..6 {
-context.add_message(Message {
-role: MessageRole::Assistant,
-content: format!("Response {}", i),
-});
+context.add_message(Message::new(
+MessageRole::Assistant,
+format!("Response {}", i),
+));
 }

 // Trigger thinning at 50%
@@ -154,10 +154,10 @@ fn test_thin_str_replace_tool_calls() {
 let mut context = ContextWindow::new(10000);

 // Add some messages including a str_replace tool call with large diff
-context.add_message(Message {
-role: MessageRole::User,
-content: "Please update the file".to_string(),
-});
+context.add_message(Message::new(
+MessageRole::User,
+"Please update the file".to_string(),
+));

 // Add an assistant message with a str_replace tool call containing large diff
 let large_diff = format!("--- old\n{}\n+++ new\n{}", "-old line\n".repeat(100), "+new line\n".repeat(100));
@@ -165,22 +165,22 @@ fn test_thin_str_replace_tool_calls() {
 r#"{{"tool": "str_replace", "args": {{"file_path": "test.txt", "diff": "{}"}}}}"#,
 large_diff.replace('\n', "\\n")
 );
-context.add_message(Message {
-role: MessageRole::Assistant,
-content: format!("I'll update that file.\n\n{}", tool_call_json),
-});
+context.add_message(Message::new(
+MessageRole::Assistant,
+format!("I'll update that file.\n\n{}", tool_call_json),
+));

-context.add_message(Message {
-role: MessageRole::User,
-content: "Tool result: ✅ applied unified diff".to_string(),
-});
+context.add_message(Message::new(
+MessageRole::User,
+"Tool result: ✅ applied unified diff".to_string(),
+));

 // Add more messages to ensure we have enough for "first third" logic
 for i in 0..6 {
-context.add_message(Message {
-role: MessageRole::Assistant,
-content: format!("Response {}", i),
-});
+context.add_message(Message::new(
+MessageRole::Assistant,
+format!("Response {}", i),
+));
 }

 // Trigger thinning at 50%
@@ -212,10 +212,10 @@ fn test_thin_context_no_large_results() {

 // Add only small messages
 for i in 0..9 {
-context.add_message(Message {
-role: MessageRole::User,
-content: format!("Tool result: small {}", i),
-});
+context.add_message(Message::new(
+MessageRole::User,
+format!("Tool result: small {}", i),
+));
 }

 context.used_tokens = 5000;
@@ -244,7 +244,7 @@ fn test_thin_context_only_affects_first_third() {
 MessageRole::Assistant
 };

-context.add_message(Message { role, content });
+context.add_message(Message::new(role, content));
 }

 context.used_tokens = 5000;
@@ -8,27 +8,18 @@ fn test_todo_read_results_not_thinned() {
 let mut context = ContextWindow::new(10000);

 // Add a todo_read tool call
-context.add_message(Message {
-role: MessageRole::Assistant,
-content: r#"{"tool": "todo_read", "args": {}}"#.to_string(),
-});
+context.add_message(Message::new(MessageRole::Assistant, r#"{"tool": "todo_read", "args": {}}"#.to_string()));

 // Add a large TODO result (> 500 chars)
 let large_todo_result = format!(
 "Tool result: 📝 TODO list:\n{}",
 "- [ ] Task with long description\n".repeat(50)
 );
-context.add_message(Message {
-role: MessageRole::User,
-content: large_todo_result.clone(),
-});
+context.add_message(Message::new(MessageRole::User, large_todo_result.clone()));

 // Add more messages to ensure we have enough for "first third" logic
 for i in 0..6 {
-context.add_message(Message {
-role: MessageRole::Assistant,
-content: format!("Response {}", i),
-});
+context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)))
 }

 // Trigger thinning at 50%
@@ -65,27 +56,18 @@ fn test_todo_write_results_not_thinned() {

 // Add a todo_write tool call
 let large_content = "- [ ] Task\n".repeat(100);
-context.add_message(Message {
-role: MessageRole::Assistant,
-content: format!(r#"{{"tool": "todo_write", "args": {{"content": "{}"}}}}"#, large_content),
-});
+context.add_message(Message::new(MessageRole::Assistant, format!(r#"{{"tool": "todo_write", "args": {{"content": "{}"}}}}"#, large_content)));

 // Add a large TODO write result
 let large_todo_result = format!(
 "Tool result: ✅ TODO list updated ({} chars) and saved to todo.g3.md",
 large_content.len()
 );
-context.add_message(Message {
-role: MessageRole::User,
-content: large_todo_result.clone(),
-});
+context.add_message(Message::new(MessageRole::User, large_todo_result.clone()));

 // Add more messages
 for i in 0..6 {
-context.add_message(Message {
-role: MessageRole::Assistant,
-content: format!("Response {}", i),
-});
+context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)))
 }

 // Trigger thinning at 50%
@@ -119,24 +101,15 @@ fn test_non_todo_results_still_thinned() {
 let mut context = ContextWindow::new(10000);

 // Add a non-TODO tool call (e.g., read_file)
-context.add_message(Message {
-role: MessageRole::Assistant,
-content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string(),
-});
+context.add_message(Message::new(MessageRole::Assistant, r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string()));

 // Add a large read_file result (> 500 chars)
 let large_result = format!("Tool result: {}", "x".repeat(1500));
-context.add_message(Message {
-role: MessageRole::User,
-content: large_result,
-});
+context.add_message(Message::new(MessageRole::User, large_result));

 // Add more messages
 for i in 0..6 {
-context.add_message(Message {
-role: MessageRole::Assistant,
-content: format!("Response {}", i),
-});
+context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)))
 }

 // Trigger thinning at 50%
@@ -172,27 +145,18 @@ fn test_todo_read_with_spaces_in_tool_name() {
 let mut context = ContextWindow::new(10000);

 // Add a todo_read tool call with spaces (JSON formatting variation)
-context.add_message(Message {
-role: MessageRole::Assistant,
-content: r#"{"tool": "todo_read", "args": {}}"#.to_string(),
-});
+context.add_message(Message::new(MessageRole::Assistant, r#"{"tool": "todo_read", "args": {}}"#.to_string()));

 // Add a large TODO result
 let large_todo_result = format!(
 "Tool result: 📝 TODO list:\n{}",
 "- [ ] Task\n".repeat(50)
 );
-context.add_message(Message {
-role: MessageRole::User,
-content: large_todo_result.clone(),
-});
+context.add_message(Message::new(MessageRole::User, large_todo_result.clone()));

 // Add more messages
 for i in 0..6 {
-context.add_message(Message {
-role: MessageRole::Assistant,
-content: format!("Response {}", i),
-});
+context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)))
 }

 // Trigger thinning
@@ -27,7 +27,7 @@ fn get_todo_path(temp_dir: &TempDir) -> PathBuf {
 #[serial]
 async fn test_todo_write_creates_file() {
 let temp_dir = TempDir::new().unwrap();
-let agent = create_test_agent_in_dir(&temp_dir).await;
+let mut agent = create_test_agent_in_dir(&temp_dir).await;
 let todo_path = get_todo_path(&temp_dir);

 // Initially, todo.g3.md should not exist
@@ -67,7 +67,7 @@ async fn test_todo_read_from_file() {
 fs::write(&todo_path, test_content).unwrap();

 // Create agent (should load from file)
-let agent = create_test_agent_in_dir(&temp_dir).await;
+let mut agent = create_test_agent_in_dir(&temp_dir).await;

 // Create a tool call to read TODO
 let tool_call = g3_core::ToolCall {
@@ -88,7 +88,7 @@ async fn test_todo_read_from_file() {
 #[serial]
 async fn test_todo_read_empty_file() {
 let temp_dir = TempDir::new().unwrap();
-let agent = create_test_agent_in_dir(&temp_dir).await;
+let mut agent = create_test_agent_in_dir(&temp_dir).await;

 // Create a tool call to read TODO (file doesn't exist)
 let tool_call = g3_core::ToolCall {
@@ -111,7 +111,7 @@ async fn test_todo_persistence_across_agents() {

 // Agent 1: Write TODO
 {
-let agent = create_test_agent_in_dir(&temp_dir).await;
+let mut agent = create_test_agent_in_dir(&temp_dir).await;
 let tool_call = g3_core::ToolCall {
 tool: "todo_write".to_string(),
 args: serde_json::json!({
@@ -126,7 +126,7 @@ async fn test_todo_persistence_across_agents() {

 // Agent 2: Read TODO (new agent instance)
 {
-let agent = create_test_agent_in_dir(&temp_dir).await;
+let mut agent = create_test_agent_in_dir(&temp_dir).await;
 let tool_call = g3_core::ToolCall {
 tool: "todo_read".to_string(),
 args: serde_json::json!({}),
@@ -143,7 +143,7 @@ async fn test_todo_persistence_across_agents() {
 #[serial]
 async fn test_todo_update_preserves_file() {
 let temp_dir = TempDir::new().unwrap();
-let agent = create_test_agent_in_dir(&temp_dir).await;
+let mut agent = create_test_agent_in_dir(&temp_dir).await;
 let todo_path = get_todo_path(&temp_dir);

 // Write initial TODO
@@ -173,7 +173,7 @@ async fn test_todo_update_preserves_file() {
 #[serial]
 async fn test_todo_handles_large_content() {
 let temp_dir = TempDir::new().unwrap();
-let agent = create_test_agent_in_dir(&temp_dir).await;
+let mut agent = create_test_agent_in_dir(&temp_dir).await;
 let todo_path = get_todo_path(&temp_dir);

 // Create a large TODO (but under the 50k limit)
@@ -202,7 +202,7 @@ async fn test_todo_handles_large_content() {
 #[serial]
 async fn test_todo_respects_size_limit() {
 let temp_dir = TempDir::new().unwrap();
-let agent = create_test_agent_in_dir(&temp_dir).await;
+let mut agent = create_test_agent_in_dir(&temp_dir).await;

 // Create content that exceeds the default 50k limit
 let huge_content = "x".repeat(60_000);
@@ -232,7 +232,7 @@ async fn test_todo_agent_initialization_loads_file() {
 fs::write(&todo_path, initial_content).unwrap();

 // Create agent - should load the file during initialization
-let agent = create_test_agent_in_dir(&temp_dir).await;
+let mut agent = create_test_agent_in_dir(&temp_dir).await;

 // Read TODO - should return the pre-existing content
 let tool_call = g3_core::ToolCall {
@@ -248,7 +248,7 @@ async fn test_todo_agent_initialization_loads_file() {
 #[serial]
 async fn test_todo_handles_unicode_content() {
 let temp_dir = TempDir::new().unwrap();
-let agent = create_test_agent_in_dir(&temp_dir).await;
+let mut agent = create_test_agent_in_dir(&temp_dir).await;
 let todo_path = get_todo_path(&temp_dir);

 // Create TODO with unicode characters
@@ -283,7 +283,7 @@ async fn test_todo_handles_unicode_content() {
 #[serial]
 async fn test_todo_empty_content_creates_empty_file() {
 let temp_dir = TempDir::new().unwrap();
-let agent = create_test_agent_in_dir(&temp_dir).await;
+let mut agent = create_test_agent_in_dir(&temp_dir).await;
 let todo_path = get_todo_path(&temp_dir);

 // Write empty TODO
@@ -306,7 +306,7 @@ async fn test_todo_empty_content_creates_empty_file() {
 #[serial]
 async fn test_todo_whitespace_only_content() {
 let temp_dir = TempDir::new().unwrap();
-let agent = create_test_agent_in_dir(&temp_dir).await;
+let mut agent = create_test_agent_in_dir(&temp_dir).await;

 // Write whitespace-only TODO
 let tool_call = g3_core::ToolCall {
@@ -1,20 +0,0 @@
-[package]
-name = "g3-ensembles"
-version = "0.1.0"
-edition = "2021"
-description = "Multi-agent ensemble functionality for G3"
-
-[dependencies]
-g3-core = { path = "../g3-core" }
-g3-config = { path = "../g3-config" }
-clap = { workspace = true }
-tokio = { workspace = true }
-anyhow = { workspace = true }
-tracing = { workspace = true }
-serde = { workspace = true }
-serde_json = { workspace = true }
-chrono = { version = "0.4", features = ["serde"] }
-uuid = { workspace = true }
-
-[dev-dependencies]
-tempfile = "3.8"
@@ -1,422 +0,0 @@
|
|||||||
# G3 Ensembles Testing Documentation
|
|
||||||
|
|
||||||
This document describes the comprehensive test suite for the g3-ensembles crate (Flock Mode).
|
|
||||||
|
|
||||||
## Test Coverage
|
|
||||||
|
|
||||||
### Unit Tests (`src/tests.rs`)
|
|
||||||
|
|
||||||
Unit tests cover the core data structures and logic:
|
|
||||||
|
|
||||||
#### Status Module Tests
|
|
||||||
|
|
||||||
1. **`test_segment_state_display`**
|
|
||||||
- Verifies that `SegmentState` enum displays correctly with emojis
|
|
||||||
- Tests all states: Pending, Running, Completed, Failed, Cancelled
|
|
||||||
|
|
||||||
2. **`test_flock_status_creation`**
|
|
||||||
- Tests creation of `FlockStatus` with correct initial values
|
|
||||||
- Verifies session ID, segment count, and zero metrics
|
|
||||||
|
|
||||||
3. **`test_segment_status_update`**
|
|
||||||
- Tests updating a single segment's status
|
|
||||||
- Verifies metrics are correctly aggregated
|
|
||||||
|
|
||||||
4. **`test_multiple_segment_updates`**
|
|
||||||
- Tests updating multiple segments
|
|
||||||
- Verifies aggregate metrics (tokens, tool calls, errors) are summed correctly
|
|
||||||
|
|
||||||
5. **`test_is_complete`**
|
|
||||||
- Tests the completion detection logic
|
|
||||||
- Verifies that flock is only complete when all segments are in terminal states
|
|
||||||
- Tests various scenarios: no segments, partial completion, full completion
|
|
||||||
|
|
||||||
6. **`test_count_by_state`**
|
|
||||||
- Tests counting segments by their state
|
|
||||||
- Verifies correct counts for each state type
|
|
||||||
|
|
||||||
7. **`test_status_serialization`**
|
|
||||||
- Tests JSON serialization and deserialization
|
|
||||||
- Verifies round-trip conversion preserves all data
|
|
||||||
|
|
||||||
8. **`test_report_generation`**
|
|
||||||
- Tests the comprehensive report generation
|
|
||||||
- Verifies all expected sections are present
|
|
||||||
- Checks that metrics are correctly displayed
|
|
||||||
|
|
||||||
**Run unit tests:**
|
|
||||||
```bash
|
|
||||||
cargo test -p g3-ensembles --lib
|
|
||||||
```
|
|
||||||
|
|
||||||
### Integration Tests (`tests/integration_tests.rs`)
|
|
||||||
|
|
||||||
Integration tests verify end-to-end functionality with real file system and git operations:
|
|
||||||
|
|
||||||
#### Configuration Tests
|
|
||||||
|
|
||||||
1. **`test_flock_config_validation`**
|
|
||||||
- Tests validation of project directory requirements
|
|
||||||
- Verifies error messages for:
|
|
||||||
- Non-existent directory
|
|
||||||
- Non-git repository
|
|
||||||
- Missing flock-requirements.md
|
|
||||||
- Verifies successful creation with valid inputs
|
|
||||||
|
|
||||||
2. **`test_flock_config_builder`**
|
|
||||||
- Tests the builder pattern for `FlockConfig`
|
|
||||||
- Verifies `with_max_turns()` and `with_g3_binary()` methods
|
|
||||||
|
|
||||||
3. **`test_workspace_creation`**
|
|
||||||
- Tests creation of `FlockMode` instance
|
|
||||||
- Verifies project structure is valid
|
|
||||||
|
|
||||||
#### Git Operations Tests
|
|
||||||
|
|
||||||
4. **`test_git_clone_functionality`**
|
|
||||||
- Tests git cloning of project repository
|
|
||||||
- Verifies cloned repository structure:
|
|
||||||
- `.git` directory exists
|
|
||||||
- All files are present
|
|
||||||
- Git history is preserved
|
|
||||||
|
|
||||||
5. **`test_multiple_segment_clones`**
|
|
||||||
- Tests cloning multiple segments (2 segments)
|
|
||||||
- Verifies each segment is independent
|
|
||||||
- Tests that modifications in one segment don't affect others
|
|
||||||
|
|
||||||
6. **`test_git_repo_independence`**
|
|
||||||
- Comprehensive test of segment independence
|
|
||||||
- Creates commits in different segments
|
|
||||||
- Verifies git histories diverge correctly
|
|
||||||
- Ensures files in one segment don't appear in others
|
|
||||||
|
|
||||||
#### Segment Management Tests
|
|
||||||
|
|
||||||
7. **`test_segment_requirements_creation`**
|
|
||||||
- Tests creation of `segment-requirements.md` files
|
|
||||||
- Verifies content is written correctly
|
|
||||||
|
|
||||||
8. **`test_requirements_file_content`**
|
|
||||||
- Tests the structure of flock-requirements.md
|
|
||||||
- Verifies content contains expected sections
|
|
||||||
|
|
||||||
#### Status File Tests
|
|
||||||
|
|
||||||
9. **`test_status_file_operations`**
|
|
||||||
- Tests saving and loading `flock-status.json`
|
|
||||||
- Verifies JSON serialization to file
|
|
||||||
- Tests deserialization from file
|
|
||||||
|
|
||||||
#### JSON Processing Tests

10. **`test_json_extraction`**
    - Tests extraction of JSON arrays from text output
    - Verifies handling of various formats:
      - Plain JSON
      - JSON in markdown code blocks
      - JSON with surrounding text
      - Invalid input (no JSON)

11. **`test_partition_json_parsing`**
    - Tests parsing of the partition JSON structure
    - Verifies module names, requirements, and dependencies are extracted correctly

**Run integration tests:**
```bash
cargo test -p g3-ensembles --test integration_tests
```

### End-to-End Test Script (`scripts/test-flock-mode.sh`)

A comprehensive bash script that tests the complete flock mode workflow:

#### Test Scenarios

1. **Project Creation**
   - Creates a temporary test project
   - Initializes a git repository
   - Creates `flock-requirements.md` with realistic content
   - Makes an initial commit

2. **Project Structure Validation**
   - Verifies the `.git` directory exists
   - Verifies `flock-requirements.md` exists

3. **Git Operations**
   - Tests cloning the project to segment directories
   - Verifies cloned repositories are valid
   - Tests `git log` to ensure history is preserved

4. **Segment Independence**
   - Creates two segments
   - Modifies one segment
   - Verifies the other segment is unaffected

5. **Segment Requirements**
   - Creates `segment-requirements.md` in segments
   - Verifies content is written correctly

6. **Status File Operations**
   - Creates `flock-status.json`
   - Validates JSON structure (if `jq` is available)

**Run end-to-end test:**
```bash
./scripts/test-flock-mode.sh
```

## Test Results

### Current Status

✅ **All tests passing**

- **Unit tests**: 8/8 passed
- **Integration tests**: 11/11 passed
- **End-to-end test**: All scenarios passed

### Test Execution Time

- Unit tests: ~0.01s
- Integration tests: ~0.35s (includes git operations)
- End-to-end test: ~1-2s (includes cleanup)

## Running All Tests

### Run all tests for g3-ensembles:
```bash
cargo test -p g3-ensembles
```

### Run with verbose output:
```bash
cargo test -p g3-ensembles -- --nocapture
```

### Run a specific test:
```bash
cargo test -p g3-ensembles test_git_clone_functionality
```

### Run tests with coverage (requires `cargo-tarpaulin`):
```bash
cargo tarpaulin -p g3-ensembles
```

## Test Helpers

### `create_test_project(name: &str) -> TempDir`

Helper function in the integration tests that creates a complete test project:
- Initializes a git repository
- Configures the git user
- Creates `flock-requirements.md` with two modules
- Creates `README.md`
- Makes an initial commit
- Returns a `TempDir` that auto-cleans on drop

**Usage:**
```rust
let project_dir = create_test_project("my-test");
// Use project_dir.path() to access the directory
// Automatically cleaned up when project_dir goes out of scope
```

### `extract_json_array(output: &str) -> Option<String>`

Helper function that extracts JSON arrays from text output:
- Finds the first `[` and the last `]`
- Returns the content between them
- Returns `None` if no valid JSON array is found

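A minimal sketch of that extraction logic (the actual helper lives in `tests/integration_tests.rs` and may differ in detail; this version only illustrates the first-`[` / last-`]` approach described above):

```rust
/// Return the substring spanning the first '[' through the last ']', if any.
fn extract_json_array(output: &str) -> Option<String> {
    let start = output.find('[')?;
    let end = output.rfind(']')?;
    if end > start {
        Some(output[start..=end].to_string())
    } else {
        None
    }
}
```
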
## Test Data

### Sample Requirements

The test suite uses realistic requirements for a calculator project:

**Module A: Core Library**
- Arithmetic operations (add, sub, mul, div)
- Error handling for division by zero
- Unit tests
- Documentation

**Module B: CLI Application**
- Command-line interface using clap
- Subcommands for each operation
- User-friendly output
- Error handling

This structure tests the partitioning logic with:
- Clear module boundaries
- A dependency relationship (the CLI depends on Core)
- Realistic implementation requirements

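In the test setup this kind of file is written with a plain `std::fs::write`; a condensed, illustrative sketch (the exact content differs between the integration-test helper and the end-to-end script):

```rust
use std::fs;
use std::path::Path;

// Write a two-module flock-requirements.md into a test project directory.
fn write_sample_requirements(project_path: &Path) -> std::io::Result<()> {
    let requirements = "\
# Calculator Test Project

## Module A: Core Library
- Arithmetic operations (add, sub, mul, div)
- Error handling for division by zero

## Module B: CLI Application
- Command-line interface using clap
- Subcommands for each operation
";
    fs::write(project_path.join("flock-requirements.md"), requirements)
}
```
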
## Continuous Integration

To integrate these tests into CI/CD:

### GitHub Actions Example

```yaml
name: Test G3 Ensembles

on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
      - name: Run unit tests
        run: cargo test -p g3-ensembles --lib
      - name: Run integration tests
        run: cargo test -p g3-ensembles --test integration_tests
      - name: Run end-to-end test
        run: ./scripts/test-flock-mode.sh
```

## Test Coverage Goals

### Current Coverage

- ✅ Status data structures: 100%
- ✅ Configuration validation: 100%
- ✅ Git operations: 100%
- ✅ Segment independence: 100%
- ✅ JSON processing: 100%
- ⚠️ Full flock execution: requires LLM access (tested manually)

### Future Test Additions

1. **Mock LLM Tests**
   - Mock the partitioning agent response
   - Test the full flock workflow without real LLM calls

2. **Performance Tests**
   - Test with large numbers of segments (10+)
   - Measure memory usage
   - Test concurrent segment execution

3. **Error Handling Tests**
   - Test behavior when git operations fail
   - Test behavior when segments fail
   - Test recovery scenarios

4. **Edge Cases**
   - Empty requirements file
   - Single segment (degenerate case)
   - Very large requirements file
   - Binary files in the project

## Debugging Tests

### Enable debug logging:
```bash
RUST_LOG=debug cargo test -p g3-ensembles -- --nocapture
```

### Keep test artifacts:
```bash
# Modify the test to skip cleanup,
# or inspect TEST_DIR before cleanup in the end-to-end test
export TEST_DIR=/tmp/my-test
./scripts/test-flock-mode.sh
ls -la $TEST_DIR
```

### Run a single test with a backtrace:
```bash
RUST_BACKTRACE=1 cargo test -p g3-ensembles test_git_clone_functionality -- --nocapture
```

## Contributing Tests

When adding new features to g3-ensembles:

1. **Add unit tests** for new data structures and logic
2. **Add integration tests** for new file/git operations
3. **Update the end-to-end test** if the workflow changes
4. **Document tests** in this file
5. **Ensure all tests pass** before submitting a PR

### Test Naming Convention

- Unit tests: `test_<functionality>`
- Integration tests: `test_<feature>_<scenario>`
- Use descriptive names that explain what is being tested

### Test Structure

```rust
#[test]
fn test_feature_name() {
    // Arrange: set up test data
    let data = create_test_data();

    // Act: perform the operation
    let result = perform_operation(data);

    // Assert: verify the result
    assert!(result.is_ok());
    assert_eq!(result.unwrap(), expected_value);
}
```

## Troubleshooting

### Tests fail with "git not found"

**Solution**: Install git:
```bash
# macOS
brew install git

# Ubuntu/Debian
sudo apt-get install git

# Windows
choco install git
```

### Tests fail with permission errors

**Solution**: Ensure test directories are writable:
```bash
chmod -R u+w /tmp
```

### Integration tests are slow

**Cause**: Git operations and file I/O take time

**Solution**: Run only unit tests for quick feedback:
```bash
cargo test -p g3-ensembles --lib
```

### Test artifacts not cleaned up

**Cause**: A test panicked before cleanup

**Solution**: Manually clean temp directories:
```bash
rm -rf /tmp/tmp.*
```

## Summary

The g3-ensembles test suite provides comprehensive coverage of:
- ✅ Core data structures and logic
- ✅ Configuration validation
- ✅ Git repository operations
- ✅ Segment independence
- ✅ Status tracking and reporting
- ✅ JSON processing
- ✅ End-to-end workflow

All tests are automated, fast, and reliable. The test suite ensures that flock mode works correctly across different scenarios and edge cases.

@@ -1,647 +0,0 @@
|
|||||||
//! Flock mode implementation - parallel multi-agent development
|
|
||||||
|
|
||||||
use anyhow::{Context, Result};
|
|
||||||
use chrono::Utc;
|
|
||||||
use g3_config::Config;
|
|
||||||
use std::path::{Path, PathBuf};
|
|
||||||
use std::process::Stdio;
|
|
||||||
use tokio::io::{AsyncBufReadExt, BufReader};
|
|
||||||
use tokio::process::Command;
|
|
||||||
use tracing::{debug, error, info, warn};
|
|
||||||
use uuid::Uuid;
|
|
||||||
|
|
||||||
use crate::status::{FlockStatus, SegmentState, SegmentStatus};
|
|
||||||
|
|
||||||
/// Configuration for flock mode
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct FlockConfig {
|
|
||||||
/// Project directory (must be a git repo with flock-requirements.md)
|
|
||||||
pub project_dir: PathBuf,
|
|
||||||
|
|
||||||
/// Flock workspace directory where segments will be created
|
|
||||||
pub flock_workspace: PathBuf,
|
|
||||||
|
|
||||||
/// Number of segments to partition work into
|
|
||||||
pub num_segments: usize,
|
|
||||||
|
|
||||||
/// Maximum turns per segment (for autonomous mode)
|
|
||||||
pub max_turns: usize,
|
|
||||||
|
|
||||||
/// G3 configuration to use for agents
|
|
||||||
pub g3_config: Config,
|
|
||||||
|
|
||||||
/// Path to g3 binary (defaults to current executable)
|
|
||||||
pub g3_binary: Option<PathBuf>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl FlockConfig {
|
|
||||||
/// Create a new flock configuration
|
|
||||||
pub fn new(
|
|
||||||
project_dir: PathBuf,
|
|
||||||
flock_workspace: PathBuf,
|
|
||||||
num_segments: usize,
|
|
||||||
) -> Result<Self> {
|
|
||||||
// Validate project directory
|
|
||||||
if !project_dir.exists() {
|
|
||||||
anyhow::bail!("Project directory does not exist: {}", project_dir.display());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if it's a git repo
|
|
||||||
if !project_dir.join(".git").exists() {
|
|
||||||
anyhow::bail!("Project directory must be a git repository: {}", project_dir.display());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check for flock-requirements.md
|
|
||||||
let requirements_path = project_dir.join("flock-requirements.md");
|
|
||||||
if !requirements_path.exists() {
|
|
||||||
anyhow::bail!(
|
|
||||||
"Project directory must contain flock-requirements.md: {}",
|
|
||||||
project_dir.display()
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Load default config
|
|
||||||
let g3_config = Config::load(None)?;
|
|
||||||
|
|
||||||
Ok(Self {
|
|
||||||
project_dir,
|
|
||||||
flock_workspace,
|
|
||||||
num_segments,
|
|
||||||
max_turns: 5, // Default
|
|
||||||
g3_config,
|
|
||||||
g3_binary: None,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Set maximum turns per segment
|
|
||||||
pub fn with_max_turns(mut self, max_turns: usize) -> Self {
|
|
||||||
self.max_turns = max_turns;
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Set custom g3 binary path
|
|
||||||
pub fn with_g3_binary(mut self, binary: PathBuf) -> Self {
|
|
||||||
self.g3_binary = Some(binary);
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Set custom g3 config
|
|
||||||
pub fn with_config(mut self, config: Config) -> Self {
|
|
||||||
self.g3_config = config;
|
|
||||||
self
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Flock mode orchestrator
|
|
||||||
pub struct FlockMode {
|
|
||||||
config: FlockConfig,
|
|
||||||
status: FlockStatus,
|
|
||||||
session_id: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl FlockMode {
|
|
||||||
/// Create a new flock mode instance
|
|
||||||
pub fn new(config: FlockConfig) -> Result<Self> {
|
|
||||||
let session_id = Uuid::new_v4().to_string();
|
|
||||||
|
|
||||||
let status = FlockStatus::new(
|
|
||||||
session_id.clone(),
|
|
||||||
config.project_dir.clone(),
|
|
||||||
config.flock_workspace.clone(),
|
|
||||||
config.num_segments,
|
|
||||||
);
|
|
||||||
|
|
||||||
Ok(Self {
|
|
||||||
config,
|
|
||||||
status,
|
|
||||||
session_id,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Run flock mode
|
|
||||||
pub async fn run(&mut self) -> Result<()> {
|
|
||||||
info!("Starting flock mode with {} segments", self.config.num_segments);
|
|
||||||
|
|
||||||
// Step 1: Partition requirements
|
|
||||||
println!("\n🧠 Step 1: Partitioning requirements into {} segments...", self.config.num_segments);
|
|
||||||
let partitions = self.partition_requirements().await?;
|
|
||||||
|
|
||||||
// Step 2: Create segment workspaces
|
|
||||||
println!("\n📁 Step 2: Creating segment workspaces...");
|
|
||||||
self.create_segment_workspaces(&partitions).await?;
|
|
||||||
|
|
||||||
// Step 3: Run segments in parallel
|
|
||||||
println!("\n🚀 Step 3: Running {} segments in parallel...", self.config.num_segments);
|
|
||||||
self.run_segments_parallel().await?;
|
|
||||||
|
|
||||||
// Step 4: Generate final report
|
|
||||||
println!("\n📊 Step 4: Generating final report...");
|
|
||||||
self.status.completed_at = Some(Utc::now());
|
|
||||||
self.save_status()?;
|
|
||||||
|
|
||||||
let report = self.status.generate_report();
|
|
||||||
println!("{}", report);
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Partition requirements using an AI agent
|
|
||||||
async fn partition_requirements(&mut self) -> Result<Vec<String>> {
|
|
||||||
let requirements_path = self.config.project_dir.join("flock-requirements.md");
|
|
||||||
let requirements_content = std::fs::read_to_string(&requirements_path)
|
|
||||||
.context("Failed to read flock-requirements.md")?;
|
|
||||||
|
|
||||||
// Create a temporary workspace for the partitioning agent
|
|
||||||
let partition_workspace = self.config.flock_workspace.join("_partition");
|
|
||||||
std::fs::create_dir_all(&partition_workspace)?;
|
|
||||||
|
|
||||||
// Create the partitioning prompt
|
|
||||||
let partition_prompt = format!(
|
|
||||||
"You are a software architect tasked with partitioning project requirements into {} logical, \
|
|
||||||
largely non-overlapping modules that can grow into separate architectural components \
|
|
||||||
(e.g., crates, services, or packages).\n\n\
|
|
||||||
REQUIREMENTS:\n{}\n\n\
|
|
||||||
INSTRUCTIONS:\n\
|
|
||||||
1. Analyze the requirements carefully\n\
|
|
||||||
2. Identify {} distinct architectural modules that:\n\
|
|
||||||
- Have minimal overlap and dependencies\n\
|
|
||||||
- Can be developed largely independently\n\
|
|
||||||
- Represent logical architectural boundaries\n\
|
|
||||||
- Could eventually become separate crates or services\n\
|
|
||||||
3. For each module, provide:\n\
|
|
||||||
- A clear module name\n\
|
|
||||||
- The specific requirements that belong to this module\n\
|
|
||||||
- Any dependencies on other modules\n\n\
|
|
||||||
4. Use the final_output tool to provide your partitioning as a JSON array of objects, where each object has:\n\
|
|
||||||
- \"module_name\": string\n\
|
|
||||||
- \"requirements\": string (the requirements text for this module)\n\
|
|
||||||
- \"dependencies\": array of strings (names of other modules this depends on)\n\n\
|
|
||||||
Example format:\n\
|
|
||||||
```json\n\
|
|
||||||
[\n\
|
|
||||||
{{\n\
|
|
||||||
\"module_name\": \"core-engine\",\n\
|
|
||||||
\"requirements\": \"Implement the core processing engine...\",\n\
|
|
||||||
\"dependencies\": []\n\
|
|
||||||
}},\n\
|
|
||||||
{{\n\
|
|
||||||
\"module_name\": \"api-server\",\n\
|
|
||||||
\"requirements\": \"Create REST API endpoints...\",\n\
|
|
||||||
\"dependencies\": [\"core-engine\"]\n\
|
|
||||||
}}\n\
|
|
||||||
]\n\
|
|
||||||
```\n\n\
|
|
||||||
Be thoughtful and strategic in your partitioning. The goal is to enable parallel development.",
|
|
||||||
self.config.num_segments,
|
|
||||||
requirements_content,
|
|
||||||
self.config.num_segments
|
|
||||||
);
|
|
||||||
|
|
||||||
// Get g3 binary path
|
|
||||||
let g3_binary = self.get_g3_binary()?;
|
|
||||||
|
|
||||||
// Run g3 in single-shot mode to partition requirements
|
|
||||||
println!(" Analyzing requirements and creating partitions...");
|
|
||||||
let output = Command::new(&g3_binary)
|
|
||||||
.arg("--workspace")
|
|
||||||
.arg(&partition_workspace)
|
|
||||||
.arg("--quiet") // Disable logging for partitioning agent
|
|
||||||
.arg(&partition_prompt)
|
|
||||||
.output()
|
|
||||||
.await
|
|
||||||
.context("Failed to run g3 for partitioning")?;
|
|
||||||
|
|
||||||
if !output.status.success() {
|
|
||||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
|
||||||
anyhow::bail!("Partitioning agent failed: {}", stderr);
|
|
||||||
}
|
|
||||||
|
|
||||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
|
||||||
debug!("Partitioning agent output: {}", stdout);
|
|
||||||
|
|
||||||
// Extract JSON from the output
|
|
||||||
let partitions_json = self.extract_json_from_output(&stdout)
|
|
||||||
.context("Failed to extract partition JSON from agent output")?;
|
|
||||||
|
|
||||||
// Parse the partitions
|
|
||||||
let partitions: Vec<serde_json::Value> = serde_json::from_str(&partitions_json)
|
|
||||||
.context("Failed to parse partition JSON")?;
|
|
||||||
|
|
||||||
if partitions.len() != self.config.num_segments {
|
|
||||||
warn!(
|
|
||||||
"Expected {} partitions but got {}. Adjusting...",
|
|
||||||
self.config.num_segments,
|
|
||||||
partitions.len()
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract requirements text from each partition
|
|
||||||
let mut partition_texts = Vec::new();
|
|
||||||
for (i, partition) in partitions.iter().enumerate() {
|
|
||||||
let default_name = format!("module-{}", i + 1);
|
|
||||||
let module_name = partition["module_name"]
|
|
||||||
.as_str()
|
|
||||||
.unwrap_or(&default_name);
|
|
||||||
let requirements = partition["requirements"]
|
|
||||||
.as_str()
|
|
||||||
.context("Missing requirements field in partition")?;
|
|
||||||
let dependencies = partition["dependencies"]
|
|
||||||
.as_array()
|
|
||||||
.map(|arr| {
|
|
||||||
arr.iter()
|
|
||||||
.filter_map(|v| v.as_str())
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.join(", ")
|
|
||||||
})
|
|
||||||
.unwrap_or_default();
|
|
||||||
|
|
||||||
let partition_text = format!(
|
|
||||||
"# Module: {}\n\n## Dependencies\n{}\n\n## Requirements\n\n{}",
|
|
||||||
module_name,
|
|
||||||
if dependencies.is_empty() {
|
|
||||||
"None".to_string()
|
|
||||||
} else {
|
|
||||||
dependencies
|
|
||||||
},
|
|
||||||
requirements
|
|
||||||
);
|
|
||||||
|
|
||||||
partition_texts.push(partition_text);
|
|
||||||
println!(" ✓ Created partition {}: {}", i + 1, module_name);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(partition_texts)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Extract JSON from agent output (looks for JSON array in output)
|
|
||||||
fn extract_json_from_output(&self, output: &str) -> Result<String> {
|
|
||||||
// Look for JSON array in the output
|
|
||||||
// Try to find content between [ and ]
|
|
||||||
if let Some(start) = output.find('[') {
|
|
||||||
if let Some(end) = output.rfind(']') {
|
|
||||||
if end > start {
|
|
||||||
return Ok(output[start..=end].to_string());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
        // No JSON array markers found in the output; bail rather than guessing
|
|
||||||
anyhow::bail!("Could not find JSON array in agent output")
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Create segment workspaces by copying project directory
|
|
||||||
async fn create_segment_workspaces(&mut self, partitions: &[String]) -> Result<()> {
|
|
||||||
// Ensure flock workspace exists
|
|
||||||
std::fs::create_dir_all(&self.config.flock_workspace)?;
|
|
||||||
|
|
||||||
for (i, partition) in partitions.iter().enumerate() {
|
|
||||||
let segment_id = i + 1;
|
|
||||||
let segment_dir = self.config.flock_workspace.join(format!("segment-{}", segment_id));
|
|
||||||
|
|
||||||
println!(" Creating segment {} workspace...", segment_id);
|
|
||||||
|
|
||||||
// Copy project directory to segment directory
|
|
||||||
self.copy_git_repo(&self.config.project_dir, &segment_dir)
|
|
||||||
.await
|
|
||||||
.context(format!("Failed to copy project to segment {}", segment_id))?;
|
|
||||||
|
|
||||||
// Write segment-requirements.md
|
|
||||||
let requirements_path = segment_dir.join("segment-requirements.md");
|
|
||||||
std::fs::write(&requirements_path, partition)
|
|
||||||
.context(format!("Failed to write requirements for segment {}", segment_id))?;
|
|
||||||
|
|
||||||
println!(" ✓ Segment {} workspace ready at {}", segment_id, segment_dir.display());
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Copy a git repository to a new location
|
|
||||||
async fn copy_git_repo(&self, source: &Path, dest: &Path) -> Result<()> {
|
|
||||||
// Use git clone for efficient copying
|
|
||||||
let output = Command::new("git")
|
|
||||||
.arg("clone")
|
|
||||||
.arg(source)
|
|
||||||
.arg(dest)
|
|
||||||
.output()
|
|
||||||
.await
|
|
||||||
.context("Failed to run git clone")?;
|
|
||||||
|
|
||||||
if !output.status.success() {
|
|
||||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
|
||||||
anyhow::bail!("Git clone failed: {}", stderr);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Run all segments in parallel
|
|
||||||
async fn run_segments_parallel(&mut self) -> Result<()> {
|
|
||||||
let mut handles = Vec::new();
|
|
||||||
|
|
||||||
for segment_id in 1..=self.config.num_segments {
|
|
||||||
let segment_dir = self.config.flock_workspace.join(format!("segment-{}", segment_id));
|
|
||||||
let max_turns = self.config.max_turns;
|
|
||||||
let g3_binary = self.get_g3_binary()?;
|
|
||||||
let status_file = self.get_status_file_path();
|
|
||||||
let session_id = self.session_id.clone();
|
|
||||||
|
|
||||||
// Initialize segment status
|
|
||||||
let segment_status = SegmentStatus {
|
|
||||||
segment_id,
|
|
||||||
workspace: segment_dir.clone(),
|
|
||||||
state: SegmentState::Running,
|
|
||||||
started_at: Utc::now(),
|
|
||||||
completed_at: None,
|
|
||||||
tokens_used: 0,
|
|
||||||
tool_calls: 0,
|
|
||||||
errors: 0,
|
|
||||||
current_turn: 0,
|
|
||||||
max_turns,
|
|
||||||
last_message: Some("Starting...".to_string()),
|
|
||||||
error_message: None,
|
|
||||||
};
|
|
||||||
|
|
||||||
self.status.update_segment(segment_id, segment_status);
|
|
||||||
self.save_status()?;
|
|
||||||
|
|
||||||
// Spawn a task for this segment
|
|
||||||
let handle = tokio::spawn(async move {
|
|
||||||
run_segment(
|
|
||||||
segment_id,
|
|
||||||
segment_dir,
|
|
||||||
max_turns,
|
|
||||||
g3_binary,
|
|
||||||
status_file,
|
|
||||||
session_id,
|
|
||||||
)
|
|
||||||
.await
|
|
||||||
});
|
|
||||||
|
|
||||||
handles.push((segment_id, handle));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for all segments to complete
|
|
||||||
for (segment_id, handle) in handles {
|
|
||||||
match handle.await {
|
|
||||||
Ok(Ok(final_status)) => {
|
|
||||||
println!("\n✅ Segment {} completed", segment_id);
|
|
||||||
self.status.update_segment(segment_id, final_status);
|
|
||||||
self.save_status()?;
|
|
||||||
}
|
|
||||||
Ok(Err(e)) => {
|
|
||||||
error!("Segment {} failed: {}", segment_id, e);
|
|
||||||
let mut segment_status = self.status.segments.get(&segment_id).cloned()
|
|
||||||
.unwrap_or_else(|| SegmentStatus {
|
|
||||||
segment_id,
|
|
||||||
workspace: self.config.flock_workspace.join(format!("segment-{}", segment_id)),
|
|
||||||
state: SegmentState::Failed,
|
|
||||||
started_at: Utc::now(),
|
|
||||||
completed_at: Some(Utc::now()),
|
|
||||||
tokens_used: 0,
|
|
||||||
tool_calls: 0,
|
|
||||||
errors: 1,
|
|
||||||
current_turn: 0,
|
|
||||||
max_turns: self.config.max_turns,
|
|
||||||
last_message: None,
|
|
||||||
error_message: Some(e.to_string()),
|
|
||||||
});
|
|
||||||
segment_status.state = SegmentState::Failed;
|
|
||||||
segment_status.completed_at = Some(Utc::now());
|
|
||||||
segment_status.error_message = Some(e.to_string());
|
|
||||||
segment_status.errors += 1;
|
|
||||||
self.status.update_segment(segment_id, segment_status);
|
|
||||||
self.save_status()?;
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
error!("Segment {} task panicked: {}", segment_id, e);
|
|
||||||
let mut segment_status = self.status.segments.get(&segment_id).cloned()
|
|
||||||
.unwrap_or_else(|| SegmentStatus {
|
|
||||||
segment_id,
|
|
||||||
workspace: self.config.flock_workspace.join(format!("segment-{}", segment_id)),
|
|
||||||
state: SegmentState::Failed,
|
|
||||||
started_at: Utc::now(),
|
|
||||||
completed_at: Some(Utc::now()),
|
|
||||||
tokens_used: 0,
|
|
||||||
tool_calls: 0,
|
|
||||||
errors: 1,
|
|
||||||
current_turn: 0,
|
|
||||||
max_turns: self.config.max_turns,
|
|
||||||
last_message: None,
|
|
||||||
error_message: Some(format!("Task panicked: {}", e)),
|
|
||||||
});
|
|
||||||
segment_status.state = SegmentState::Failed;
|
|
||||||
segment_status.completed_at = Some(Utc::now());
|
|
||||||
segment_status.error_message = Some(format!("Task panicked: {}", e));
|
|
||||||
segment_status.errors += 1;
|
|
||||||
self.status.update_segment(segment_id, segment_status);
|
|
||||||
self.save_status()?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get the g3 binary path
|
|
||||||
fn get_g3_binary(&self) -> Result<PathBuf> {
|
|
||||||
if let Some(ref binary) = self.config.g3_binary {
|
|
||||||
Ok(binary.clone())
|
|
||||||
} else {
|
|
||||||
// Use current executable
|
|
||||||
std::env::current_exe().context("Failed to get current executable path")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get the status file path
|
|
||||||
fn get_status_file_path(&self) -> PathBuf {
|
|
||||||
self.config.flock_workspace.join("flock-status.json")
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Save current status to file
|
|
||||||
fn save_status(&self) -> Result<()> {
|
|
||||||
let status_file = self.get_status_file_path();
|
|
||||||
self.status.save_to_file(&status_file)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Run a single segment worker
|
|
||||||
async fn run_segment(
|
|
||||||
segment_id: usize,
|
|
||||||
segment_dir: PathBuf,
|
|
||||||
max_turns: usize,
|
|
||||||
g3_binary: PathBuf,
|
|
||||||
status_file: PathBuf,
|
|
||||||
session_id: String,
|
|
||||||
) -> Result<SegmentStatus> {
|
|
||||||
info!("Starting segment {} in {}", segment_id, segment_dir.display());
|
|
||||||
|
|
||||||
let mut segment_status = SegmentStatus {
|
|
||||||
segment_id,
|
|
||||||
workspace: segment_dir.clone(),
|
|
||||||
state: SegmentState::Running,
|
|
||||||
started_at: Utc::now(),
|
|
||||||
completed_at: None,
|
|
||||||
tokens_used: 0,
|
|
||||||
tool_calls: 0,
|
|
||||||
errors: 0,
|
|
||||||
current_turn: 0,
|
|
||||||
max_turns,
|
|
||||||
last_message: Some("Starting autonomous mode...".to_string()),
|
|
||||||
error_message: None,
|
|
||||||
};
|
|
||||||
|
|
||||||
// Run g3 in autonomous mode with segment-requirements.md
|
|
||||||
let mut child = Command::new(&g3_binary)
|
|
||||||
.arg("--workspace")
|
|
||||||
.arg(&segment_dir)
|
|
||||||
.arg("--autonomous")
|
|
||||||
.arg("--max-turns")
|
|
||||||
.arg(max_turns.to_string())
|
|
||||||
.arg("--requirements")
|
|
||||||
.arg(std::fs::read_to_string(segment_dir.join("segment-requirements.md"))?)
|
|
||||||
.arg("--quiet") // Disable session logging for workers
|
|
||||||
.stdout(Stdio::piped())
|
|
||||||
.stderr(Stdio::piped())
|
|
||||||
.spawn()
|
|
||||||
.context("Failed to spawn g3 process")?;
|
|
||||||
|
|
||||||
// Stream output and update status
|
|
||||||
let stdout = child.stdout.take().context("Failed to get stdout")?;
|
|
||||||
let stderr = child.stderr.take().context("Failed to get stderr")?;
|
|
||||||
|
|
||||||
let stdout_reader = BufReader::new(stdout);
|
|
||||||
let stderr_reader = BufReader::new(stderr);
|
|
||||||
|
|
||||||
let mut stdout_lines = stdout_reader.lines();
|
|
||||||
let mut stderr_lines = stderr_reader.lines();
|
|
||||||
|
|
||||||
// Read output and update status
|
|
||||||
loop {
|
|
||||||
tokio::select! {
|
|
||||||
line = stdout_lines.next_line() => {
|
|
||||||
match line {
|
|
||||||
Ok(Some(line)) => {
|
|
||||||
println!("[Segment {}] {}", segment_id, line);
|
|
||||||
|
|
||||||
// Parse output for status updates
|
|
||||||
if line.contains("TURN") {
|
|
||||||
// Extract turn number if possible
|
|
||||||
if let Some(turn_str) = line.split("TURN").nth(1) {
|
|
||||||
if let Ok(turn) = turn_str.trim().split('/').next().unwrap_or("0").parse::<usize>() {
|
|
||||||
segment_status.current_turn = turn;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
segment_status.last_message = Some(line);
|
|
||||||
update_status_file(&status_file, &session_id, segment_status.clone())?;
|
|
||||||
}
|
|
||||||
Ok(None) => break,
|
|
||||||
Err(e) => {
|
|
||||||
error!("Error reading stdout for segment {}: {}", segment_id, e);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
line = stderr_lines.next_line() => {
|
|
||||||
match line {
|
|
||||||
Ok(Some(line)) => {
|
|
||||||
eprintln!("[Segment {} ERROR] {}", segment_id, line);
|
|
||||||
segment_status.errors += 1;
|
|
||||||
update_status_file(&status_file, &session_id, segment_status.clone())?;
|
|
||||||
}
|
|
||||||
Ok(None) => break,
|
|
||||||
Err(e) => {
|
|
||||||
error!("Error reading stderr for segment {}: {}", segment_id, e);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for process to complete
|
|
||||||
let status = child.wait().await.context("Failed to wait for g3 process")?;
|
|
||||||
|
|
||||||
segment_status.completed_at = Some(Utc::now());
|
|
||||||
|
|
||||||
if status.success() {
|
|
||||||
segment_status.state = SegmentState::Completed;
|
|
||||||
segment_status.last_message = Some("Completed successfully".to_string());
|
|
||||||
} else {
|
|
||||||
segment_status.state = SegmentState::Failed;
|
|
||||||
segment_status.error_message = Some(format!("Process exited with status: {}", status));
|
|
||||||
segment_status.errors += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to extract metrics from session log if available
|
|
||||||
let log_dir = segment_dir.join("logs");
|
|
||||||
if log_dir.exists() {
|
|
||||||
if let Ok(entries) = std::fs::read_dir(&log_dir) {
|
|
||||||
for entry in entries.flatten() {
|
|
||||||
let path = entry.path();
|
|
||||||
if path.extension().and_then(|s| s.to_str()) == Some("json") {
|
|
||||||
if let Ok(log_content) = std::fs::read_to_string(&path) {
|
|
||||||
if let Ok(log_json) = serde_json::from_str::<serde_json::Value>(&log_content) {
|
|
||||||
// Extract token usage
|
|
||||||
if let Some(context) = log_json.get("context_window") {
|
|
||||||
if let Some(cumulative) = context.get("cumulative_tokens") {
|
|
||||||
if let Some(tokens) = cumulative.as_u64() {
|
|
||||||
segment_status.tokens_used = tokens;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Count tool calls from conversation history
|
|
||||||
if let Some(context) = log_json.get("context_window") {
|
|
||||||
if let Some(history) = context.get("conversation_history") {
|
|
||||||
if let Some(messages) = history.as_array() {
|
|
||||||
let tool_call_count = messages
|
|
||||||
.iter()
|
|
||||||
.filter(|msg| {
|
|
||||||
msg.get("role")
|
|
||||||
.and_then(|r| r.as_str())
|
|
||||||
== Some("tool")
|
|
||||||
})
|
|
||||||
.count();
|
|
||||||
segment_status.tool_calls = tool_call_count as u64;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
update_status_file(&status_file, &session_id, segment_status.clone())?;
|
|
||||||
|
|
||||||
Ok(segment_status)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Update the status file with new segment status
|
|
||||||
fn update_status_file(
|
|
||||||
status_file: &PathBuf,
|
|
||||||
session_id: &str,
|
|
||||||
segment_status: SegmentStatus,
|
|
||||||
) -> Result<()> {
|
|
||||||
// Load existing status or create new one
|
|
||||||
let mut flock_status = if status_file.exists() {
|
|
||||||
FlockStatus::load_from_file(status_file)?
|
|
||||||
} else {
|
|
||||||
// This shouldn't happen, but handle it gracefully
|
|
||||||
FlockStatus::new(
|
|
||||||
session_id.to_string(),
|
|
||||||
PathBuf::new(),
|
|
||||||
PathBuf::new(),
|
|
||||||
0,
|
|
||||||
)
|
|
||||||
};
|
|
||||||
|
|
||||||
flock_status.update_segment(segment_status.segment_id, segment_status);
|
|
||||||
flock_status.save_to_file(status_file)?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
//! G3 Ensembles - Multi-agent ensemble functionality
|
|
||||||
//!
|
|
||||||
//! This crate provides functionality for running multiple G3 agents in coordination,
|
|
||||||
//! enabling parallel development across different architectural modules.
|
|
||||||
|
|
||||||
pub mod flock;
|
|
||||||
pub mod status;
|
|
||||||
mod tests;
|
|
||||||
|
|
||||||
/// Re-export main types for convenience
|
|
||||||
pub use flock::{FlockConfig, FlockMode};
|
|
||||||
pub use status::{FlockStatus, SegmentStatus};
|
|
||||||
@@ -1,240 +0,0 @@
|
|||||||
//! Status tracking for flock mode
|
|
||||||
|
|
||||||
use chrono::{DateTime, Utc};
|
|
||||||
use serde::{Deserialize, Serialize};
|
|
||||||
use std::collections::HashMap;
|
|
||||||
use std::path::PathBuf;
|
|
||||||
|
|
||||||
/// Status of an individual segment worker
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
||||||
pub struct SegmentStatus {
|
|
||||||
/// Segment number
|
|
||||||
pub segment_id: usize,
|
|
||||||
|
|
||||||
/// Segment workspace directory
|
|
||||||
pub workspace: PathBuf,
|
|
||||||
|
|
||||||
/// Current state of the segment
|
|
||||||
pub state: SegmentState,
|
|
||||||
|
|
||||||
/// Start time
|
|
||||||
pub started_at: DateTime<Utc>,
|
|
||||||
|
|
||||||
/// Completion time (if finished)
|
|
||||||
pub completed_at: Option<DateTime<Utc>>,
|
|
||||||
|
|
||||||
/// Total tokens used
|
|
||||||
pub tokens_used: u64,
|
|
||||||
|
|
||||||
/// Number of tool calls made
|
|
||||||
pub tool_calls: u64,
|
|
||||||
|
|
||||||
/// Number of errors encountered
|
|
||||||
pub errors: u64,
|
|
||||||
|
|
||||||
/// Current turn number (for autonomous mode)
|
|
||||||
pub current_turn: usize,
|
|
||||||
|
|
||||||
/// Maximum turns allowed
|
|
||||||
pub max_turns: usize,
|
|
||||||
|
|
||||||
/// Last status message
|
|
||||||
pub last_message: Option<String>,
|
|
||||||
|
|
||||||
/// Error message (if failed)
|
|
||||||
pub error_message: Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// State of a segment worker
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
|
||||||
pub enum SegmentState {
|
|
||||||
/// Waiting to start
|
|
||||||
Pending,
|
|
||||||
|
|
||||||
/// Currently running
|
|
||||||
Running,
|
|
||||||
|
|
||||||
/// Completed successfully
|
|
||||||
Completed,
|
|
||||||
|
|
||||||
/// Failed with error
|
|
||||||
Failed,
|
|
||||||
|
|
||||||
/// Cancelled by user
|
|
||||||
Cancelled,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::fmt::Display for SegmentState {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
match self {
|
|
||||||
SegmentState::Pending => write!(f, "⏳ Pending"),
|
|
||||||
SegmentState::Running => write!(f, "🔄 Running"),
|
|
||||||
SegmentState::Completed => write!(f, "✅ Completed"),
|
|
||||||
SegmentState::Failed => write!(f, "❌ Failed"),
|
|
||||||
SegmentState::Cancelled => write!(f, "⚠️ Cancelled"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Overall flock status
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
||||||
pub struct FlockStatus {
|
|
||||||
/// Flock session ID
|
|
||||||
pub session_id: String,
|
|
||||||
|
|
||||||
/// Project directory
|
|
||||||
pub project_dir: PathBuf,
|
|
||||||
|
|
||||||
/// Flock workspace directory
|
|
||||||
pub flock_workspace: PathBuf,
|
|
||||||
|
|
||||||
/// Number of segments
|
|
||||||
pub num_segments: usize,
|
|
||||||
|
|
||||||
/// Start time
|
|
||||||
pub started_at: DateTime<Utc>,
|
|
||||||
|
|
||||||
/// Completion time (if finished)
|
|
||||||
pub completed_at: Option<DateTime<Utc>>,
|
|
||||||
|
|
||||||
/// Status of each segment
|
|
||||||
pub segments: HashMap<usize, SegmentStatus>,
|
|
||||||
|
|
||||||
/// Total tokens used across all segments
|
|
||||||
pub total_tokens: u64,
|
|
||||||
|
|
||||||
/// Total tool calls across all segments
|
|
||||||
pub total_tool_calls: u64,
|
|
||||||
|
|
||||||
/// Total errors across all segments
|
|
||||||
pub total_errors: u64,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl FlockStatus {
|
|
||||||
/// Create a new flock status
|
|
||||||
pub fn new(
|
|
||||||
session_id: String,
|
|
||||||
project_dir: PathBuf,
|
|
||||||
flock_workspace: PathBuf,
|
|
||||||
num_segments: usize,
|
|
||||||
) -> Self {
|
|
||||||
Self {
|
|
||||||
session_id,
|
|
||||||
project_dir,
|
|
||||||
flock_workspace,
|
|
||||||
num_segments,
|
|
||||||
started_at: Utc::now(),
|
|
||||||
completed_at: None,
|
|
||||||
segments: HashMap::new(),
|
|
||||||
total_tokens: 0,
|
|
||||||
total_tool_calls: 0,
|
|
||||||
total_errors: 0,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Update segment status
|
|
||||||
pub fn update_segment(&mut self, segment_id: usize, status: SegmentStatus) {
|
|
||||||
self.segments.insert(segment_id, status);
|
|
||||||
self.recalculate_totals();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Recalculate total metrics
|
|
||||||
fn recalculate_totals(&mut self) {
|
|
||||||
self.total_tokens = self.segments.values().map(|s| s.tokens_used).sum();
|
|
||||||
self.total_tool_calls = self.segments.values().map(|s| s.tool_calls).sum();
|
|
||||||
self.total_errors = self.segments.values().map(|s| s.errors).sum();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Check if all segments are complete
|
|
||||||
pub fn is_complete(&self) -> bool {
|
|
||||||
self.segments.len() == self.num_segments
|
|
||||||
&& self.segments.values().all(|s| {
|
|
||||||
matches!(
|
|
||||||
s.state,
|
|
||||||
SegmentState::Completed | SegmentState::Failed | SegmentState::Cancelled
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get count of segments by state
|
|
||||||
pub fn count_by_state(&self, state: SegmentState) -> usize {
|
|
||||||
self.segments.values().filter(|s| s.state == state).count()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Save status to file
|
|
||||||
pub fn save_to_file(&self, path: &PathBuf) -> anyhow::Result<()> {
|
|
||||||
let json = serde_json::to_string_pretty(self)?;
|
|
||||||
std::fs::write(path, json)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Load status from file
|
|
||||||
pub fn load_from_file(path: &PathBuf) -> anyhow::Result<Self> {
|
|
||||||
let json = std::fs::read_to_string(path)?;
|
|
||||||
let status = serde_json::from_str(&json)?;
|
|
||||||
Ok(status)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Generate a summary report
|
|
||||||
pub fn generate_report(&self) -> String {
|
|
||||||
let mut report = String::new();
|
|
||||||
|
|
||||||
report.push_str(&format!("\n{}", "=".repeat(80)));
|
|
||||||
report.push_str(&format!("\n📊 FLOCK MODE SESSION REPORT"));
|
|
||||||
report.push_str(&format!("\n{}", "=".repeat(80)));
|
|
||||||
|
|
||||||
report.push_str(&format!("\n\n🆔 Session ID: {}", self.session_id));
|
|
||||||
report.push_str(&format!("\n📁 Project: {}", self.project_dir.display()));
|
|
||||||
report.push_str(&format!("\n🗂️ Workspace: {}", self.flock_workspace.display()));
|
|
||||||
report.push_str(&format!("\n🔢 Segments: {}", self.num_segments));
|
|
||||||
|
|
||||||
let duration = if let Some(completed) = self.completed_at {
|
|
||||||
completed.signed_duration_since(self.started_at)
|
|
||||||
} else {
|
|
||||||
Utc::now().signed_duration_since(self.started_at)
|
|
||||||
};
|
|
||||||
|
|
||||||
report.push_str(&format!("\n⏱️ Duration: {:.2}s", duration.num_milliseconds() as f64 / 1000.0));
|
|
||||||
|
|
||||||
// Segment status summary
|
|
||||||
report.push_str(&format!("\n\n📈 Segment Status:"));
|
|
||||||
report.push_str(&format!("\n • Completed: {}", self.count_by_state(SegmentState::Completed)));
|
|
||||||
report.push_str(&format!("\n • Running: {}", self.count_by_state(SegmentState::Running)));
|
|
||||||
report.push_str(&format!("\n • Failed: {}", self.count_by_state(SegmentState::Failed)));
|
|
||||||
report.push_str(&format!("\n • Pending: {}", self.count_by_state(SegmentState::Pending)));
|
|
||||||
report.push_str(&format!("\n • Cancelled: {}", self.count_by_state(SegmentState::Cancelled)));
|
|
||||||
|
|
||||||
// Metrics
|
|
||||||
report.push_str(&format!("\n\n📊 Aggregate Metrics:"));
|
|
||||||
report.push_str(&format!("\n • Total Tokens: {}", self.total_tokens));
|
|
||||||
report.push_str(&format!("\n • Total Tool Calls: {}", self.total_tool_calls));
|
|
||||||
report.push_str(&format!("\n • Total Errors: {}", self.total_errors));
|
|
||||||
|
|
||||||
// Per-segment details
|
|
||||||
report.push_str(&format!("\n\n🔍 Segment Details:"));
|
|
||||||
let mut segments: Vec<_> = self.segments.iter().collect();
|
|
||||||
segments.sort_by_key(|(id, _)| *id);
|
|
||||||
|
|
||||||
for (id, segment) in segments {
|
|
||||||
report.push_str(&format!("\n\n Segment {}:", id));
|
|
||||||
report.push_str(&format!("\n Status: {}", segment.state));
|
|
||||||
report.push_str(&format!("\n Workspace: {}", segment.workspace.display()));
|
|
||||||
report.push_str(&format!("\n Tokens: {}", segment.tokens_used));
|
|
||||||
report.push_str(&format!("\n Tool Calls: {}", segment.tool_calls));
|
|
||||||
report.push_str(&format!("\n Errors: {}", segment.errors));
|
|
||||||
report.push_str(&format!("\n Turn: {}/{}", segment.current_turn, segment.max_turns));
|
|
||||||
|
|
||||||
if let Some(ref msg) = segment.last_message {
|
|
||||||
report.push_str(&format!("\n Last Message: {}", msg));
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(ref err) = segment.error_message {
|
|
||||||
report.push_str(&format!("\n Error: {}", err));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
report.push_str(&format!("\n\n{}", "=".repeat(80)));
|
|
||||||
|
|
||||||
report
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,331 +0,0 @@
|
|||||||
//! Unit tests for g3-ensembles
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use crate::status::{FlockStatus, SegmentState, SegmentStatus};
|
|
||||||
use chrono::Utc;
|
|
||||||
use std::path::PathBuf;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_segment_state_display() {
|
|
||||||
assert_eq!(format!("{}", SegmentState::Pending), "⏳ Pending");
|
|
||||||
assert_eq!(format!("{}", SegmentState::Running), "🔄 Running");
|
|
||||||
assert_eq!(format!("{}", SegmentState::Completed), "✅ Completed");
|
|
||||||
assert_eq!(format!("{}", SegmentState::Failed), "❌ Failed");
|
|
||||||
assert_eq!(format!("{}", SegmentState::Cancelled), "⚠️ Cancelled");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_flock_status_creation() {
|
|
||||||
let status = FlockStatus::new(
|
|
||||||
"test-session".to_string(),
|
|
||||||
PathBuf::from("/test/project"),
|
|
||||||
PathBuf::from("/test/workspace"),
|
|
||||||
3,
|
|
||||||
);
|
|
||||||
|
|
||||||
assert_eq!(status.session_id, "test-session");
|
|
||||||
assert_eq!(status.num_segments, 3);
|
|
||||||
assert_eq!(status.segments.len(), 0);
|
|
||||||
assert_eq!(status.total_tokens, 0);
|
|
||||||
assert_eq!(status.total_tool_calls, 0);
|
|
||||||
assert_eq!(status.total_errors, 0);
|
|
||||||
assert!(status.completed_at.is_none());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_segment_status_update() {
|
|
||||||
let mut status = FlockStatus::new(
|
|
||||||
"test-session".to_string(),
|
|
||||||
PathBuf::from("/test/project"),
|
|
||||||
PathBuf::from("/test/workspace"),
|
|
||||||
2,
|
|
||||||
);
|
|
||||||
|
|
||||||
let segment1 = SegmentStatus {
|
|
||||||
segment_id: 1,
|
|
||||||
workspace: PathBuf::from("/test/workspace/segment-1"),
|
|
||||||
state: SegmentState::Completed,
|
|
||||||
started_at: Utc::now(),
|
|
||||||
completed_at: Some(Utc::now()),
|
|
||||||
tokens_used: 1000,
|
|
||||||
tool_calls: 50,
|
|
||||||
errors: 2,
|
|
||||||
current_turn: 5,
|
|
||||||
max_turns: 10,
|
|
||||||
last_message: Some("Done".to_string()),
|
|
||||||
error_message: None,
|
|
||||||
};
|
|
||||||
|
|
||||||
status.update_segment(1, segment1);
|
|
||||||
|
|
||||||
assert_eq!(status.segments.len(), 1);
|
|
||||||
assert_eq!(status.total_tokens, 1000);
|
|
||||||
assert_eq!(status.total_tool_calls, 50);
|
|
||||||
assert_eq!(status.total_errors, 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_multiple_segment_updates() {
|
|
||||||
let mut status = FlockStatus::new(
|
|
||||||
"test-session".to_string(),
|
|
||||||
PathBuf::from("/test/project"),
|
|
||||||
PathBuf::from("/test/workspace"),
|
|
||||||
2,
|
|
||||||
);
|
|
||||||
|
|
||||||
let segment1 = SegmentStatus {
|
|
||||||
segment_id: 1,
|
|
||||||
workspace: PathBuf::from("/test/workspace/segment-1"),
|
|
||||||
state: SegmentState::Completed,
|
|
||||||
started_at: Utc::now(),
|
|
||||||
completed_at: Some(Utc::now()),
|
|
||||||
tokens_used: 1000,
|
|
||||||
tool_calls: 50,
|
|
||||||
errors: 2,
|
|
||||||
current_turn: 5,
|
|
||||||
max_turns: 10,
|
|
||||||
last_message: Some("Done".to_string()),
|
|
||||||
error_message: None,
|
|
||||||
};
|
|
||||||
|
|
||||||
let segment2 = SegmentStatus {
|
|
||||||
segment_id: 2,
|
|
||||||
workspace: PathBuf::from("/test/workspace/segment-2"),
|
|
||||||
state: SegmentState::Failed,
|
|
||||||
started_at: Utc::now(),
|
|
||||||
completed_at: Some(Utc::now()),
|
|
||||||
tokens_used: 500,
|
|
||||||
tool_calls: 25,
|
|
||||||
errors: 5,
|
|
||||||
current_turn: 3,
|
|
||||||
max_turns: 10,
|
|
||||||
last_message: Some("Error".to_string()),
|
|
||||||
error_message: Some("Test error".to_string()),
|
|
||||||
};
|
|
||||||
|
|
||||||
status.update_segment(1, segment1);
|
|
||||||
status.update_segment(2, segment2);
|
|
||||||
|
|
||||||
assert_eq!(status.segments.len(), 2);
|
|
||||||
assert_eq!(status.total_tokens, 1500);
|
|
||||||
assert_eq!(status.total_tool_calls, 75);
|
|
||||||
assert_eq!(status.total_errors, 7);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_is_complete() {
|
|
||||||
let mut status = FlockStatus::new(
|
|
||||||
"test-session".to_string(),
|
|
||||||
PathBuf::from("/test/project"),
|
|
||||||
PathBuf::from("/test/workspace"),
|
|
||||||
2,
|
|
||||||
);
|
|
||||||
|
|
||||||
// Not complete - no segments
|
|
||||||
assert!(!status.is_complete());
|
|
||||||
|
|
||||||
// Add one completed segment
|
|
||||||
let segment1 = SegmentStatus {
|
|
||||||
segment_id: 1,
|
|
||||||
workspace: PathBuf::from("/test/workspace/segment-1"),
|
|
||||||
state: SegmentState::Completed,
|
|
||||||
started_at: Utc::now(),
|
|
||||||
completed_at: Some(Utc::now()),
|
|
||||||
tokens_used: 1000,
|
|
||||||
tool_calls: 50,
|
|
||||||
errors: 0,
|
|
||||||
current_turn: 5,
|
|
||||||
max_turns: 10,
|
|
||||||
last_message: None,
|
|
||||||
error_message: None,
|
|
||||||
};
|
|
||||||
status.update_segment(1, segment1);
|
|
||||||
|
|
||||||
// Still not complete - only 1 of 2 segments
|
|
||||||
assert!(!status.is_complete());
|
|
||||||
|
|
||||||
// Add second segment (running)
|
|
||||||
let segment2 = SegmentStatus {
|
|
||||||
segment_id: 2,
|
|
||||||
workspace: PathBuf::from("/test/workspace/segment-2"),
|
|
||||||
state: SegmentState::Running,
|
|
||||||
started_at: Utc::now(),
|
|
||||||
completed_at: None,
|
|
||||||
tokens_used: 500,
|
|
||||||
tool_calls: 25,
|
|
||||||
errors: 0,
|
|
||||||
current_turn: 3,
|
|
||||||
max_turns: 10,
|
|
||||||
last_message: None,
|
|
||||||
error_message: None,
|
|
||||||
};
|
|
||||||
status.update_segment(2, segment2);
|
|
||||||
|
|
||||||
// Still not complete - segment 2 is running
|
|
||||||
assert!(!status.is_complete());
|
|
||||||
|
|
||||||
// Update segment 2 to completed
|
|
||||||
let segment2_done = SegmentStatus {
|
|
||||||
segment_id: 2,
|
|
||||||
workspace: PathBuf::from("/test/workspace/segment-2"),
|
|
||||||
state: SegmentState::Completed,
|
|
||||||
started_at: Utc::now(),
|
|
||||||
completed_at: Some(Utc::now()),
|
|
||||||
tokens_used: 500,
|
|
||||||
tool_calls: 25,
|
|
||||||
errors: 0,
|
|
||||||
current_turn: 5,
|
|
||||||
max_turns: 10,
|
|
||||||
last_message: None,
|
|
||||||
error_message: None,
|
|
||||||
};
|
|
||||||
status.update_segment(2, segment2_done);
|
|
||||||
|
|
||||||
// Now complete
|
|
||||||
assert!(status.is_complete());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_count_by_state() {
|
|
||||||
let mut status = FlockStatus::new(
|
|
||||||
"test-session".to_string(),
|
|
||||||
PathBuf::from("/test/project"),
|
|
||||||
PathBuf::from("/test/workspace"),
|
|
||||||
3,
|
|
||||||
);
|
|
||||||
|
|
||||||
let segment1 = SegmentStatus {
|
|
||||||
segment_id: 1,
|
|
||||||
workspace: PathBuf::from("/test/workspace/segment-1"),
|
|
||||||
state: SegmentState::Completed,
|
|
||||||
started_at: Utc::now(),
|
|
||||||
completed_at: Some(Utc::now()),
|
|
||||||
tokens_used: 1000,
|
|
||||||
tool_calls: 50,
|
|
||||||
errors: 0,
|
|
||||||
current_turn: 5,
|
|
||||||
max_turns: 10,
|
|
||||||
last_message: None,
|
|
||||||
error_message: None,
|
|
||||||
};
|
|
||||||
|
|
||||||
let segment2 = SegmentStatus {
|
|
||||||
segment_id: 2,
|
|
||||||
workspace: PathBuf::from("/test/workspace/segment-2"),
|
|
||||||
state: SegmentState::Failed,
|
|
||||||
started_at: Utc::now(),
|
|
||||||
completed_at: Some(Utc::now()),
|
|
||||||
tokens_used: 500,
|
|
||||||
tool_calls: 25,
|
|
||||||
errors: 5,
|
|
||||||
current_turn: 3,
|
|
||||||
max_turns: 10,
|
|
||||||
last_message: None,
|
|
||||||
error_message: Some("Error".to_string()),
|
|
||||||
};
|
|
||||||
|
|
||||||
let segment3 = SegmentStatus {
|
|
||||||
segment_id: 3,
|
|
||||||
workspace: PathBuf::from("/test/workspace/segment-3"),
|
|
||||||
state: SegmentState::Completed,
|
|
||||||
started_at: Utc::now(),
|
|
||||||
completed_at: Some(Utc::now()),
|
|
||||||
tokens_used: 800,
|
|
||||||
tool_calls: 40,
|
|
||||||
errors: 1,
|
|
||||||
current_turn: 4,
|
|
||||||
max_turns: 10,
|
|
||||||
last_message: None,
|
|
||||||
error_message: None,
|
|
||||||
};
|
|
||||||
|
|
||||||
status.update_segment(1, segment1);
|
|
||||||
status.update_segment(2, segment2);
|
|
||||||
status.update_segment(3, segment3);
|
|
||||||
|
|
||||||
assert_eq!(status.count_by_state(SegmentState::Completed), 2);
|
|
||||||
assert_eq!(status.count_by_state(SegmentState::Failed), 1);
|
|
||||||
assert_eq!(status.count_by_state(SegmentState::Running), 0);
|
|
||||||
assert_eq!(status.count_by_state(SegmentState::Pending), 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_status_serialization() {
|
|
||||||
let mut status = FlockStatus::new(
|
|
||||||
"test-session".to_string(),
|
|
||||||
PathBuf::from("/test/project"),
|
|
||||||
PathBuf::from("/test/workspace"),
|
|
||||||
1,
|
|
||||||
);
|
|
||||||
|
|
||||||
let segment1 = SegmentStatus {
|
|
||||||
segment_id: 1,
|
|
||||||
workspace: PathBuf::from("/test/workspace/segment-1"),
|
|
||||||
state: SegmentState::Completed,
|
|
||||||
started_at: Utc::now(),
|
|
||||||
completed_at: Some(Utc::now()),
|
|
||||||
tokens_used: 1000,
|
|
||||||
tool_calls: 50,
|
|
||||||
errors: 2,
|
|
||||||
current_turn: 5,
|
|
||||||
max_turns: 10,
|
|
||||||
last_message: Some("Done".to_string()),
|
|
||||||
error_message: None,
|
|
||||||
};
|
|
||||||
|
|
||||||
status.update_segment(1, segment1);
|
|
||||||
|
|
||||||
// Serialize to JSON
|
|
||||||
let json = serde_json::to_string(&status).expect("Failed to serialize");
|
|
||||||
assert!(json.contains("test-session"));
|
|
||||||
assert!(json.contains("segment_id"));
|
|
||||||
assert!(json.contains("Completed"));
|
|
||||||
|
|
||||||
// Deserialize back
|
|
||||||
let deserialized: FlockStatus =
|
|
||||||
serde_json::from_str(&json).expect("Failed to deserialize");
|
|
||||||
assert_eq!(deserialized.session_id, "test-session");
|
|
||||||
assert_eq!(deserialized.segments.len(), 1);
|
|
||||||
assert_eq!(deserialized.total_tokens, 1000);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_report_generation() {
|
|
||||||
let mut status = FlockStatus::new(
|
|
||||||
"test-session".to_string(),
|
|
||||||
PathBuf::from("/test/project"),
|
|
||||||
PathBuf::from("/test/workspace"),
|
|
||||||
2,
|
|
||||||
);
|
|
||||||
|
|
||||||
let segment1 = SegmentStatus {
|
|
||||||
segment_id: 1,
|
|
||||||
workspace: PathBuf::from("/test/workspace/segment-1"),
|
|
||||||
state: SegmentState::Completed,
|
|
||||||
started_at: Utc::now(),
|
|
||||||
completed_at: Some(Utc::now()),
|
|
||||||
tokens_used: 1000,
|
|
||||||
tool_calls: 50,
|
|
||||||
errors: 2,
|
|
||||||
current_turn: 5,
|
|
||||||
max_turns: 10,
|
|
||||||
last_message: Some("Done".to_string()),
|
|
||||||
error_message: None,
|
|
||||||
};
|
|
||||||
|
|
||||||
status.update_segment(1, segment1);
|
|
||||||
|
|
||||||
let report = status.generate_report();
|
|
||||||
|
|
||||||
// Check that report contains expected sections
|
|
||||||
assert!(report.contains("FLOCK MODE SESSION REPORT"));
|
|
||||||
assert!(report.contains("test-session"));
|
|
||||||
assert!(report.contains("Segment Status:"));
|
|
||||||
assert!(report.contains("Aggregate Metrics:"));
|
|
||||||
assert!(report.contains("Segment Details:"));
|
|
||||||
assert!(report.contains("Total Tokens: 1000"));
|
|
||||||
assert!(report.contains("Total Tool Calls: 50"));
|
|
||||||
assert!(report.contains("Total Errors: 2"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,443 +0,0 @@
|
|||||||
//! Integration tests for g3-ensembles flock mode
|
|
||||||
|
|
||||||
use g3_ensembles::{FlockConfig, FlockMode};
|
|
||||||
use std::fs;
|
|
||||||
use std::path::PathBuf;
|
|
||||||
use std::process::Command;
|
|
||||||
use tempfile::TempDir;
|
|
||||||
|
|
||||||
/// Helper to create a test git repository with flock-requirements.md
|
|
||||||
fn create_test_project(name: &str) -> TempDir {
|
|
||||||
let temp_dir = TempDir::new().expect("Failed to create temp dir");
|
|
||||||
let project_path = temp_dir.path();
|
|
||||||
|
|
||||||
// Initialize git repo
|
|
||||||
let output = Command::new("git")
|
|
||||||
.arg("init")
|
|
||||||
.current_dir(project_path)
|
|
||||||
.output()
|
|
||||||
.expect("Failed to run git init");
|
|
||||||
assert!(output.status.success(), "git init failed");
|
|
||||||
|
|
||||||
// Configure git user (required for commits)
|
|
||||||
Command::new("git")
|
|
||||||
.args(["config", "user.email", "test@example.com"])
|
|
||||||
.current_dir(project_path)
|
|
||||||
.output()
|
|
||||||
.expect("Failed to configure git email");
|
|
||||||
|
|
||||||
Command::new("git")
|
|
||||||
.args(["config", "user.name", "Test User"])
|
|
||||||
.current_dir(project_path)
|
|
||||||
.output()
|
|
||||||
.expect("Failed to configure git name");
|
|
||||||
|
|
||||||
// Create flock-requirements.md
|
|
||||||
let requirements = format!(
|
|
||||||
"# {} Test Project\n\n\
|
|
||||||
## Module A\n\
|
|
||||||
- Create a simple Rust library\n\
|
|
||||||
- Add a function that returns \"Hello from Module A\"\n\
|
|
||||||
- Write a unit test for the function\n\n\
|
|
||||||
## Module B\n\
|
|
||||||
- Create another Rust library\n\
|
|
||||||
- Add a function that returns \"Hello from Module B\"\n\
|
|
||||||
- Write a unit test for the function\n",
|
|
||||||
name
|
|
||||||
);
|
|
||||||
|
|
||||||
fs::write(project_path.join("flock-requirements.md"), requirements)
|
|
||||||
.expect("Failed to write requirements");
|
|
||||||
|
|
||||||
// Create a simple README
|
|
||||||
fs::write(project_path.join("README.md"), format!("# {}\n", name))
|
|
||||||
.expect("Failed to write README");
|
|
||||||
|
|
||||||
// Create initial commit
|
|
||||||
Command::new("git")
|
|
||||||
.args(["add", "."])
|
|
||||||
.current_dir(project_path)
|
|
||||||
.output()
|
|
||||||
.expect("Failed to git add");
|
|
||||||
|
|
||||||
let output = Command::new("git")
|
|
||||||
.args(["commit", "-m", "Initial commit"])
|
|
||||||
.current_dir(project_path)
|
|
||||||
.output()
|
|
||||||
.expect("Failed to git commit");
|
|
||||||
assert!(output.status.success(), "git commit failed");
|
|
||||||
|
|
||||||
temp_dir
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_flock_config_validation() {
|
|
||||||
let temp_dir = TempDir::new().unwrap();
|
|
||||||
let project_path = temp_dir.path().to_path_buf();
|
|
||||||
let workspace_path = temp_dir.path().join("workspace");
|
|
||||||
|
|
||||||
// Should fail - not a git repo
|
|
||||||
let result = FlockConfig::new(project_path.clone(), workspace_path.clone(), 2);
|
|
||||||
assert!(result.is_err());
|
|
||||||
assert!(result
|
|
||||||
.unwrap_err()
|
|
||||||
.to_string()
|
|
||||||
.contains("must be a git repository"));
|
|
||||||
|
|
||||||
// Initialize git repo
|
|
||||||
Command::new("git")
|
|
||||||
.arg("init")
|
|
||||||
.current_dir(&project_path)
|
|
||||||
.output()
|
|
||||||
.expect("Failed to run git init");
|
|
||||||
|
|
||||||
// Should fail - no flock-requirements.md
|
|
||||||
let result = FlockConfig::new(project_path.clone(), workspace_path.clone(), 2);
|
|
||||||
assert!(result.is_err());
|
|
||||||
assert!(result
|
|
||||||
.unwrap_err()
|
|
||||||
.to_string()
|
|
||||||
.contains("flock-requirements.md"));
|
|
||||||
|
|
||||||
// Create flock-requirements.md
|
|
||||||
fs::write(project_path.join("flock-requirements.md"), "# Test\n")
|
|
||||||
.expect("Failed to write requirements");
|
|
||||||
|
|
||||||
// Should succeed now
|
|
||||||
let result = FlockConfig::new(project_path, workspace_path, 2);
|
|
||||||
assert!(result.is_ok());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_flock_config_builder() {
|
|
||||||
let project_dir = create_test_project("builder-test");
|
|
||||||
let workspace_dir = TempDir::new().unwrap();
|
|
||||||
|
|
||||||
let config = FlockConfig::new(
|
|
||||||
project_dir.path().to_path_buf(),
|
|
||||||
workspace_dir.path().to_path_buf(),
|
|
||||||
2,
|
|
||||||
)
|
|
||||||
.expect("Failed to create config")
|
|
||||||
.with_max_turns(15)
|
|
||||||
.with_g3_binary(PathBuf::from("/custom/g3"));
|
|
||||||
|
|
||||||
assert_eq!(config.num_segments, 2);
|
|
||||||
assert_eq!(config.max_turns, 15);
|
|
||||||
assert_eq!(config.g3_binary, Some(PathBuf::from("/custom/g3")));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_workspace_creation() {
|
|
||||||
let project_dir = create_test_project("workspace-test");
|
|
||||||
let workspace_dir = TempDir::new().unwrap();
|
|
||||||
|
|
||||||
let config = FlockConfig::new(
|
|
||||||
project_dir.path().to_path_buf(),
|
|
||||||
workspace_dir.path().to_path_buf(),
|
|
||||||
2,
|
|
||||||
)
|
|
||||||
.expect("Failed to create config");
|
|
||||||
|
|
||||||
// Create FlockMode instance
|
|
||||||
let _flock = FlockMode::new(config).expect("Failed to create FlockMode");
|
|
||||||
|
|
||||||
// Verify workspace directory structure will be created
|
|
||||||
// (We can't run the full flock without LLM access, but we can test the setup)
|
|
||||||
assert!(project_dir.path().join(".git").exists());
|
|
||||||
assert!(project_dir.path().join("flock-requirements.md").exists());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_git_clone_functionality() {
|
|
||||||
let project_dir = create_test_project("clone-test");
|
|
||||||
let workspace_dir = TempDir::new().unwrap();
|
|
||||||
|
|
||||||
// Manually test git cloning (what flock mode does internally)
|
|
||||||
let segment_dir = workspace_dir.path().join("segment-1");
|
|
||||||
|
|
||||||
let output = Command::new("git")
|
|
||||||
.arg("clone")
|
|
||||||
.arg(project_dir.path())
|
|
||||||
.arg(&segment_dir)
|
|
||||||
.output()
|
|
||||||
.expect("Failed to run git clone");
|
|
||||||
|
|
||||||
assert!(output.status.success(), "git clone failed: {:?}", output);
|
|
||||||
|
|
||||||
// Verify the clone
|
|
||||||
assert!(segment_dir.exists());
|
|
||||||
assert!(segment_dir.join(".git").exists());
|
|
||||||
assert!(segment_dir.join("flock-requirements.md").exists());
|
|
||||||
assert!(segment_dir.join("README.md").exists());
|
|
||||||
|
|
||||||
// Verify it's a proper git repo
|
|
||||||
let output = Command::new("git")
|
|
||||||
.args(["log", "--oneline"])
|
|
||||||
.current_dir(&segment_dir)
|
|
||||||
.output()
|
|
||||||
.expect("Failed to run git log");
|
|
||||||
|
|
||||||
assert!(output.status.success());
|
|
||||||
let log = String::from_utf8_lossy(&output.stdout);
|
|
||||||
assert!(log.contains("Initial commit"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_multiple_segment_clones() {
|
|
||||||
let project_dir = create_test_project("multi-clone-test");
|
|
||||||
let workspace_dir = TempDir::new().unwrap();
|
|
||||||
|
|
||||||
// Clone multiple segments
|
|
||||||
for i in 1..=2 {
|
|
||||||
let segment_dir = workspace_dir.path().join(format!("segment-{}", i));
|
|
||||||
|
|
||||||
let output = Command::new("git")
|
|
||||||
.arg("clone")
|
|
||||||
.arg(project_dir.path())
|
|
||||||
.arg(&segment_dir)
|
|
||||||
.output()
|
|
||||||
.expect("Failed to run git clone");
|
|
||||||
|
|
||||||
assert!(output.status.success(), "git clone {} failed", i);
|
|
||||||
assert!(segment_dir.exists());
|
|
||||||
assert!(segment_dir.join(".git").exists());
|
|
||||||
assert!(segment_dir.join("flock-requirements.md").exists());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Verify both segments exist and are independent
|
|
||||||
let segment1 = workspace_dir.path().join("segment-1");
|
|
||||||
let segment2 = workspace_dir.path().join("segment-2");
|
|
||||||
|
|
||||||
assert!(segment1.exists());
|
|
||||||
assert!(segment2.exists());
|
|
||||||
|
|
||||||
// Modify segment 1
|
|
||||||
fs::write(segment1.join("test.txt"), "segment 1")
|
|
||||||
.expect("Failed to write to segment 1");
|
|
||||||
|
|
||||||
// Verify segment 2 is unaffected
|
|
||||||
assert!(!segment2.join("test.txt").exists());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_segment_requirements_creation() {
|
|
||||||
let project_dir = create_test_project("segment-req-test");
|
|
||||||
let workspace_dir = TempDir::new().unwrap();
|
|
||||||
|
|
||||||
// Clone a segment
|
|
||||||
let segment_dir = workspace_dir.path().join("segment-1");
|
|
||||||
Command::new("git")
|
|
||||||
.arg("clone")
|
|
||||||
.arg(project_dir.path())
|
|
||||||
.arg(&segment_dir)
|
|
||||||
.output()
|
|
||||||
.expect("Failed to clone");
|
|
||||||
|
|
||||||
// Create segment-requirements.md (what flock mode does)
|
|
||||||
let segment_requirements = "# Module A\n\nImplement module A functionality\n";
|
|
||||||
fs::write(segment_dir.join("segment-requirements.md"), segment_requirements)
|
|
||||||
.expect("Failed to write segment requirements");
|
|
||||||
|
|
||||||
// Verify it was created
|
|
||||||
assert!(segment_dir.join("segment-requirements.md").exists());
|
|
||||||
let content = fs::read_to_string(segment_dir.join("segment-requirements.md"))
|
|
||||||
.expect("Failed to read segment requirements");
|
|
||||||
assert!(content.contains("Module A"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_status_file_operations() {
|
|
||||||
use g3_ensembles::FlockStatus;
|
|
||||||
|
|
||||||
let temp_dir = TempDir::new().unwrap();
|
|
||||||
let status_file = temp_dir.path().join("flock-status.json");
|
|
||||||
|
|
||||||
// Create a status
|
|
||||||
let status = FlockStatus::new(
|
|
||||||
"test-session".to_string(),
|
|
||||||
PathBuf::from("/test/project"),
|
|
||||||
PathBuf::from("/test/workspace"),
|
|
||||||
2,
|
|
||||||
);
|
|
||||||
|
|
||||||
// Save to file
|
|
||||||
status
|
|
||||||
.save_to_file(&status_file)
|
|
||||||
.expect("Failed to save status");
|
|
||||||
|
|
||||||
// Verify file exists
|
|
||||||
assert!(status_file.exists());
|
|
||||||
|
|
||||||
// Load from file
|
|
||||||
let loaded = FlockStatus::load_from_file(&status_file).expect("Failed to load status");
|
|
||||||
|
|
||||||
assert_eq!(loaded.session_id, "test-session");
|
|
||||||
assert_eq!(loaded.num_segments, 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_json_extraction() {
|
|
||||||
// Test the JSON extraction logic used in partition_requirements
|
|
||||||
let test_cases = vec![
|
|
||||||
(
|
|
||||||
"Here is the result: [{\"module_name\": \"test\"}]",
|
|
||||||
Some("[{\"module_name\": \"test\"}]"),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"```json\n[{\"module_name\": \"test\"}]\n```",
|
|
||||||
Some("[{\"module_name\": \"test\"}]"),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"Some text before\n[{\"a\": 1}, {\"b\": 2}]\nSome text after",
|
|
||||||
Some("[{\"a\": 1}, {\"b\": 2}]"),
|
|
||||||
),
|
|
||||||
("No JSON here", None),
|
|
||||||
];
|
|
||||||
|
|
||||||
for (input, expected) in test_cases {
|
|
||||||
let result = extract_json_array(input);
|
|
||||||
match expected {
|
|
||||||
Some(exp) => {
|
|
||||||
assert!(result.is_some(), "Failed to extract from: {}", input);
|
|
||||||
assert_eq!(result.unwrap(), exp);
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
assert!(result.is_none(), "Should not extract from: {}", input);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Helper function to extract JSON array (mimics the logic in flock.rs)
|
|
||||||
fn extract_json_array(output: &str) -> Option<String> {
|
|
||||||
if let Some(start) = output.find('[') {
|
|
||||||
if let Some(end) = output.rfind(']') {
|
|
||||||
if end > start {
|
|
||||||
return Some(output[start..=end].to_string());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_partition_json_parsing() {
|
|
||||||
// Test parsing of partition JSON
|
|
||||||
let json = r#"[
|
|
||||||
{
|
|
||||||
"module_name": "core-library",
|
|
||||||
"requirements": "Build the core library with basic functionality",
|
|
||||||
"dependencies": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"module_name": "cli-tool",
|
|
||||||
"requirements": "Create a CLI tool that uses the core library",
|
|
||||||
"dependencies": ["core-library"]
|
|
||||||
}
|
|
||||||
]"#;
|
|
||||||
|
|
||||||
let partitions: Vec<serde_json::Value> =
|
|
||||||
serde_json::from_str(json).expect("Failed to parse JSON");
|
|
||||||
|
|
||||||
assert_eq!(partitions.len(), 2);
|
|
||||||
assert_eq!(partitions[0]["module_name"], "core-library");
|
|
||||||
assert_eq!(partitions[1]["module_name"], "cli-tool");
|
|
||||||
assert_eq!(partitions[1]["dependencies"][0], "core-library");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_requirements_file_content() {
|
|
||||||
let project_dir = create_test_project("content-test");
|
|
||||||
|
|
||||||
let requirements_path = project_dir.path().join("flock-requirements.md");
|
|
||||||
let content = fs::read_to_string(&requirements_path).expect("Failed to read requirements");
|
|
||||||
|
|
||||||
// Verify content structure
|
|
||||||
assert!(content.contains("# content-test Test Project"));
|
|
||||||
assert!(content.contains("## Module A"));
|
|
||||||
assert!(content.contains("## Module B"));
|
|
||||||
assert!(content.contains("Hello from Module A"));
|
|
||||||
assert!(content.contains("Hello from Module B"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_git_repo_independence() {
|
|
||||||
let project_dir = create_test_project("independence-test");
|
|
||||||
let workspace_dir = TempDir::new().unwrap();
|
|
||||||
|
|
||||||
// Clone two segments
|
|
||||||
let segment1 = workspace_dir.path().join("segment-1");
|
|
||||||
let segment2 = workspace_dir.path().join("segment-2");
|
|
||||||
|
|
||||||
Command::new("git")
|
|
||||||
.arg("clone")
|
|
||||||
.arg(project_dir.path())
|
|
||||||
.arg(&segment1)
|
|
||||||
.output()
|
|
||||||
.expect("Failed to clone segment 1");
|
|
||||||
|
|
||||||
Command::new("git")
|
|
||||||
.arg("clone")
|
|
||||||
.arg(project_dir.path())
|
|
||||||
.arg(&segment2)
|
|
||||||
.output()
|
|
||||||
.expect("Failed to clone segment 2");
|
|
||||||
|
|
||||||
// Make a commit in segment 1
|
|
||||||
fs::write(segment1.join("file1.txt"), "content 1").expect("Failed to write file1");
|
|
||||||
|
|
||||||
Command::new("git")
|
|
||||||
.args(["add", "file1.txt"])
|
|
||||||
.current_dir(&segment1)
|
|
||||||
.output()
|
|
||||||
.expect("Failed to git add");
|
|
||||||
|
|
||||||
Command::new("git")
|
|
||||||
.args(["commit", "-m", "Add file1"])
|
|
||||||
.current_dir(&segment1)
|
|
||||||
.output()
|
|
||||||
.expect("Failed to commit in segment 1");
|
|
||||||
|
|
||||||
// Make a different commit in segment 2
|
|
||||||
fs::write(segment2.join("file2.txt"), "content 2").expect("Failed to write file2");
|
|
||||||
|
|
||||||
Command::new("git")
|
|
||||||
.args(["add", "file2.txt"])
|
|
||||||
.current_dir(&segment2)
|
|
||||||
.output()
|
|
||||||
.expect("Failed to git add");
|
|
||||||
|
|
||||||
Command::new("git")
|
|
||||||
.args(["commit", "-m", "Add file2"])
|
|
||||||
.current_dir(&segment2)
|
|
||||||
.output()
|
|
||||||
.expect("Failed to commit in segment 2");
|
|
||||||
|
|
||||||
// Verify they have different commits
|
|
||||||
let log1 = Command::new("git")
|
|
||||||
.args(["log", "--oneline"])
|
|
||||||
.current_dir(&segment1)
|
|
||||||
.output()
|
|
||||||
.expect("Failed to get log 1");
|
|
||||||
|
|
||||||
let log2 = Command::new("git")
|
|
||||||
.args(["log", "--oneline"])
|
|
||||||
.current_dir(&segment2)
|
|
||||||
.output()
|
|
||||||
.expect("Failed to get log 2");
|
|
||||||
|
|
||||||
let log1_str = String::from_utf8_lossy(&log1.stdout);
|
|
||||||
let log2_str = String::from_utf8_lossy(&log2.stdout);
|
|
||||||
|
|
||||||
assert!(log1_str.contains("Add file1"));
|
|
||||||
assert!(!log1_str.contains("Add file2"));
|
|
||||||
assert!(log2_str.contains("Add file2"));
|
|
||||||
assert!(!log2_str.contains("Add file1"));
|
|
||||||
|
|
||||||
// Verify files exist only in their respective segments
|
|
||||||
assert!(segment1.join("file1.txt").exists());
|
|
||||||
assert!(!segment1.join("file2.txt").exists());
|
|
||||||
assert!(segment2.join("file2.txt").exists());
|
|
||||||
assert!(!segment2.join("file1.txt").exists());
|
|
||||||
}
|
|
||||||
@@ -21,22 +21,18 @@
|
|||||||
//! // Create the provider with your API key
|
//! // Create the provider with your API key
|
||||||
//! let provider = AnthropicProvider::new(
|
//! let provider = AnthropicProvider::new(
|
||||||
//! "your-api-key".to_string(),
|
//! "your-api-key".to_string(),
|
||||||
//! Some("claude-3-5-sonnet-20241022".to_string()), // Optional: defaults to claude-3-5-sonnet-20241022
|
//! Some("claude-3-5-sonnet-20241022".to_string()),
|
||||||
//! Some(4096), // Optional: max tokens
|
//! Some(4096),
|
||||||
//! Some(0.1), // Optional: temperature
|
//! Some(0.1),
|
||||||
|
//! None, // cache_config
|
||||||
|
//! None, // enable_1m_context
|
||||||
//! )?;
|
//! )?;
|
||||||
//!
|
//!
|
||||||
//! // Create a completion request
|
//! // Create a completion request
|
||||||
//! let request = CompletionRequest {
|
//! let request = CompletionRequest {
|
||||||
//! messages: vec![
|
//! messages: vec![
|
||||||
//! Message {
|
//! Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
|
||||||
//! role: MessageRole::System,
|
//! Message::new(MessageRole::User, "Hello! How are you?".to_string()),
|
||||||
//! content: "You are a helpful assistant.".to_string(),
|
|
||||||
//! },
|
|
||||||
//! Message {
|
|
||||||
//! role: MessageRole::User,
|
|
||||||
//! content: "Hello! How are you?".to_string(),
|
|
||||||
//! },
|
|
||||||
//! ],
|
//! ],
|
||||||
//! max_tokens: Some(1000),
|
//! max_tokens: Some(1000),
|
||||||
//! temperature: Some(0.7),
|
//! temperature: Some(0.7),
|
||||||
@@ -62,15 +58,16 @@
|
|||||||
//! async fn main() -> anyhow::Result<()> {
|
//! async fn main() -> anyhow::Result<()> {
|
||||||
//! let provider = AnthropicProvider::new(
|
//! let provider = AnthropicProvider::new(
|
||||||
//! "your-api-key".to_string(),
|
//! "your-api-key".to_string(),
|
||||||
//! None, None, None,
|
//! None,
|
||||||
|
//! None,
|
||||||
|
//! None,
|
||||||
|
//! None, // cache_config
|
||||||
|
//! None, // enable_1m_context
|
||||||
//! )?;
|
//! )?;
|
||||||
//!
|
//!
|
||||||
//! let request = CompletionRequest {
|
//! let request = CompletionRequest {
|
||||||
//! messages: vec![
|
//! messages: vec![
|
||||||
//! Message {
|
//! Message::new(MessageRole::User, "Write a short story about a robot.".to_string()),
|
||||||
//! role: MessageRole::User,
|
|
||||||
//! content: "Write a short story about a robot.".to_string(),
|
|
||||||
//! },
|
|
||||||
//! ],
|
//! ],
|
||||||
//! max_tokens: Some(1000),
|
//! max_tokens: Some(1000),
|
||||||
//! temperature: Some(0.7),
|
//! temperature: Some(0.7),
|
||||||
@@ -123,6 +120,8 @@ pub struct AnthropicProvider {
|
|||||||
model: String,
|
model: String,
|
||||||
max_tokens: u32,
|
max_tokens: u32,
|
||||||
temperature: f32,
|
temperature: f32,
|
||||||
|
cache_config: Option<String>,
|
||||||
|
enable_1m_context: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl AnthropicProvider {
|
impl AnthropicProvider {
|
||||||
@@ -131,6 +130,8 @@ impl AnthropicProvider {
|
|||||||
model: Option<String>,
|
model: Option<String>,
|
||||||
max_tokens: Option<u32>,
|
max_tokens: Option<u32>,
|
||||||
temperature: Option<f32>,
|
temperature: Option<f32>,
|
||||||
|
cache_config: Option<String>,
|
||||||
|
enable_1m_context: Option<bool>,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
let client = Client::builder()
|
let client = Client::builder()
|
||||||
.timeout(Duration::from_secs(300))
|
.timeout(Duration::from_secs(300))
|
||||||
@@ -147,6 +148,8 @@ impl AnthropicProvider {
|
|||||||
model,
|
model,
|
||||||
max_tokens: max_tokens.unwrap_or(4096),
|
max_tokens: max_tokens.unwrap_or(4096),
|
||||||
temperature: temperature.unwrap_or(0.1),
|
temperature: temperature.unwrap_or(0.1),
|
||||||
|
cache_config,
|
||||||
|
enable_1m_context: enable_1m_context.unwrap_or(false),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -156,9 +159,12 @@ impl AnthropicProvider {
|
|||||||
.post(ANTHROPIC_API_URL)
|
.post(ANTHROPIC_API_URL)
|
||||||
.header("x-api-key", &self.api_key)
|
.header("x-api-key", &self.api_key)
|
||||||
.header("anthropic-version", ANTHROPIC_VERSION)
|
.header("anthropic-version", ANTHROPIC_VERSION)
|
||||||
// Anthropic beta 1m context window. Enable if needed. It costs extra, so check first.
|
|
||||||
// .header("anthropic-beta", "context-1m-2025-08-07")
|
|
||||||
.header("content-type", "application/json");
|
.header("content-type", "application/json");
|
||||||
|
|
||||||
|
if self.enable_1m_context {
|
||||||
|
builder = builder.header("anthropic-beta", "context-1m-2025-08-07");
|
||||||
|
}
|
||||||
|
|
||||||
if streaming {
|
if streaming {
|
||||||
builder = builder.header("accept", "text/event-stream");
|
builder = builder.header("accept", "text/event-stream");
|
||||||
}
|
}
|
||||||
@@ -166,6 +172,11 @@ impl AnthropicProvider {
|
|||||||
builder
|
builder
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn convert_cache_control(cache_control: &crate::CacheControl) -> crate::CacheControl {
|
||||||
|
// Anthropic uses the same format, so just clone it
|
||||||
|
cache_control.clone()
|
||||||
|
}
|
||||||
|
|
||||||
fn convert_tools(&self, tools: &[Tool]) -> Vec<AnthropicTool> {
|
fn convert_tools(&self, tools: &[Tool]) -> Vec<AnthropicTool> {
|
||||||
tools
|
tools
|
||||||
.iter()
|
.iter()
|
||||||
@@ -214,6 +225,8 @@ impl AnthropicProvider {
|
|||||||
role: "user".to_string(),
|
role: "user".to_string(),
|
||||||
content: vec![AnthropicContent::Text {
|
content: vec![AnthropicContent::Text {
|
||||||
text: message.content.clone(),
|
text: message.content.clone(),
|
||||||
|
cache_control: message.cache_control.as_ref()
|
||||||
|
.map(Self::convert_cache_control),
|
||||||
}],
|
}],
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -222,6 +235,8 @@ impl AnthropicProvider {
|
|||||||
role: "assistant".to_string(),
|
role: "assistant".to_string(),
|
||||||
content: vec![AnthropicContent::Text {
|
content: vec![AnthropicContent::Text {
|
||||||
text: message.content.clone(),
|
text: message.content.clone(),
|
||||||
|
cache_control: message.cache_control.as_ref()
|
||||||
|
.map(Self::convert_cache_control),
|
||||||
}],
|
}],
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -564,7 +579,7 @@ impl LLMProvider for AnthropicProvider {
|
|||||||
.content
|
.content
|
||||||
.iter()
|
.iter()
|
||||||
.filter_map(|c| match c {
|
.filter_map(|c| match c {
|
||||||
AnthropicContent::Text { text } => Some(text.as_str()),
|
AnthropicContent::Text { text, .. } => Some(text.as_str()),
|
||||||
_ => None,
|
_ => None,
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>()
|
.collect::<Vec<_>>()
|
||||||
@@ -658,6 +673,11 @@ impl LLMProvider for AnthropicProvider {
|
|||||||
// Claude models support native tool calling
|
// Claude models support native tool calling
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn supports_cache_control(&self) -> bool {
|
||||||
|
// Anthropic supports cache control
|
||||||
|
true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Anthropic API request/response structures
|
// Anthropic API request/response structures
|
||||||
@@ -701,7 +721,11 @@ struct AnthropicMessage {
|
|||||||
#[serde(tag = "type")]
|
#[serde(tag = "type")]
|
||||||
enum AnthropicContent {
|
enum AnthropicContent {
|
||||||
#[serde(rename = "text")]
|
#[serde(rename = "text")]
|
||||||
Text { text: String },
|
Text {
|
||||||
|
text: String,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
cache_control: Option<crate::CacheControl>,
|
||||||
|
},
|
||||||
#[serde(rename = "tool_use")]
|
#[serde(rename = "tool_use")]
|
||||||
ToolUse {
|
ToolUse {
|
||||||
id: String,
|
id: String,
|
||||||
@@ -771,21 +795,14 @@ mod tests {
|
|||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
).unwrap();
|
).unwrap();
|
||||||
|
|
||||||
let messages = vec![
|
let messages = vec![
|
||||||
Message {
|
Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
|
||||||
role: MessageRole::System,
|
Message::new(MessageRole::User, "Hello!".to_string()),
|
||||||
content: "You are a helpful assistant.".to_string(),
|
Message::new(MessageRole::Assistant, "Hi there!".to_string()),
|
||||||
},
|
|
||||||
Message {
|
|
||||||
role: MessageRole::User,
|
|
||||||
content: "Hello!".to_string(),
|
|
||||||
},
|
|
||||||
Message {
|
|
||||||
role: MessageRole::Assistant,
|
|
||||||
content: "Hi there!".to_string(),
|
|
||||||
},
|
|
||||||
];
|
];
|
||||||
|
|
||||||
let (system, anthropic_messages) = provider.convert_messages(&messages).unwrap();
|
let (system, anthropic_messages) = provider.convert_messages(&messages).unwrap();
|
||||||
@@ -803,14 +820,11 @@ mod tests {
|
|||||||
Some("claude-3-haiku-20240307".to_string()),
|
Some("claude-3-haiku-20240307".to_string()),
|
||||||
Some(1000),
|
Some(1000),
|
||||||
Some(0.5),
|
Some(0.5),
|
||||||
|
None,
|
||||||
|
None,
|
||||||
).unwrap();
|
).unwrap();
|
||||||
|
|
||||||
let messages = vec![
|
let messages = vec![Message::new(MessageRole::User, "Test message".to_string())];
|
||||||
Message {
|
|
||||||
role: MessageRole::User,
|
|
||||||
content: "Test message".to_string(),
|
|
||||||
},
|
|
||||||
];
|
|
||||||
|
|
||||||
let request_body = provider
|
let request_body = provider
|
||||||
.create_request_body(&messages, None, false, 1000, 0.5)
|
.create_request_body(&messages, None, false, 1000, 0.5)
|
||||||
@@ -831,6 +845,8 @@ mod tests {
|
|||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
).unwrap();
|
).unwrap();
|
||||||
|
|
||||||
let tools = vec![
|
let tools = vec![
|
||||||
@@ -859,4 +875,48 @@ mod tests {
|
|||||||
assert!(anthropic_tools[0].input_schema.required.is_some());
|
assert!(anthropic_tools[0].input_schema.required.is_some());
|
||||||
assert_eq!(anthropic_tools[0].input_schema.required.as_ref().unwrap()[0], "location");
|
assert_eq!(anthropic_tools[0].input_schema.required.as_ref().unwrap()[0], "location");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_cache_control_serialization() {
|
||||||
|
let provider = AnthropicProvider::new(
|
||||||
|
"test-key".to_string(),
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
).unwrap();
|
||||||
|
|
||||||
|
// Test message WITHOUT cache_control
|
||||||
|
let messages_without = vec![Message::new(MessageRole::User, "Hello".to_string())];
|
||||||
|
let (_, anthropic_messages_without) = provider.convert_messages(&messages_without).unwrap();
|
||||||
|
let json_without = serde_json::to_string(&anthropic_messages_without).unwrap();
|
||||||
|
|
||||||
|
println!("Anthropic JSON without cache_control: {}", json_without);
|
||||||
|
// Check if cache_control appears in the JSON
|
||||||
|
if json_without.contains("cache_control") {
|
||||||
|
println!("WARNING: JSON contains 'cache_control' field when not configured!");
|
||||||
|
assert!(!json_without.contains("\"cache_control\":null"),
|
||||||
|
"JSON should not contain 'cache_control: null'");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test message WITH cache_control
|
||||||
|
let messages_with = vec![Message::with_cache_control(
|
||||||
|
MessageRole::User,
|
||||||
|
"Hello".to_string(),
|
||||||
|
crate::CacheControl::ephemeral(),
|
||||||
|
)];
|
||||||
|
let (_, anthropic_messages_with) = provider.convert_messages(&messages_with).unwrap();
|
||||||
|
let json_with = serde_json::to_string(&anthropic_messages_with).unwrap();
|
||||||
|
|
||||||
|
println!("Anthropic JSON with cache_control: {}", json_with);
|
||||||
|
assert!(json_with.contains("cache_control"),
|
||||||
|
"JSON should contain 'cache_control' field when configured");
|
||||||
|
assert!(json_with.contains("ephemeral"),
|
||||||
|
"JSON should contain 'ephemeral' type");
|
||||||
|
|
||||||
|
// The key assertion: when cache_control is None, it should not appear in JSON
|
||||||
|
assert!(!json_without.contains("cache_control") || !json_without.contains("null"),
|
||||||
|
"JSON should not contain 'cache_control' field or null values when not configured");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -39,10 +39,7 @@
|
|||||||
//! // Create a completion request
|
//! // Create a completion request
|
||||||
//! let request = CompletionRequest {
|
//! let request = CompletionRequest {
|
||||||
//! messages: vec![
|
//! messages: vec![
|
||||||
//! Message {
|
//! Message::new(MessageRole::User, "Hello! How are you?".to_string()),
|
||||||
//! role: MessageRole::User,
|
|
||||||
//! content: "Hello! How are you?".to_string(),
|
|
||||||
//! },
|
|
||||||
//! ],
|
//! ],
|
||||||
//! max_tokens: Some(1000),
|
//! max_tokens: Some(1000),
|
||||||
//! temperature: Some(0.7),
|
//! temperature: Some(0.7),
|
||||||
@@ -251,9 +248,12 @@ impl DatabricksProvider {
|
|||||||
MessageRole::Assistant => "assistant",
|
MessageRole::Assistant => "assistant",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Always use simple string format (Databricks doesn't support cache_control)
|
||||||
|
let content = serde_json::Value::String(message.content.clone());
|
||||||
|
|
||||||
databricks_messages.push(DatabricksMessage {
|
databricks_messages.push(DatabricksMessage {
|
||||||
role: role.to_string(),
|
role: role.to_string(),
|
||||||
content: Some(message.content.clone()),
|
content: Some(content),
|
||||||
tool_calls: None, // Only used in responses, not requests
|
tool_calls: None, // Only used in responses, not requests
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -864,8 +864,22 @@ impl LLMProvider for DatabricksProvider {
|
|||||||
let content = databricks_response
|
let content = databricks_response
|
||||||
.choices
|
.choices
|
||||||
.first()
|
.first()
|
||||||
.and_then(|choice| choice.message.content.as_ref())
|
.and_then(|choice| {
|
||||||
.cloned()
|
choice.message.content.as_ref().map(|c| {
|
||||||
|
// Handle both string and array formats
|
||||||
|
if let Some(s) = c.as_str() {
|
||||||
|
s.to_string()
|
||||||
|
} else if let Some(arr) = c.as_array() {
|
||||||
|
// Extract text from content blocks
|
||||||
|
arr.iter()
|
||||||
|
.filter_map(|block| block.get("text").and_then(|t| t.as_str()))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("")
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
|
|
||||||
// Check if there are tool calls in the response
|
// Check if there are tool calls in the response
|
||||||
@@ -1037,6 +1051,10 @@ impl LLMProvider for DatabricksProvider {
|
|||||||
// This includes Claude, Llama, DBRX, and most other models on the platform
|
// This includes Claude, Llama, DBRX, and most other models on the platform
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn supports_cache_control(&self) -> bool {
|
||||||
|
false
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Databricks API request/response structures
|
// Databricks API request/response structures
|
||||||
@@ -1067,7 +1085,8 @@ struct DatabricksFunction {
|
|||||||
#[derive(Debug, Serialize, Deserialize)]
|
#[derive(Debug, Serialize, Deserialize)]
|
||||||
struct DatabricksMessage {
|
struct DatabricksMessage {
|
||||||
role: String,
|
role: String,
|
||||||
content: Option<String>, // Make content optional since tool calls might not have content
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
content: Option<serde_json::Value>, // Can be string or array of content blocks
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
tool_calls: Option<Vec<DatabricksToolCall>>, // Add tool_calls field for responses
|
tool_calls: Option<Vec<DatabricksToolCall>>, // Add tool_calls field for responses
|
||||||
}
|
}
|
||||||
@@ -1154,18 +1173,9 @@ mod tests {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let messages = vec![
|
let messages = vec![
|
||||||
Message {
|
Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
|
||||||
role: MessageRole::System,
|
Message::new(MessageRole::User, "Hello!".to_string()),
|
||||||
content: "You are a helpful assistant.".to_string(),
|
Message::new(MessageRole::Assistant, "Hi there!".to_string()),
|
||||||
},
|
|
||||||
Message {
|
|
||||||
role: MessageRole::User,
|
|
||||||
content: "Hello!".to_string(),
|
|
||||||
},
|
|
||||||
Message {
|
|
||||||
role: MessageRole::Assistant,
|
|
||||||
content: "Hi there!".to_string(),
|
|
||||||
},
|
|
||||||
];
|
];
|
||||||
|
|
||||||
let databricks_messages = provider.convert_messages(&messages).unwrap();
|
let databricks_messages = provider.convert_messages(&messages).unwrap();
|
||||||
@@ -1187,10 +1197,7 @@ mod tests {
|
|||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let messages = vec![Message {
|
let messages = vec![Message::new(MessageRole::User, "Test message".to_string())];
|
||||||
role: MessageRole::User,
|
|
||||||
content: "Test message".to_string(),
|
|
||||||
}];
|
|
||||||
|
|
||||||
let request_body = provider
|
let request_body = provider
|
||||||
.create_request_body(&messages, None, false, 1000, 0.5)
|
.create_request_body(&messages, None, false, 1000, 0.5)
|
||||||
@@ -1273,4 +1280,62 @@ mod tests {
|
|||||||
assert!(llama_provider.has_native_tool_calling());
|
assert!(llama_provider.has_native_tool_calling());
|
||||||
assert!(dbrx_provider.has_native_tool_calling());
|
assert!(dbrx_provider.has_native_tool_calling());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_cache_control_serialization() {
|
||||||
|
let provider = DatabricksProvider::from_token(
|
||||||
|
"https://test.databricks.com".to_string(),
|
||||||
|
"test-token".to_string(),
|
||||||
|
"databricks-claude-sonnet-4".to_string(),
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Test message WITHOUT cache_control
|
||||||
|
let messages_without = vec![Message::new(MessageRole::User, "Hello".to_string())];
|
||||||
|
let databricks_messages_without = provider.convert_messages(&messages_without).unwrap();
|
||||||
|
let json_without = serde_json::to_string(&databricks_messages_without).unwrap();
|
||||||
|
|
||||||
|
println!("JSON without cache_control: {}", json_without);
|
||||||
|
assert!(!json_without.contains("cache_control"),
|
||||||
|
"JSON should not contain 'cache_control' field when not configured");
|
||||||
|
|
||||||
|
// Test message WITH cache_control - should still NOT include it (Databricks doesn't support it)
|
||||||
|
let messages_with = vec![Message::with_cache_control(
|
||||||
|
MessageRole::User,
|
||||||
|
"Hello".to_string(),
|
||||||
|
crate::CacheControl::ephemeral(),
|
||||||
|
)];
|
||||||
|
let databricks_messages_with = provider.convert_messages(&messages_with).unwrap();
|
||||||
|
let json_with = serde_json::to_string(&databricks_messages_with).unwrap();
|
||||||
|
|
||||||
|
println!("JSON with cache_control: {}", json_with);
|
||||||
|
assert!(!json_with.contains("cache_control"),
|
||||||
|
"JSON should NOT contain 'cache_control' field - Databricks doesn't support it");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_databricks_does_not_support_cache_control() {
|
||||||
|
let claude_provider = DatabricksProvider::from_token(
|
||||||
|
"https://test.databricks.com".to_string(),
|
||||||
|
"test-token".to_string(),
|
||||||
|
"databricks-claude-sonnet-4".to_string(),
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let llama_provider = DatabricksProvider::from_token(
|
||||||
|
"https://test.databricks.com".to_string(),
|
||||||
|
"test-token".to_string(),
|
||||||
|
"databricks-meta-llama-3-3-70b-instruct".to_string(),
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
assert!(!claude_provider.supports_cache_control(), "Databricks should not support cache_control even for Claude models");
|
||||||
|
assert!(!llama_provider.supports_cache_control(), "Databricks should not support cache_control for Llama models");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -21,6 +21,11 @@ pub trait LLMProvider: Send + Sync {
|
|||||||
fn has_native_tool_calling(&self) -> bool {
|
fn has_native_tool_calling(&self) -> bool {
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check if the provider supports cache control
|
||||||
|
fn supports_cache_control(&self) -> bool {
|
||||||
|
false
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
@@ -32,10 +37,40 @@ pub struct CompletionRequest {
|
|||||||
pub tools: Option<Vec<Tool>>,
|
pub tools: Option<Vec<Tool>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct CacheControl {
|
||||||
|
#[serde(rename = "type")]
|
||||||
|
pub cache_type: CacheType,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub ttl: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||||
|
#[serde(rename_all = "lowercase")]
|
||||||
|
pub enum CacheType {
|
||||||
|
Ephemeral,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CacheControl {
|
||||||
|
pub fn ephemeral() -> Self {
|
||||||
|
Self { cache_type: CacheType::Ephemeral, ttl: None }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn five_minute() -> Self {
|
||||||
|
Self { cache_type: CacheType::Ephemeral, ttl: Some("5m".to_string()) }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn one_hour() -> Self {
|
||||||
|
Self { cache_type: CacheType::Ephemeral, ttl: Some("1h".to_string()) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct Message {
|
pub struct Message {
|
||||||
pub role: MessageRole,
|
pub role: MessageRole,
|
||||||
pub content: String,
|
pub content: String,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub cache_control: Option<CacheControl>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
@@ -95,6 +130,45 @@ pub use databricks::DatabricksProvider;
|
|||||||
pub use embedded::EmbeddedProvider;
|
pub use embedded::EmbeddedProvider;
|
||||||
pub use openai::OpenAIProvider;
|
pub use openai::OpenAIProvider;
|
||||||
|
|
||||||
|
impl Message {
|
||||||
|
/// Create a new message with optional cache control
|
||||||
|
pub fn new(role: MessageRole, content: String) -> Self {
|
||||||
|
Self {
|
||||||
|
role,
|
||||||
|
content,
|
||||||
|
cache_control: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a new message with cache control
|
||||||
|
pub fn with_cache_control(role: MessageRole, content: String, cache_control: CacheControl) -> Self {
|
||||||
|
Self {
|
||||||
|
role,
|
||||||
|
content,
|
||||||
|
cache_control: Some(cache_control),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a message with cache control, with provider validation
|
||||||
|
pub fn with_cache_control_validated(
|
||||||
|
role: MessageRole,
|
||||||
|
content: String,
|
||||||
|
cache_control: CacheControl,
|
||||||
|
provider: &dyn LLMProvider
|
||||||
|
) -> Self {
|
||||||
|
if !provider.supports_cache_control() {
|
||||||
|
tracing::warn!(
|
||||||
|
"Cache control requested for provider '{}' which does not support it. \
|
||||||
|
Cache control is only supported by Anthropic and Anthropic via Databricks.",
|
||||||
|
provider.name()
|
||||||
|
);
|
||||||
|
return Self::new(role, content);
|
||||||
|
}
|
||||||
|
|
||||||
|
Self::with_cache_control(role, content, cache_control)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Provider registry for managing multiple LLM providers
|
/// Provider registry for managing multiple LLM providers
|
||||||
pub struct ProviderRegistry {
|
pub struct ProviderRegistry {
|
||||||
providers: HashMap<String, Box<dyn LLMProvider>>,
|
providers: HashMap<String, Box<dyn LLMProvider>>,
|
||||||
@@ -144,3 +218,68 @@ impl Default for ProviderRegistry {
|
|||||||
Self::new()
|
Self::new()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_message_serialization_without_cache_control() {
|
||||||
|
let msg = Message::new(MessageRole::User, "Hello".to_string());
|
||||||
|
let json = serde_json::to_string(&msg).unwrap();
|
||||||
|
|
||||||
|
println!("Message JSON without cache_control: {}", json);
|
||||||
|
assert!(!json.contains("cache_control"),
|
||||||
|
"JSON should not contain 'cache_control' field when not configured");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_message_serialization_with_cache_control() {
|
||||||
|
let msg = Message::with_cache_control(
|
||||||
|
MessageRole::User,
|
||||||
|
"Hello".to_string(),
|
||||||
|
CacheControl::ephemeral(),
|
||||||
|
);
|
||||||
|
let json = serde_json::to_string(&msg).unwrap();
|
||||||
|
|
||||||
|
println!("Message JSON with cache_control: {}", json);
|
||||||
|
assert!(json.contains("cache_control"),
|
||||||
|
"JSON should contain 'cache_control' field when configured");
|
||||||
|
assert!(json.contains("ephemeral"),
|
||||||
|
"JSON should contain 'ephemeral' value");
|
||||||
|
assert!(json.contains("\"type\":"),
|
||||||
|
"JSON should contain 'type' field in cache_control");
|
||||||
|
assert!(!json.contains("null"),
|
||||||
|
"JSON should not contain null values");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_cache_control_five_minute_serialization() {
|
||||||
|
let msg = Message::with_cache_control(
|
||||||
|
MessageRole::User,
|
||||||
|
"Hello".to_string(),
|
||||||
|
CacheControl::five_minute(),
|
||||||
|
);
|
||||||
|
let json = serde_json::to_string(&msg).unwrap();
|
||||||
|
|
||||||
|
println!("Message JSON with 5-minute cache_control: {}", json);
|
||||||
|
assert!(json.contains("cache_control"), "JSON should contain 'cache_control' field");
|
||||||
|
assert!(json.contains("ephemeral"), "JSON should contain 'ephemeral' type");
|
||||||
|
assert!(json.contains("\"ttl\":\"5m\""), "JSON should contain ttl field with 5m value");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_cache_control_one_hour_serialization() {
|
||||||
|
let msg = Message::with_cache_control(
|
||||||
|
MessageRole::User,
|
||||||
|
"Hello".to_string(),
|
||||||
|
CacheControl::one_hour(),
|
||||||
|
);
|
||||||
|
let json = serde_json::to_string(&msg).unwrap();
|
||||||
|
|
||||||
|
println!("Message JSON with 1-hour cache_control: {}", json);
|
||||||
|
assert!(json.contains("cache_control"), "JSON should contain 'cache_control' field");
|
||||||
|
assert!(json.contains("ephemeral"), "JSON should contain 'ephemeral' type");
|
||||||
|
assert!(json.contains("\"ttl\":\"1h\""), "JSON should contain ttl field with 1h value");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
131
crates/g3-providers/tests/cache_control_error_regression_test.rs
Normal file
131
crates/g3-providers/tests/cache_control_error_regression_test.rs
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
//! Regression test for cache_control serialization bug
|
||||||
|
//!
|
||||||
|
//! This test verifies that cache_control is NOT serialized in the wrong format.
|
||||||
|
//! The bug was that it serialized as:
|
||||||
|
//! - `system.0.cache_control.ephemeral.ttl` (WRONG)
|
||||||
|
//!
|
||||||
|
//! It should serialize as:
|
||||||
|
//! - `"cache_control": {"type": "ephemeral"}` for ephemeral
|
||||||
|
//! - `"cache_control": {"type": "ephemeral", "ttl": "5m"}` for 5minute
|
||||||
|
//! - `"cache_control": {"type": "ephemeral", "ttl": "1h"}` for 1hour
|
||||||
|
|
||||||
|
use g3_providers::{CacheControl, Message, MessageRole};
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_no_wrong_serialization_format() {
|
||||||
|
// Test ephemeral
|
||||||
|
let msg = Message::with_cache_control(
|
||||||
|
MessageRole::System,
|
||||||
|
"Test".to_string(),
|
||||||
|
CacheControl::ephemeral(),
|
||||||
|
);
|
||||||
|
let json = serde_json::to_string(&msg).unwrap();
|
||||||
|
|
||||||
|
println!("Ephemeral message JSON: {}", json);
|
||||||
|
|
||||||
|
// Should NOT contain the wrong format
|
||||||
|
assert!(!json.contains("system.0.cache_control"),
|
||||||
|
"JSON should not contain 'system.0.cache_control' path");
|
||||||
|
assert!(!json.contains("cache_control.ephemeral"),
|
||||||
|
"JSON should not contain 'cache_control.ephemeral' path");
|
||||||
|
|
||||||
|
// Should contain the correct format
|
||||||
|
assert!(json.contains(r#""cache_control":{"type":"ephemeral"}"#),
|
||||||
|
"JSON should contain correct cache_control format");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_five_minute_no_wrong_format() {
|
||||||
|
let msg = Message::with_cache_control(
|
||||||
|
MessageRole::System,
|
||||||
|
"Test".to_string(),
|
||||||
|
CacheControl::five_minute(),
|
||||||
|
);
|
||||||
|
let json = serde_json::to_string(&msg).unwrap();
|
||||||
|
|
||||||
|
println!("5-minute message JSON: {}", json);
|
||||||
|
|
||||||
|
// Should NOT contain the wrong format
|
||||||
|
assert!(!json.contains("system.0.cache_control"),
|
||||||
|
"JSON should not contain 'system.0.cache_control' path");
|
||||||
|
assert!(!json.contains("cache_control.ephemeral.ttl"),
|
||||||
|
"JSON should not contain 'cache_control.ephemeral.ttl' path");
|
||||||
|
|
||||||
|
// Should contain the correct format with ttl as a direct field
|
||||||
|
assert!(json.contains(r#""type":"ephemeral""#),
|
||||||
|
"JSON should contain type field");
|
||||||
|
assert!(json.contains(r#""ttl":"5m""#),
|
||||||
|
"JSON should contain ttl field with value 5m");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_one_hour_no_wrong_format() {
|
||||||
|
let msg = Message::with_cache_control(
|
||||||
|
MessageRole::System,
|
||||||
|
"Test".to_string(),
|
||||||
|
CacheControl::one_hour(),
|
||||||
|
);
|
||||||
|
let json = serde_json::to_string(&msg).unwrap();
|
||||||
|
|
||||||
|
println!("1-hour message JSON: {}", json);
|
||||||
|
|
||||||
|
// Should NOT contain the wrong format
|
||||||
|
assert!(!json.contains("system.0.cache_control"),
|
||||||
|
"JSON should not contain 'system.0.cache_control' path");
|
||||||
|
assert!(!json.contains("cache_control.ephemeral.ttl"),
|
||||||
|
"JSON should not contain 'cache_control.ephemeral.ttl' path");
|
||||||
|
|
||||||
|
// Should contain the correct format with ttl as a direct field
|
||||||
|
assert!(json.contains(r#""type":"ephemeral""#),
|
||||||
|
"JSON should contain type field");
|
||||||
|
assert!(json.contains(r#""ttl":"1h""#),
|
||||||
|
"JSON should contain ttl field with value 1h");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_cache_control_structure_is_flat() {
|
||||||
|
// Verify that the cache_control object has a flat structure
|
||||||
|
// with 'type' and optional 'ttl' at the same level
|
||||||
|
|
||||||
|
let cache_control = CacheControl::five_minute();
|
||||||
|
let json_value = serde_json::to_value(&cache_control).unwrap();
|
||||||
|
|
||||||
|
println!("Cache control as JSON value: {}", serde_json::to_string_pretty(&json_value).unwrap());
|
||||||
|
|
||||||
|
let obj = json_value.as_object().expect("Should be an object");
|
||||||
|
|
||||||
|
// Should have exactly 2 keys at the top level
|
||||||
|
assert_eq!(obj.len(), 2, "Cache control should have exactly 2 top-level fields");
|
||||||
|
|
||||||
|
// Both 'type' and 'ttl' should be at the same level
|
||||||
|
assert!(obj.contains_key("type"), "Should have 'type' field");
|
||||||
|
assert!(obj.contains_key("ttl"), "Should have 'ttl' field");
|
||||||
|
|
||||||
|
// 'type' should be a string, not an object
|
||||||
|
assert!(obj["type"].is_string(), "'type' should be a string value");
|
||||||
|
|
||||||
|
// 'ttl' should be a string, not nested
|
||||||
|
assert!(obj["ttl"].is_string(), "'ttl' should be a string value");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_ephemeral_cache_control_structure() {
|
||||||
|
let cache_control = CacheControl::ephemeral();
|
||||||
|
let json_value = serde_json::to_value(&cache_control).unwrap();
|
||||||
|
|
||||||
|
println!("Ephemeral cache control as JSON value: {}", serde_json::to_string_pretty(&json_value).unwrap());
|
||||||
|
|
||||||
|
let obj = json_value.as_object().expect("Should be an object");
|
||||||
|
|
||||||
|
// Should have exactly 1 key (only 'type', no 'ttl')
|
||||||
|
assert_eq!(obj.len(), 1, "Ephemeral cache control should have exactly 1 top-level field");
|
||||||
|
|
||||||
|
// Should have 'type' field
|
||||||
|
assert!(obj.contains_key("type"), "Should have 'type' field");
|
||||||
|
|
||||||
|
// Should NOT have 'ttl' field
|
||||||
|
assert!(!obj.contains_key("ttl"), "Ephemeral should not have 'ttl' field");
|
||||||
|
|
||||||
|
// 'type' should be a string with value "ephemeral"
|
||||||
|
assert_eq!(obj["type"].as_str().unwrap(), "ephemeral");
|
||||||
|
}
|
||||||
164
crates/g3-providers/tests/cache_control_integration_test.rs
Normal file
164
crates/g3-providers/tests/cache_control_integration_test.rs
Normal file
@@ -0,0 +1,164 @@
|
|||||||
|
//! Integration tests for cache_control feature
|
||||||
|
//!
|
||||||
|
//! These tests verify that cache_control is correctly serialized in messages
|
||||||
|
//! for both Anthropic and Databricks providers.
|
||||||
|
|
||||||
|
use g3_providers::{CacheControl, Message, MessageRole};
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_ephemeral_cache_control_serialization() {
|
||||||
|
let cache_control = CacheControl::ephemeral();
|
||||||
|
let json = serde_json::to_value(&cache_control).unwrap();
|
||||||
|
|
||||||
|
println!("Ephemeral cache_control JSON: {}", serde_json::to_string(&json).unwrap());
|
||||||
|
|
||||||
|
assert_eq!(json, json!({
|
||||||
|
"type": "ephemeral"
|
||||||
|
}));
|
||||||
|
|
||||||
|
// Verify no ttl field is present
|
||||||
|
assert!(!json.as_object().unwrap().contains_key("ttl"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_five_minute_cache_control_serialization() {
|
||||||
|
let cache_control = CacheControl::five_minute();
|
||||||
|
let json = serde_json::to_value(&cache_control).unwrap();
|
||||||
|
|
||||||
|
println!("5-minute cache_control JSON: {}", serde_json::to_string(&json).unwrap());
|
||||||
|
|
||||||
|
assert_eq!(json, json!({
|
||||||
|
"type": "ephemeral",
|
||||||
|
"ttl": "5m"
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_one_hour_cache_control_serialization() {
|
||||||
|
let cache_control = CacheControl::one_hour();
|
||||||
|
let json = serde_json::to_value(&cache_control).unwrap();
|
||||||
|
|
||||||
|
println!("1-hour cache_control JSON: {}", serde_json::to_string(&json).unwrap());
|
||||||
|
|
||||||
|
assert_eq!(json, json!({
|
||||||
|
"type": "ephemeral",
|
||||||
|
"ttl": "1h"
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_message_with_ephemeral_cache_control() {
|
||||||
|
let msg = Message::with_cache_control(
|
||||||
|
MessageRole::System,
|
||||||
|
"System prompt".to_string(),
|
||||||
|
CacheControl::ephemeral(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let json = serde_json::to_value(&msg).unwrap();
|
||||||
|
println!("Message with ephemeral cache_control: {}", serde_json::to_string(&json).unwrap());
|
||||||
|
|
||||||
|
let cache_control = json.get("cache_control").expect("cache_control field should exist");
|
||||||
|
assert_eq!(cache_control.get("type").unwrap(), "ephemeral");
|
||||||
|
assert!(!cache_control.as_object().unwrap().contains_key("ttl"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_message_with_five_minute_cache_control() {
|
||||||
|
let msg = Message::with_cache_control(
|
||||||
|
MessageRole::System,
|
||||||
|
"System prompt".to_string(),
|
||||||
|
CacheControl::five_minute(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let json = serde_json::to_value(&msg).unwrap();
|
||||||
|
println!("Message with 5-minute cache_control: {}", serde_json::to_string(&json).unwrap());
|
||||||
|
|
||||||
|
let cache_control = json.get("cache_control").expect("cache_control field should exist");
|
||||||
|
assert_eq!(cache_control.get("type").unwrap(), "ephemeral");
|
||||||
|
assert_eq!(cache_control.get("ttl").unwrap(), "5m");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_message_with_one_hour_cache_control() {
|
||||||
|
let msg = Message::with_cache_control(
|
||||||
|
MessageRole::System,
|
||||||
|
"System prompt".to_string(),
|
||||||
|
CacheControl::one_hour(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let json = serde_json::to_value(&msg).unwrap();
|
||||||
|
println!("Message with 1-hour cache_control: {}", serde_json::to_string(&json).unwrap());
|
||||||
|
|
||||||
|
let cache_control = json.get("cache_control").expect("cache_control field should exist");
|
||||||
|
assert_eq!(cache_control.get("type").unwrap(), "ephemeral");
|
||||||
|
assert_eq!(cache_control.get("ttl").unwrap(), "1h");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_message_without_cache_control() {
|
||||||
|
let msg = Message::new(MessageRole::User, "Hello".to_string());
|
||||||
|
|
||||||
|
let json = serde_json::to_value(&msg).unwrap();
|
||||||
|
println!("Message without cache_control: {}", serde_json::to_string(&json).unwrap());
|
||||||
|
|
||||||
|
// cache_control field should not be present when not set
|
||||||
|
assert!(!json.as_object().unwrap().contains_key("cache_control"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_cache_control_json_format_ephemeral() {
|
||||||
|
let cache_control = CacheControl::ephemeral();
|
||||||
|
let json_str = serde_json::to_string(&cache_control).unwrap();
|
||||||
|
|
||||||
|
println!("Ephemeral JSON string: {}", json_str);
|
||||||
|
|
||||||
|
// Verify exact JSON format
|
||||||
|
assert_eq!(json_str, r#"{"type":"ephemeral"}"#);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_cache_control_json_format_five_minute() {
|
||||||
|
let cache_control = CacheControl::five_minute();
|
||||||
|
let json_str = serde_json::to_string(&cache_control).unwrap();
|
||||||
|
|
||||||
|
println!("5-minute JSON string: {}", json_str);
|
||||||
|
|
||||||
|
// Verify exact JSON format
|
||||||
|
assert_eq!(json_str, r#"{"type":"ephemeral","ttl":"5m"}"#);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_cache_control_json_format_one_hour() {
|
||||||
|
let cache_control = CacheControl::one_hour();
|
||||||
|
let json_str = serde_json::to_string(&cache_control).unwrap();
|
||||||
|
|
||||||
|
println!("1-hour JSON string: {}", json_str);
|
||||||
|
|
||||||
|
// Verify exact JSON format
|
||||||
|
assert_eq!(json_str, r#"{"type":"ephemeral","ttl":"1h"}"#);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_deserialization_ephemeral() {
|
||||||
|
let json_str = r#"{"type":"ephemeral"}"#;
|
||||||
|
let cache_control: CacheControl = serde_json::from_str(json_str).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(cache_control.ttl, None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_deserialization_five_minute() {
|
||||||
|
let json_str = r#"{"type":"ephemeral","ttl":"5m"}"#;
|
||||||
|
let cache_control: CacheControl = serde_json::from_str(json_str).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(cache_control.ttl, Some("5m".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_deserialization_one_hour() {
|
||||||
|
let json_str = r#"{"type":"ephemeral","ttl":"1h"}"#;
|
||||||
|
let cache_control: CacheControl = serde_json::from_str(json_str).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(cache_control.ttl, Some("1h".to_string()));
|
||||||
|
}
|
||||||
39
test-ai-requirements.sh
Executable file
39
test-ai-requirements.sh
Executable file
@@ -0,0 +1,39 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# Test script for AI-enhanced interactive requirements mode
|
||||||
|
|
||||||
|
echo "Testing AI-enhanced interactive requirements mode..."
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# Create a test workspace
|
||||||
|
TEST_WORKSPACE="/tmp/g3-test-interactive-$(date +%s)"
|
||||||
|
mkdir -p "$TEST_WORKSPACE"
|
||||||
|
|
||||||
|
echo "Test workspace: $TEST_WORKSPACE"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
# Create sample brief input
|
||||||
|
BRIEF_INPUT="build a calculator cli in rust with basic operations"
|
||||||
|
|
||||||
|
echo "Brief input:"
|
||||||
|
echo "---"
|
||||||
|
echo "$BRIEF_INPUT"
|
||||||
|
echo "---"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
echo "This will:"
|
||||||
|
echo "1. Send brief input to AI"
|
||||||
|
echo "2. AI generates structured requirements.md"
|
||||||
|
echo "3. Show enhanced requirements"
|
||||||
|
echo "4. Prompt for confirmation (y/e/n)"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
echo "To test manually, run:"
|
||||||
|
echo "cargo run -- --autonomous --interactive-requirements --workspace $TEST_WORKSPACE"
|
||||||
|
echo ""
|
||||||
|
echo "Then type: $BRIEF_INPUT"
|
||||||
|
echo "Press Ctrl+D"
|
||||||
|
echo "Review the AI-generated requirements"
|
||||||
|
echo "Choose 'y' to proceed, 'e' to edit, or 'n' to cancel"
|
||||||
|
echo ""
|
||||||
|
|
||||||
|
echo "Test workspace will be at: $TEST_WORKSPACE"
|
||||||
70 test_anthropic_fix.md Normal file
@@ -0,0 +1,70 @@
# Anthropic max_tokens Error Fix - Test Plan

## Changes Made

### 1. Fixed Context Window Size Detection
- **Problem**: Code used hardcoded 200k limit for Anthropic instead of configured max_tokens
- **Fix**: Modified `determine_context_length()` to check configured max_tokens first before falling back to defaults (see the sketch below)
- **Files**: `crates/g3-core/src/lib.rs` lines 923-945, 967-985
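A minimal sketch of the max_tokens-first lookup, assuming the configured value arrives as an `Option` alongside a provider name; the parameters and fallback constants here are illustrative, not the actual g3-core signature:

```rust
// Hypothetical sketch: prefer the configured max_tokens and only fall back
// to a provider default when none is set. Defaults are illustrative.
fn determine_context_length(configured_max_tokens: Option<u32>, provider: &str) -> u32 {
    match configured_max_tokens {
        Some(n) if n > 0 => n,
        _ => match provider {
            "anthropic" => 200_000, // the previously hardcoded fallback
            _ => 128_000,
        },
    }
}
```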

### 2. Added Thinning Before Summarization
- **Problem**: Code attempted summarization even when context window was nearly full
- **Fix**: Added logic to try thinning first when context usage is between 80-90% (see the sketch below)
- **Files**: `crates/g3-core/src/lib.rs` lines 2415-2439
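A minimal sketch of the 80-90% rule, assuming usage is available as a percentage and thinning/summarization are passed in as callbacks; the helper names are hypothetical, not the actual lib.rs code:

```rust
// Hypothetical sketch: thin first in the 80-90% band, and only summarize if
// thinning did not bring usage back under the threshold.
fn reduce_context(usage_pct: f64, thin: impl FnOnce() -> f64, summarize: impl FnOnce()) {
    if (80.0..90.0).contains(&usage_pct) {
        let usage_after_thinning = thin();
        if usage_after_thinning < 80.0 {
            return; // thinning freed enough room; skip summarization
        }
    }
    summarize();
}
```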

### 3. Added Capacity Checks Before Summarization
- **Problem**: No validation that sufficient tokens remained for summarization
- **Fix**: Added capacity checks for all provider types with helpful error messages (see the sketch below)
- **Files**: `crates/g3-core/src/lib.rs` lines 2480-2520
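A minimal sketch of such a check, assuming token counts are tracked in `usize`; the function name and error wording are illustrative, not the exact strings used in g3-core:

```rust
// Hypothetical sketch: refuse to start a summarization pass when too little
// room remains, and suggest the manual commands instead of letting the API
// reject the request.
fn check_summarization_capacity(used_tokens: usize, limit: usize, buffer: usize) -> Result<(), String> {
    if used_tokens + buffer >= limit {
        return Err(format!(
            "Context is at {used_tokens}/{limit} tokens; not enough room to summarize. \
             Try /thinnify or /compact first."
        ));
    }
    Ok(())
}
```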

### 4. Improved Error Messages
- **Problem**: Generic errors when summarization failed
- **Fix**: Specific error messages suggesting `/thinnify` and `/compact` commands
- **Files**: Multiple locations in summarization logic

### 5. Dynamic Buffer Calculation
- **Problem**: Fixed 5k buffer regardless of model size
- **Fix**: Proportional buffer (2.5% of model limit, min 1k, max 10k; see the sketch below)
- **Files**: `crates/g3-core/src/lib.rs` line 2487
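The proportional buffer is essentially a one-liner; this sketch assumes the limit is expressed in tokens and that the 2.5% factor is applied before clamping (the exact constants on lib.rs line 2487 may differ):

```rust
// 2.5% of the model's context limit, clamped to the 1k-10k token range.
fn summarization_buffer(model_limit: usize) -> usize {
    ((model_limit as f64 * 0.025) as usize).clamp(1_000, 10_000)
}

// A 200_000-token limit yields a 5_000-token buffer;
// a 32_000-token limit hits the 1_000-token floor.
```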

## Test Cases

### Test 1: Configured max_tokens Respected
```toml
# In g3.toml
[providers.anthropic]
api_key = "your-key"
model = "claude-3-5-sonnet-20241022"
max_tokens = 50000 # Should use this instead of 200k default
```

### Test 2: Thinning Before Summarization
- Fill context to 85% capacity
- Verify thinning is attempted before summarization
- Check that summarization is skipped if thinning resolves the issue

### Test 3: Capacity Error Handling
- Fill context to 98% capacity
- Verify helpful error message is shown instead of API error
- Check that `/thinnify` and `/compact` commands are suggested

### Test 4: Provider-Specific Handling
- Test with different providers (anthropic, databricks, embedded)
- Verify each uses appropriate capacity checks and buffers

## Expected Behavior

1. **No more max_tokens API errors** from Anthropic when context window is full
2. **Automatic thinning** when approaching capacity (80-90%)
3. **Clear error messages** with actionable suggestions when at capacity
4. **Respect configured limits** instead of hardcoded defaults
5. **Graceful degradation** with helpful user guidance

## Manual Testing Commands

```bash
# Test with small max_tokens to trigger the issue quickly
g3 --chat
# Then paste large amounts of text to fill context window
# Verify thinning and error handling work correctly
```