Merge sessions/hopper/1156b5c9

This commit is contained in:
Dhanji R. Prasanna
2026-01-12 11:53:14 +05:30
4 changed files with 1358 additions and 0 deletions

View File

@@ -0,0 +1,236 @@
//! Compaction Behavior Integration Tests
//!
//! CHARACTERIZATION: These tests verify the observable behavior of context
//! compaction through stable public interfaces.
//!
//! What these tests protect:
//! - Compaction configuration calculation (token caps, thinking mode)
//! - Summary message building from conversation history
//! - Compaction result handling (success/failure)
//!
//! What these tests intentionally do NOT assert:
//! - Internal implementation details of compaction
//! - Specific LLM responses (mocked at provider boundary)
//! - Exact token counts (only relative behavior)
use g3_core::compaction::{
calculate_capped_summary_tokens, should_disable_thinking, build_summary_messages,
CompactionResult, SUMMARY_MIN_TOKENS,
};
use g3_core::ContextWindow;
use g3_providers::{Message, MessageRole};
// =============================================================================
// Test: Token cap calculation for different providers
// =============================================================================
mod token_cap_calculation {
    use super::*;

    /// Requests 50000 summary tokens for "anthropic" and asserts the result
    /// lands between `SUMMARY_MIN_TOKENS` and 10000.
    #[test]
    fn test_anthropic_token_cap() {
        let cfg = g3_config::Config::default();

        // The requested amount is deliberately far above the asserted ceiling.
        let summary_cap = calculate_capped_summary_tokens(&cfg, "anthropic", 50000);

        assert!(
            summary_cap <= 10000,
            "Anthropic should cap at 10000 by default, got {}",
            summary_cap
        );
        assert!(
            summary_cap >= SUMMARY_MIN_TOKENS,
            "Should respect minimum floor"
        );
    }

    /// Requests 50000 summary tokens for "databricks" and asserts the result
    /// lands between `SUMMARY_MIN_TOKENS` and 10000.
    #[test]
    fn test_databricks_token_cap() {
        let cfg = g3_config::Config::default();

        let summary_cap = calculate_capped_summary_tokens(&cfg, "databricks", 50000);

        assert!(
            summary_cap <= 10000,
            "Databricks should cap at 10000, got {}",
            summary_cap
        );
        assert!(
            summary_cap >= SUMMARY_MIN_TOKENS,
            "Should respect minimum floor"
        );
    }

    /// Requests 50000 summary tokens for "embedded" and asserts the result
    /// lands between `SUMMARY_MIN_TOKENS` and 3000.
    #[test]
    fn test_embedded_token_cap() {
        let cfg = g3_config::Config::default();

        let summary_cap = calculate_capped_summary_tokens(&cfg, "embedded", 50000);

        assert!(
            summary_cap <= 3000,
            "Embedded should cap at 3000, got {}",
            summary_cap
        );
        assert!(
            summary_cap >= SUMMARY_MIN_TOKENS,
            "Should respect minimum floor"
        );
    }

    /// Requests 50000 summary tokens for an unrecognised provider name and
    /// asserts the result lands between `SUMMARY_MIN_TOKENS` and 5000.
    #[test]
    fn test_unknown_provider_token_cap() {
        let cfg = g3_config::Config::default();

        let summary_cap = calculate_capped_summary_tokens(&cfg, "unknown_provider", 50000);

        assert!(
            summary_cap <= 5000,
            "Unknown providers should cap at 5000, got {}",
            summary_cap
        );
        assert!(
            summary_cap >= SUMMARY_MIN_TOKENS,
            "Should respect minimum floor"
        );
    }

    /// A request of 2000 tokens must come back unchanged: the cap only
    /// lowers values, it never raises them.
    #[test]
    fn test_small_base_tokens_preserved() {
        let cfg = g3_config::Config::default();

        let summary_cap = calculate_capped_summary_tokens(&cfg, "anthropic", 2000);

        assert_eq!(summary_cap, 2000, "Small base tokens should be preserved");
    }

    /// A request of 100 tokens must be lifted to exactly `SUMMARY_MIN_TOKENS`.
    #[test]
    fn test_minimum_floor_enforced() {
        let cfg = g3_config::Config::default();

        let summary_cap = calculate_capped_summary_tokens(&cfg, "anthropic", 100);

        assert_eq!(
            summary_cap, SUMMARY_MIN_TOKENS,
            "Minimum floor should be enforced"
        );
    }
}
// =============================================================================
// Test: Thinking mode disable logic
// =============================================================================
mod thinking_mode_disable {
    use super::*;

    /// With a default configuration, asking about "anthropic" with a 5000
    /// token budget must not request that thinking be disabled.
    #[test]
    fn test_no_thinking_config_no_disable() {
        let cfg = g3_config::Config::default();

        let disable_requested = should_disable_thinking(&cfg, "anthropic", 5000);

        assert!(
            !disable_requested,
            "Should not disable thinking when no config exists"
        );
    }

    /// With a default configuration, asking about "databricks" with a 1000
    /// token budget must not request that thinking be disabled.
    #[test]
    fn test_non_anthropic_no_thinking_disable() {
        let cfg = g3_config::Config::default();

        let disable_requested = should_disable_thinking(&cfg, "databricks", 1000);

        assert!(
            !disable_requested,
            "Non-Anthropic providers should not disable thinking"
        );
    }
}
// =============================================================================
// Test: Summary message building
// =============================================================================
mod summary_message_building {
    use super::*;

    /// A three-message conversation yields a two-message summary request:
    /// a system instruction followed by a user message embedding the
    /// conversation text.
    #[test]
    fn test_build_summary_messages_basic() {
        let mut window = ContextWindow::new(10000);

        // The conversation, in the order it is appended to the window.
        let transcript = [
            (MessageRole::System, "You are a helpful assistant."),
            (MessageRole::User, "Hello, how are you?"),
            (MessageRole::Assistant, "I'm doing well, thank you!"),
        ];
        for (role, text) in transcript {
            window.add_message(Message::new(role, text.to_string()));
        }

        let summary_request = build_summary_messages(&window);

        // Check the length first so the indexing below cannot go out of bounds.
        assert_eq!(
            summary_request.len(),
            2,
            "Should have system and user messages"
        );

        // Slot 0: the summarisation instruction.
        let instruction = &summary_request[0];
        assert!(matches!(instruction.role, MessageRole::System));
        assert!(instruction.content.contains("concise summaries"));

        // Slot 1: the user message carrying the conversation itself.
        let payload = &summary_request[1];
        assert!(matches!(payload.role, MessageRole::User));
        assert!(payload.content.contains("Hello, how are you?"));
        assert!(payload.content.contains("I'm doing well"));
    }

    /// An empty context window still produces the same system + user shape.
    #[test]
    fn test_build_summary_messages_empty_conversation() {
        let window = ContextWindow::new(10000);

        let summary_request = build_summary_messages(&window);

        assert_eq!(summary_request.len(), 2);
        assert!(matches!(summary_request[0].role, MessageRole::System));
        assert!(matches!(summary_request[1].role, MessageRole::User));
    }

    /// Fifty user/assistant exchanges: the first and last user messages and
    /// the last assistant response must all appear in the summary request.
    #[test]
    fn test_build_summary_messages_long_conversation() {
        let mut window = ContextWindow::new(100000);

        for turn in 0..50 {
            let question = Message::new(
                MessageRole::User,
                format!("User message number {}", turn),
            );
            window.add_message(question);

            let answer = Message::new(
                MessageRole::Assistant,
                format!("Assistant response number {}", turn),
            );
            window.add_message(answer);
        }

        let summary_request = build_summary_messages(&window);

        let payload_text = &summary_request[1].content;
        assert!(payload_text.contains("User message number 0"));
        assert!(payload_text.contains("User message number 49"));
        assert!(payload_text.contains("Assistant response number 49"));
    }
}
// =============================================================================
// Test: CompactionResult behavior
// =============================================================================
mod compaction_result {
    use super::*;

    /// `CompactionResult::success(5000)` sets the success flag, records the
    /// saved character count, and carries no error.
    #[test]
    fn test_success_result() {
        let outcome = CompactionResult::success(5000);

        assert!(outcome.success);
        assert_eq!(outcome.chars_saved, 5000);
        assert!(outcome.error.is_none());
    }

    /// `CompactionResult::failure(..)` clears the success flag, reports zero
    /// characters saved, and keeps the supplied error text.
    #[test]
    fn test_failure_result() {
        let outcome = CompactionResult::failure("API error".to_string());

        assert!(!outcome.success);
        assert_eq!(outcome.chars_saved, 0);
        assert_eq!(outcome.error, Some("API error".to_string()));
    }

    /// Saving zero characters still counts as a success.
    #[test]
    fn test_zero_chars_saved_success() {
        let outcome = CompactionResult::success(0);

        assert!(outcome.success);
        assert_eq!(outcome.chars_saved, 0);
    }
}

View File

@@ -0,0 +1,356 @@
//! Error Classification Integration Tests
//!
//! CHARACTERIZATION: These tests verify the observable behavior of error
//! classification through stable public interfaces.
//!
//! What these tests protect:
//! - Error messages are correctly classified as recoverable/non-recoverable
//! - Specific error types (rate limit, timeout, server error) are detected
//! - Retry delay calculation produces reasonable values
//!
//! What these tests intentionally do NOT assert:
//! - Exact delay values (only ranges and relative behavior)
//! - Internal classification implementation details
use g3_core::error_handling::{
classify_error, calculate_retry_delay, ErrorType, RecoverableError,
};
// =============================================================================
// Test: Error classification for recoverable errors
// =============================================================================
mod recoverable_error_classification {
    use super::*;

    /// "Rate limit exceeded" must classify as a recoverable rate limit.
    #[test]
    fn test_rate_limit_detected() {
        let err = anyhow::anyhow!("Rate limit exceeded");
        let classification = classify_error(&err);

        let is_rate_limit = matches!(
            classification,
            ErrorType::Recoverable(RecoverableError::RateLimit)
        );
        assert!(
            is_rate_limit,
            "Rate limit should be recoverable: {:?}",
            classification
        );
    }

    /// A message carrying HTTP status 429 must classify as a rate limit.
    #[test]
    fn test_429_status_detected() {
        let err = anyhow::anyhow!("HTTP 429 Too Many Requests");
        let classification = classify_error(&err);

        let is_rate_limit = matches!(
            classification,
            ErrorType::Recoverable(RecoverableError::RateLimit)
        );
        assert!(
            is_rate_limit,
            "429 should be rate limit: {:?}",
            classification
        );
    }

    /// "Request timed out" must classify as a recoverable timeout.
    #[test]
    fn test_timeout_detected() {
        let err = anyhow::anyhow!("Request timed out");
        let classification = classify_error(&err);

        let is_timeout = matches!(
            classification,
            ErrorType::Recoverable(RecoverableError::Timeout)
        );
        assert!(
            is_timeout,
            "Timeout should be recoverable: {:?}",
            classification
        );
    }

    /// A message mentioning status 500 must classify as a server error.
    #[test]
    fn test_server_error_500_detected() {
        let err = anyhow::anyhow!("Server error 500");
        let classification = classify_error(&err);

        let is_server_error = matches!(
            classification,
            ErrorType::Recoverable(RecoverableError::ServerError)
        );
        assert!(
            is_server_error,
            "500 should be server error: {:?}",
            classification
        );
    }

    /// "502 Bad Gateway" must classify as a server error.
    #[test]
    fn test_server_error_502_detected() {
        let err = anyhow::anyhow!("502 Bad Gateway");
        let classification = classify_error(&err);

        let is_server_error = matches!(
            classification,
            ErrorType::Recoverable(RecoverableError::ServerError)
        );
        assert!(
            is_server_error,
            "502 should be server error: {:?}",
            classification
        );
    }

    /// "503 Service Unavailable" must classify as a server error.
    #[test]
    fn test_server_error_503_detected() {
        let err = anyhow::anyhow!("503 Service Unavailable");
        let classification = classify_error(&err);

        let is_server_error = matches!(
            classification,
            ErrorType::Recoverable(RecoverableError::ServerError)
        );
        assert!(
            is_server_error,
            "503 should be server error: {:?}",
            classification
        );
    }

    /// "Connection refused" must classify as a recoverable network error.
    #[test]
    fn test_network_error_detected() {
        let err = anyhow::anyhow!("Connection refused");
        let classification = classify_error(&err);

        let is_network_error = matches!(
            classification,
            ErrorType::Recoverable(RecoverableError::NetworkError)
        );
        assert!(
            is_network_error,
            "Connection refused should be network error: {:?}",
            classification
        );
    }

    /// "Connection reset by peer" must classify as a network error.
    #[test]
    fn test_connection_reset_detected() {
        let err = anyhow::anyhow!("Connection reset by peer");
        let classification = classify_error(&err);

        let is_network_error = matches!(
            classification,
            ErrorType::Recoverable(RecoverableError::NetworkError)
        );
        assert!(
            is_network_error,
            "Connection reset should be network error: {:?}",
            classification
        );
    }

    /// A message containing "overloaded" must classify as model-busy.
    #[test]
    fn test_model_busy_detected() {
        let err = anyhow::anyhow!("Server is overloaded");
        let classification = classify_error(&err);

        let is_busy = matches!(
            classification,
            ErrorType::Recoverable(RecoverableError::ModelBusy)
        );
        assert!(
            is_busy,
            "Overloaded should be model busy: {:?}",
            classification
        );
    }

    /// CHARACTERIZATION: the sample message pairs a "400 Bad Request" prefix
    /// with context-length keywords; per the original note both parts are
    /// needed for a `ContextLengthExceeded` classification.
    #[test]
    fn test_context_length_exceeded_detected() {
        let err =
            anyhow::anyhow!("400 Bad Request: context_length_exceeded: too many tokens");
        let classification = classify_error(&err);

        let is_context_overflow = matches!(
            classification,
            ErrorType::Recoverable(RecoverableError::ContextLengthExceeded)
        );
        assert!(
            is_context_overflow,
            "Context length exceeded should be detected: {:?}",
            classification
        );
    }

    /// CHARACTERIZATION: per the original note the message must contain
    /// "token" together with "limit" or "exceeded" to be a `TokenLimit`.
    #[test]
    fn test_token_limit_detected() {
        let err = anyhow::anyhow!("token limit exceeded");
        let classification = classify_error(&err);

        let is_token_limit = matches!(
            classification,
            ErrorType::Recoverable(RecoverableError::TokenLimit)
        );
        assert!(
            is_token_limit,
            "Token limit should be detected: {:?}",
            classification
        );
    }
}
// =============================================================================
// Test: Error classification for non-recoverable errors
// =============================================================================
mod non_recoverable_error_classification {
    use super::*;

    /// "Invalid API key" must not be treated as retryable.
    #[test]
    fn test_invalid_api_key_non_recoverable() {
        let err = anyhow::anyhow!("Invalid API key");
        let classification = classify_error(&err);

        let is_fatal = matches!(classification, ErrorType::NonRecoverable);
        assert!(
            is_fatal,
            "Invalid API key should be non-recoverable: {:?}",
            classification
        );
    }

    /// "Authentication failed" must not be treated as retryable.
    #[test]
    fn test_auth_failure_non_recoverable() {
        let err = anyhow::anyhow!("Authentication failed");
        let classification = classify_error(&err);

        let is_fatal = matches!(classification, ErrorType::NonRecoverable);
        assert!(
            is_fatal,
            "Auth failure should be non-recoverable: {:?}",
            classification
        );
    }

    /// A message with no recognised keywords falls through to non-recoverable.
    #[test]
    fn test_generic_error_non_recoverable() {
        let err = anyhow::anyhow!("Something went wrong");
        let classification = classify_error(&err);

        let is_fatal = matches!(classification, ErrorType::NonRecoverable);
        assert!(
            is_fatal,
            "Generic error should be non-recoverable: {:?}",
            classification
        );
    }

    /// "401 Unauthorized" must not be treated as retryable.
    #[test]
    fn test_401_non_recoverable() {
        let err = anyhow::anyhow!("401 Unauthorized");
        let classification = classify_error(&err);

        let is_fatal = matches!(classification, ErrorType::NonRecoverable);
        assert!(
            is_fatal,
            "401 should be non-recoverable: {:?}",
            classification
        );
    }

    /// "403 Forbidden" must not be treated as retryable.
    #[test]
    fn test_403_non_recoverable() {
        let err = anyhow::anyhow!("403 Forbidden");
        let classification = classify_error(&err);

        let is_fatal = matches!(classification, ErrorType::NonRecoverable);
        assert!(
            is_fatal,
            "403 should be non-recoverable: {:?}",
            classification
        );
    }
}
// =============================================================================
// Test: Retry delay calculation
// =============================================================================
mod retry_delay_calculation {
    use super::*;
    use std::time::Duration;

    /// The first retry in default mode must wait between 500ms and 5s.
    #[test]
    fn test_first_retry_delay() {
        let wait = calculate_retry_delay(1, false);

        // Wide bounds on purpose: the delay includes jitter.
        let lower = Duration::from_millis(500);
        let upper = Duration::from_secs(5);
        assert!(wait >= lower, "Delay should be at least 500ms: {:?}", wait);
        assert!(wait <= upper, "Delay should be at most 5s: {:?}", wait);
    }

    /// Across attempts 1..=3 at least one consecutive step must not decrease.
    #[test]
    fn test_delays_increase() {
        let first = calculate_retry_delay(1, false);
        let second = calculate_retry_delay(2, false);
        let third = calculate_retry_delay(3, false);

        // Jitter can make a single step shrink, so only the overall trend is
        // checked rather than strict monotonic growth.
        let trends_upward = second >= first || third >= second;
        assert!(
            trends_upward,
            "Delays should generally increase: {:?} -> {:?} -> {:?}",
            first, second, third
        );
    }

    /// Third-attempt delays stay within 30s (default) and 180s (autonomous).
    #[test]
    fn test_autonomous_mode_delays() {
        let interactive_wait = calculate_retry_delay(3, false);
        let autonomous_wait = calculate_retry_delay(3, true);

        // Only upper bounds are asserted; with jitter the two modes are not
        // compared against each other.
        assert!(
            interactive_wait <= Duration::from_secs(30),
            "Default delay should be reasonable: {:?}",
            interactive_wait
        );
        assert!(
            autonomous_wait <= Duration::from_secs(180),
            "Autonomous delay should be reasonable: {:?}",
            autonomous_wait
        );
    }

    /// Even at attempt 10 the default-mode delay must not exceed 15s.
    #[test]
    fn test_delay_cap() {
        let wait = calculate_retry_delay(10, false);

        let ceiling = Duration::from_secs(15);
        assert!(
            wait <= ceiling,
            "Default mode delay should be capped: {:?}",
            wait
        );
    }

    /// CHARACTERIZATION: at attempt 10 the autonomous-mode delay must not
    /// exceed 300s.
    #[test]
    fn test_autonomous_delay_cap() {
        let wait = calculate_retry_delay(10, true);

        let ceiling = Duration::from_secs(300);
        assert!(
            wait <= ceiling,
            "Autonomous delay should be capped: {:?}",
            wait
        );
    }
}
// =============================================================================
// Test: Edge cases and priority
// =============================================================================
mod edge_cases {
    use super::*;

    /// A message with both rate-limit and timeout wording must classify as
    /// a rate limit.
    #[test]
    fn test_rate_limit_priority_over_timeout() {
        let err = anyhow::anyhow!("Rate limit exceeded after timeout");
        let classification = classify_error(&err);

        let is_rate_limit = matches!(
            classification,
            ErrorType::Recoverable(RecoverableError::RateLimit)
        );
        assert!(
            is_rate_limit,
            "Rate limit should take priority: {:?}",
            classification
        );
    }

    /// An all-uppercase message must still classify as a rate limit.
    #[test]
    fn test_case_insensitive_detection() {
        let err = anyhow::anyhow!("RATE LIMIT EXCEEDED");
        let classification = classify_error(&err);

        let is_rate_limit = matches!(
            classification,
            ErrorType::Recoverable(RecoverableError::RateLimit)
        );
        assert!(
            is_rate_limit,
            "Should detect uppercase: {:?}",
            classification
        );
    }

    /// An empty message must fall through to non-recoverable.
    #[test]
    fn test_empty_error_message() {
        let err = anyhow::anyhow!("");
        let classification = classify_error(&err);

        let is_fatal = matches!(classification, ErrorType::NonRecoverable);
        assert!(
            is_fatal,
            "Empty error should be non-recoverable: {:?}",
            classification
        );
    }

    /// CHARACTERIZATION: "Connection timeout" is expected to classify as a
    /// network error, not a timeout. The original note attributes this to
    /// the "connection" keyword taking priority over "timeout".
    #[test]
    fn test_connection_timeout_classification() {
        let err = anyhow::anyhow!("Connection timeout");
        let classification = classify_error(&err);

        let is_network_error = matches!(
            classification,
            ErrorType::Recoverable(RecoverableError::NetworkError)
        );
        assert!(
            is_network_error,
            "Connection timeout should be network error (per priority): {:?}",
            classification
        );
    }
}

View File

@@ -0,0 +1,300 @@
//! Retry Behavior Integration Tests
//!
//! CHARACTERIZATION: These tests verify the observable behavior of retry
//! infrastructure through stable public interfaces.
//!
//! What these tests protect:
//! - RetryConfig construction and presets
//! - RetryResult state transitions
//! - retry_operation behavior with simulated errors
//!
//! What these tests intentionally do NOT assert:
//! - Internal timing details (only that delays occur)
//! - Specific backoff calculations (only that they increase)
//! - Agent internals (tested via execute_with_retry separately)
use g3_core::retry::{RetryConfig, RetryResult, retry_operation};
use g3_core::ContextWindow;
use g3_core::TaskResult;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;
// =============================================================================
// Test: RetryConfig presets and customization
// =============================================================================
mod retry_config_presets {
    use super::*;

    /// `RetryConfig::default()` is a non-autonomous "agent" with 3 retries.
    #[test]
    fn test_default_config() {
        let preset = RetryConfig::default();

        assert_eq!(preset.max_retries, 3);
        assert!(!preset.is_autonomous);
        assert_eq!(preset.role_name, "agent");
    }

    /// `RetryConfig::player()` is an autonomous "player" with 3 retries.
    #[test]
    fn test_player_preset() {
        let preset = RetryConfig::player();

        assert_eq!(preset.max_retries, 3);
        assert!(preset.is_autonomous, "Player should be autonomous");
        assert_eq!(preset.role_name, "player");
    }

    /// `RetryConfig::coach()` is an autonomous "coach" with 3 retries.
    #[test]
    fn test_coach_preset() {
        let preset = RetryConfig::coach();

        assert_eq!(preset.max_retries, 3);
        assert!(preset.is_autonomous, "Coach should be autonomous");
        assert_eq!(preset.role_name, "coach");
    }

    /// `RetryConfig::planning(role)` is autonomous, has 3 retries, and
    /// carries the caller-supplied role name.
    #[test]
    fn test_planning_preset() {
        let preset = RetryConfig::planning("reviewer");

        assert_eq!(preset.max_retries, 3);
        assert!(preset.is_autonomous, "Planning should be autonomous");
        assert_eq!(preset.role_name, "reviewer");
    }

    /// `with_max_retries` overrides only the retry count; the other player
    /// preset fields are left intact.
    #[test]
    fn test_custom_max_retries() {
        let base = RetryConfig::player();
        let tuned = base.with_max_retries(10);

        assert_eq!(tuned.max_retries, 10);
        assert!(tuned.is_autonomous);
        assert_eq!(tuned.role_name, "player");
    }

    /// Customising the default config keeps it non-autonomous.
    #[test]
    fn test_chained_customization() {
        let tuned = RetryConfig::default().with_max_retries(5);

        assert_eq!(tuned.max_retries, 5);
        assert!(!tuned.is_autonomous);
    }
}
// =============================================================================
// Test: RetryResult state handling
// =============================================================================
mod retry_result_states {
    use super::*;

    /// The `Success` variant reports `is_success()`.
    #[test]
    fn test_success_is_success() {
        let window = ContextWindow::new(1000);
        let task = TaskResult::new("done".to_string(), window);

        let outcome = RetryResult::Success(task);

        assert!(outcome.is_success());
    }

    /// `MaxRetriesReached` does not report success.
    #[test]
    fn test_max_retries_not_success() {
        let outcome = RetryResult::MaxRetriesReached("timeout".to_string());

        assert!(!outcome.is_success());
    }

    /// `ContextLengthExceeded` does not report success.
    #[test]
    fn test_context_exceeded_not_success() {
        let outcome = RetryResult::ContextLengthExceeded("too long".to_string());

        assert!(!outcome.is_success());
    }

    /// `Panic` does not report success.
    #[test]
    fn test_panic_not_success() {
        let outcome = RetryResult::Panic(anyhow::anyhow!("panic occurred"));

        assert!(!outcome.is_success());
    }

    /// `into_result` on `Success` returns the wrapped `TaskResult`, with its
    /// response text intact.
    #[test]
    fn test_into_result_success() {
        let window = ContextWindow::new(1000);
        let task = TaskResult::new("done".to_string(), window);
        let outcome = RetryResult::Success(task);

        let extracted = outcome.into_result();

        assert!(extracted.is_some());
        assert_eq!(extracted.unwrap().response, "done");
    }

    /// `into_result` on a failure variant returns `None`.
    #[test]
    fn test_into_result_failure() {
        let outcome = RetryResult::MaxRetriesReached("error".to_string());

        let extracted = outcome.into_result();

        assert!(extracted.is_none());
    }
}
// =============================================================================
// Test: retry_operation behavior
// =============================================================================
mod retry_operation_behavior {
    use super::*;

    /// An operation that succeeds immediately is invoked exactly once and
    /// its value is returned.
    #[tokio::test]
    async fn test_success_first_try() {
        let call_count = Arc::new(AtomicU32::new(0));
        let call_count_clone = call_count.clone();

        let result = retry_operation(
            "test_op",
            || {
                // Each attempt gets its own handle to the shared counter.
                let count = call_count_clone.clone();
                async move {
                    count.fetch_add(1, Ordering::SeqCst);
                    Ok::<_, anyhow::Error>("success")
                }
            },
            3,
            false,
            |_msg| {},
        )
        .await;

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), "success");
        assert_eq!(
            call_count.load(Ordering::SeqCst),
            1,
            "Should only call once on success"
        );
    }

    /// An "Invalid API key" failure ends the operation after one attempt.
    #[tokio::test]
    async fn test_non_recoverable_fails_immediately() {
        let call_count = Arc::new(AtomicU32::new(0));
        let call_count_clone = call_count.clone();

        let result = retry_operation(
            "test_op",
            || {
                let count = call_count_clone.clone();
                async move {
                    count.fetch_add(1, Ordering::SeqCst);
                    Err::<String, _>(anyhow::anyhow!("Invalid API key"))
                }
            },
            3,
            false,
            |_msg| {},
        )
        .await;

        assert!(result.is_err());
        assert_eq!(
            call_count.load(Ordering::SeqCst),
            1,
            "Non-recoverable should not retry"
        );
    }

    /// A persistently rate-limited operation is attempted until the limit is
    /// hit and then reported as an error.
    #[tokio::test]
    async fn test_recoverable_retries_to_max() {
        let call_count = Arc::new(AtomicU32::new(0));
        let call_count_clone = call_count.clone();

        let result = retry_operation(
            "test_op",
            || {
                let count = call_count_clone.clone();
                async move {
                    count.fetch_add(1, Ordering::SeqCst);
                    // Rate limit is a recoverable error
                    Err::<String, _>(anyhow::anyhow!("Rate limit exceeded"))
                }
            },
            3, // max retries
            false,
            |_msg| {},
        )
        .await;

        assert!(result.is_err());
        // CHARACTERIZATION: with a limit of 3 the operation is invoked exactly
        // 3 times in total, i.e. the limit counts attempts including the first
        // one (it is NOT "initial attempt + 3 retries").
        assert_eq!(
            call_count.load(Ordering::SeqCst),
            3,
            "Should retry up to max"
        );
    }

    /// Two "Server error 500" failures followed by a success yield `Ok`
    /// after exactly three attempts.
    #[tokio::test]
    async fn test_recoverable_succeeds_on_retry() {
        let call_count = Arc::new(AtomicU32::new(0));
        let call_count_clone = call_count.clone();

        let result = retry_operation(
            "test_op",
            || {
                let count = call_count_clone.clone();
                async move {
                    // `fetch_add` returns the value before the increment, so
                    // `current` is the zero-based attempt index.
                    let current = count.fetch_add(1, Ordering::SeqCst);
                    if current < 2 {
                        // Fail first two times with recoverable error
                        Err(anyhow::anyhow!("Server error 500"))
                    } else {
                        // Succeed on third try
                        Ok("success after retry")
                    }
                }
            },
            5, // max retries
            false,
            |_msg| {},
        )
        .await;

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), "success after retry");
        assert_eq!(
            call_count.load(Ordering::SeqCst),
            3,
            "Should succeed on third try"
        );
    }

    /// The print callback receives at least one message mentioning the rate
    /// limit when a rate-limited attempt is retried.
    #[tokio::test]
    async fn test_print_fn_called_on_retry() {
        let messages = Arc::new(std::sync::Mutex::new(Vec::new()));
        let messages_clone = messages.clone();
        let call_count = Arc::new(AtomicU32::new(0));
        let call_count_clone = call_count.clone();

        let result = retry_operation(
            "test_op",
            || {
                let count = call_count_clone.clone();
                async move {
                    let current = count.fetch_add(1, Ordering::SeqCst);
                    if current < 1 {
                        Err(anyhow::anyhow!("Rate limit exceeded"))
                    } else {
                        Ok("success")
                    }
                }
            },
            3,
            false,
            |msg| {
                messages_clone.lock().unwrap().push(msg.to_string());
            },
        )
        .await;

        // The operation fails once and then succeeds, so the overall outcome
        // must be Ok; checking it ensures the messages below were produced by
        // a retry that actually recovered.
        assert!(result.is_ok(), "Operation should succeed on the second attempt");

        let msgs = messages.lock().unwrap();
        assert!(!msgs.is_empty(), "Should have printed retry messages");
        // Should mention the error type
        assert!(
            msgs.iter().any(|m| m.contains("RateLimit") || m.contains("rate")),
            "Should mention rate limit in messages: {:?}",
            msgs
        );
    }
}

View File

@@ -0,0 +1,466 @@
//! Tool Execution Round-Trip Integration Tests
//!
//! CHARACTERIZATION: These tests verify that tools execute correctly through
//! the Agent interface, testing the full round-trip from tool call to result.
//!
//! What these tests protect:
//! - File operations (read, write, str_replace) work end-to-end
//! - Shell command execution produces expected output
//! - TODO operations persist correctly
//! - Error handling for invalid inputs
//!
//! What these tests intentionally do NOT assert:
//! - Internal implementation details of tools
//! - Specific formatting of success messages (only key content)
//! - UI writer behavior (uses NullUiWriter)
use g3_core::ui_writer::NullUiWriter;
use g3_core::{Agent, ToolCall};
use serial_test::serial;
use std::fs;
use tempfile::TempDir;
// =============================================================================
// Test Helpers
// =============================================================================
/// Build an agent whose working directory is the given temporary directory.
///
/// This changes the current directory of the whole test process, which is
/// why the tests that call it are marked `#[serial]`. Panics if the
/// directory change or agent construction fails.
async fn create_test_agent(temp_dir: &TempDir) -> Agent<NullUiWriter> {
    let workspace = temp_dir.path();
    std::env::set_current_dir(workspace).unwrap();

    // Default configuration plus a UI writer that discards all output.
    Agent::new(g3_config::Config::default(), NullUiWriter)
        .await
        .unwrap()
}
/// Assemble a `ToolCall` from a tool name and its JSON arguments.
fn make_tool_call(tool: &str, args: serde_json::Value) -> ToolCall {
    // Shadow the borrowed name with an owned copy for the struct field.
    let tool = String::from(tool);
    ToolCall { tool, args }
}
// =============================================================================
// Test: read_file tool execution
// =============================================================================
mod read_file_execution {
use super::*;
/// Test reading an existing file
#[tokio::test]
#[serial]
async fn test_read_existing_file() {
let temp_dir = TempDir::new().unwrap();
let test_file = temp_dir.path().join("test.txt");
fs::write(&test_file, "Hello, World!\nLine 2\nLine 3").unwrap();
let mut agent = create_test_agent(&temp_dir).await;
let tool_call = make_tool_call(
"read_file",
serde_json::json!({ "file_path": test_file.to_string_lossy() }),
);
let result = agent.execute_tool(&tool_call).await.unwrap();
assert!(result.contains("Hello, World!"), "Should contain file content: {}", result);
assert!(result.contains("Line 2"), "Should contain all lines: {}", result);
}
/// Test reading a non-existent file returns error
#[tokio::test]
#[serial]
async fn test_read_nonexistent_file() {
let temp_dir = TempDir::new().unwrap();
let mut agent = create_test_agent(&temp_dir).await;
let tool_call = make_tool_call(
"read_file",
serde_json::json!({ "file_path": "/nonexistent/path/file.txt" }),
);
let result = agent.execute_tool(&tool_call).await;
// Should return an error or error message
assert!(
result.is_err() || result.as_ref().unwrap().contains("error") || result.as_ref().unwrap().contains("not found") || result.as_ref().unwrap().contains("No such file"),
"Should indicate file not found: {:?}", result
);
}
/// Test reading with character range
#[tokio::test]
#[serial]
async fn test_read_file_with_range() {
let temp_dir = TempDir::new().unwrap();
let test_file = temp_dir.path().join("test.txt");
fs::write(&test_file, "0123456789ABCDEF").unwrap();
let mut agent = create_test_agent(&temp_dir).await;
let tool_call = make_tool_call(
"read_file",
serde_json::json!({
"file_path": test_file.to_string_lossy(),
"start": 5,
"end": 10
}),
);
let result = agent.execute_tool(&tool_call).await.unwrap();
// Should contain the substring from position 5 to 10
assert!(result.contains("56789"), "Should contain range content: {}", result);
}
}
// =============================================================================
// Test: write_file tool execution
// =============================================================================
mod write_file_execution {
    use super::*;

    /// Writing to a path that does not yet exist creates the file with
    /// exactly the requested content and reports success.
    #[tokio::test]
    #[serial]
    async fn test_write_new_file() {
        let temp_dir = TempDir::new().unwrap();
        let new_file = temp_dir.path().join("new_file.txt");
        assert!(!new_file.exists(), "File should not exist initially");

        let mut agent = create_test_agent(&temp_dir).await;
        let tool_call = make_tool_call(
            "write_file",
            serde_json::json!({
                "file_path": new_file.to_string_lossy(),
                "content": "New content here"
            }),
        );

        let result = agent.execute_tool(&tool_call).await.unwrap();

        // `str::contains("")` is true for every string, so an empty pattern
        // must not appear in this disjunction or the check can never fail.
        let message = result.to_lowercase();
        assert!(
            message.contains("success") || message.contains("wrote"),
            "Should report success: {}",
            result
        );

        // The file on disk is the real evidence that the write happened.
        assert!(new_file.exists(), "File should exist after write");
        let content = fs::read_to_string(&new_file).unwrap();
        assert_eq!(content, "New content here");
    }

    /// Writing to an existing file replaces its previous content.
    #[tokio::test]
    #[serial]
    async fn test_overwrite_existing_file() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("existing.txt");
        fs::write(&test_file, "Original content").unwrap();

        let mut agent = create_test_agent(&temp_dir).await;
        let tool_call = make_tool_call(
            "write_file",
            serde_json::json!({
                "file_path": test_file.to_string_lossy(),
                "content": "Replaced content"
            }),
        );

        let result = agent.execute_tool(&tool_call).await.unwrap();

        // No empty-pattern clause here: it would make the assertion vacuous.
        let message = result.to_lowercase();
        assert!(
            message.contains("success") || message.contains("wrote"),
            "Should report success: {}",
            result
        );

        let content = fs::read_to_string(&test_file).unwrap();
        assert_eq!(content, "Replaced content");
    }

    /// Writing to a path under directories that do not exist yet creates
    /// the intermediate directories as well as the file.
    #[tokio::test]
    #[serial]
    async fn test_write_creates_parent_dirs() {
        let temp_dir = TempDir::new().unwrap();
        let nested_file = temp_dir.path().join("a/b/c/nested.txt");

        let mut agent = create_test_agent(&temp_dir).await;
        let tool_call = make_tool_call(
            "write_file",
            serde_json::json!({
                "file_path": nested_file.to_string_lossy(),
                "content": "Nested content"
            }),
        );

        let result = agent.execute_tool(&tool_call).await.unwrap();

        // No empty-pattern clause here: it would make the assertion vacuous.
        let message = result.to_lowercase();
        assert!(
            message.contains("success") || message.contains("wrote"),
            "Should report success: {}",
            result
        );

        assert!(nested_file.exists(), "Nested file should exist");
        let content = fs::read_to_string(&nested_file).unwrap();
        assert_eq!(content, "Nested content");
    }
}
// =============================================================================
// Test: shell tool execution
// =============================================================================
mod shell_execution {
use super::*;
/// Test simple echo command
#[tokio::test]
#[serial]
async fn test_shell_echo() {
let temp_dir = TempDir::new().unwrap();
let mut agent = create_test_agent(&temp_dir).await;
let tool_call = make_tool_call(
"shell",
serde_json::json!({ "command": "echo 'hello world'" }),
);
let result = agent.execute_tool(&tool_call).await.unwrap();
assert!(result.contains("hello world"), "Should contain echo output: {}", result);
}
/// Test command that produces multi-line output
#[tokio::test]
#[serial]
async fn test_shell_multiline_output() {
let temp_dir = TempDir::new().unwrap();
let mut agent = create_test_agent(&temp_dir).await;
let tool_call = make_tool_call(
"shell",
serde_json::json!({ "command": "echo 'line1'; echo 'line2'; echo 'line3'" }),
);
let result = agent.execute_tool(&tool_call).await.unwrap();
assert!(result.contains("line1"), "Should contain line1: {}", result);
assert!(result.contains("line2"), "Should contain line2: {}", result);
assert!(result.contains("line3"), "Should contain line3: {}", result);
}
/// Test command that fails
#[tokio::test]
#[serial]
async fn test_shell_failing_command() {
let temp_dir = TempDir::new().unwrap();
let mut agent = create_test_agent(&temp_dir).await;
let tool_call = make_tool_call(
"shell",
serde_json::json!({ "command": "exit 1" }),
);
let result = agent.execute_tool(&tool_call).await;
// Should indicate failure (either error or non-zero exit)
assert!(
result.is_err() || result.as_ref().unwrap().contains("exit") || result.as_ref().unwrap().contains("failed") || result.as_ref().unwrap().contains("error"),
"Should indicate command failure: {:?}", result
);
}
/// Test command with working directory context
#[tokio::test]
#[serial]
async fn test_shell_pwd() {
let temp_dir = TempDir::new().unwrap();
let mut agent = create_test_agent(&temp_dir).await;
let tool_call = make_tool_call(
"shell",
serde_json::json!({ "command": "pwd" }),
);
let result = agent.execute_tool(&tool_call).await.unwrap();
// Should show the temp directory path
let temp_path = temp_dir.path().to_string_lossy();
assert!(result.contains(&*temp_path) || result.contains("private"),
"Should show current directory: {} (expected to contain {})", result, temp_path);
}
}
// =============================================================================
// Test: str_replace tool execution
// =============================================================================
mod str_replace_execution {
use super::*;
/// Test applying a simple diff
#[tokio::test]
#[serial]
async fn test_str_replace_simple() {
let temp_dir = TempDir::new().unwrap();
let test_file = temp_dir.path().join("test.txt");
fs::write(&test_file, "line 1\nold line\nline 3\n").unwrap();
let mut agent = create_test_agent(&temp_dir).await;
let diff = "@@ -1,3 +1,3 @@\n line 1\n-old line\n+new line\n line 3\n";
let tool_call = make_tool_call(
"str_replace",
serde_json::json!({
"file_path": test_file.to_string_lossy(),
"diff": diff
}),
);
let result = agent.execute_tool(&tool_call).await.unwrap();
assert!(result.contains("") || result.to_lowercase().contains("applied") || result.to_lowercase().contains("success"),
"Should report success: {}", result);
let content = fs::read_to_string(&test_file).unwrap();
assert!(content.contains("new line"), "Should contain new content: {}", content);
assert!(!content.contains("old line"), "Should not contain old content: {}", content);
}
/// Test diff that adds lines
#[tokio::test]
#[serial]
async fn test_str_replace_add_lines() {
let temp_dir = TempDir::new().unwrap();
let test_file = temp_dir.path().join("test.txt");
fs::write(&test_file, "line 1\nline 3\n").unwrap();
let mut agent = create_test_agent(&temp_dir).await;
let diff = "@@ -1,2 +1,3 @@\n line 1\n+line 2\n line 3\n";
let tool_call = make_tool_call(
"str_replace",
serde_json::json!({
"file_path": test_file.to_string_lossy(),
"diff": diff
}),
);
let result = agent.execute_tool(&tool_call).await.unwrap();
assert!(result.contains("") || result.to_lowercase().contains("applied"),
"Should report success: {}", result);
let content = fs::read_to_string(&test_file).unwrap();
assert!(content.contains("line 2"), "Should contain added line: {}", content);
}
/// Test diff with pattern not found
#[tokio::test]
#[serial]
async fn test_str_replace_pattern_not_found() {
let temp_dir = TempDir::new().unwrap();
let test_file = temp_dir.path().join("test.txt");
fs::write(&test_file, "actual content\n").unwrap();
let mut agent = create_test_agent(&temp_dir).await;
let diff = "@@ -1,1 +1,1 @@\n-nonexistent pattern\n+replacement\n";
let tool_call = make_tool_call(
"str_replace",
serde_json::json!({
"file_path": test_file.to_string_lossy(),
"diff": diff
}),
);
let result = agent.execute_tool(&tool_call).await;
// Should indicate pattern not found
assert!(
result.is_err() || result.as_ref().unwrap().to_lowercase().contains("not found") || result.as_ref().unwrap().to_lowercase().contains("pattern") || result.as_ref().unwrap().to_lowercase().contains("error"),
"Should indicate pattern not found: {:?}", result
);
}
}
// =============================================================================
// Test: TODO tool execution
// =============================================================================
mod todo_execution {
    use super::*;

    /// Success marker looked for in tool output: U+2705 (white heavy check mark).
    ///
    /// Written as an escape so the literal survives ASCII-only tooling.
    /// NOTE(review): the previous literal was an empty string, which looks like
    /// a check-mark emoji lost in transit; confirm against the actual
    /// `todo_write` output.
    const SUCCESS_MARK: &str = "\u{2705}";

    /// Content written with `todo_write` is returned by a later `todo_read`
    /// on the same agent.
    #[tokio::test]
    #[serial]
    async fn test_todo_write_and_read() {
        let temp_dir = TempDir::new().unwrap();
        let mut agent = create_test_agent(&temp_dir).await;

        // Write TODO
        let write_call = make_tool_call(
            "todo_write",
            serde_json::json!({
                "content": "- [ ] Task 1\n- [x] Task 2\n- [ ] Task 3"
            }),
        );
        let write_result = agent.execute_tool(&write_call).await.unwrap();
        // The earlier check used `contains("")`, which matches every string and
        // so never failed; require an actual success signal instead.
        assert!(
            write_result.contains(SUCCESS_MARK) || write_result.to_lowercase().contains("success"),
            "Write should succeed: {}",
            write_result
        );

        // Read TODO
        let read_call = make_tool_call("todo_read", serde_json::json!({}));
        let read_result = agent.execute_tool(&read_call).await.unwrap();
        for task in ["Task 1", "Task 2", "Task 3"] {
            assert!(
                read_result.contains(task),
                "Should contain {}: {}",
                task,
                read_result
            );
        }
    }

    /// Reading before anything was written reports an empty TODO list.
    #[tokio::test]
    #[serial]
    async fn test_todo_read_empty() {
        let temp_dir = TempDir::new().unwrap();
        let mut agent = create_test_agent(&temp_dir).await;

        let read_call = make_tool_call("todo_read", serde_json::json!({}));
        let result = agent.execute_tool(&read_call).await.unwrap();

        // Compare both keywords case-insensitively. Previously "no todo" was
        // matched against the raw output, so "No TODO" could not satisfy it.
        let lowered = result.to_lowercase();
        assert!(
            lowered.contains("empty") || lowered.contains("no todo"),
            "Should indicate empty: {}",
            result
        );
    }

    /// A written TODO list is saved to `todo.g3.md` in the temp directory and
    /// is still there after the agent is dropped.
    #[tokio::test]
    #[serial]
    async fn test_todo_persists_to_file() {
        let temp_dir = TempDir::new().unwrap();
        let todo_path = temp_dir.path().join("todo.g3.md");

        // Scope the agent so it is dropped before the file is inspected.
        {
            let mut agent = create_test_agent(&temp_dir).await;
            let write_call = make_tool_call(
                "todo_write",
                serde_json::json!({
                    "content": "- [ ] Persistent task"
                }),
            );
            agent.execute_tool(&write_call).await.unwrap();
        }

        // File should exist after agent is dropped
        assert!(todo_path.exists(), "TODO file should persist");
        let content = fs::read_to_string(&todo_path).unwrap();
        assert!(content.contains("Persistent task"), "Content should persist: {}", content);
    }
}