Merge sessions/hopper/1156b5c9
This commit is contained in:
236
crates/g3-core/tests/compaction_behavior_test.rs
Normal file
236
crates/g3-core/tests/compaction_behavior_test.rs
Normal file
@@ -0,0 +1,236 @@
|
|||||||
|
//! Compaction Behavior Integration Tests
|
||||||
|
//!
|
||||||
|
//! CHARACTERIZATION: These tests verify the observable behavior of context
|
||||||
|
//! compaction through stable public interfaces.
|
||||||
|
//!
|
||||||
|
//! What these tests protect:
|
||||||
|
//! - Compaction configuration calculation (token caps, thinking mode)
|
||||||
|
//! - Summary message building from conversation history
|
||||||
|
//! - Compaction result handling (success/failure)
|
||||||
|
//!
|
||||||
|
//! What these tests intentionally do NOT assert:
|
||||||
|
//! - Internal implementation details of compaction
|
||||||
|
//! - Specific LLM responses (mocked at provider boundary)
|
||||||
|
//! - Exact token counts (only relative behavior)
|
||||||
|
|
||||||
|
use g3_core::compaction::{
|
||||||
|
calculate_capped_summary_tokens, should_disable_thinking, build_summary_messages,
|
||||||
|
CompactionResult, SUMMARY_MIN_TOKENS,
|
||||||
|
};
|
||||||
|
use g3_core::ContextWindow;
|
||||||
|
use g3_providers::{Message, MessageRole};
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Test: Token cap calculation for different providers
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
mod token_cap_calculation {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
/// Test that Anthropic provider gets appropriate token caps
|
||||||
|
#[test]
|
||||||
|
fn test_anthropic_token_cap() {
|
||||||
|
let config = g3_config::Config::default();
|
||||||
|
|
||||||
|
// Large base tokens should be capped
|
||||||
|
let capped = calculate_capped_summary_tokens(&config, "anthropic", 50000);
|
||||||
|
assert!(capped <= 10000, "Anthropic should cap at 10000 by default, got {}", capped);
|
||||||
|
assert!(capped >= SUMMARY_MIN_TOKENS, "Should respect minimum floor");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test that Databricks provider gets appropriate token caps
|
||||||
|
#[test]
|
||||||
|
fn test_databricks_token_cap() {
|
||||||
|
let config = g3_config::Config::default();
|
||||||
|
|
||||||
|
let capped = calculate_capped_summary_tokens(&config, "databricks", 50000);
|
||||||
|
assert!(capped <= 10000, "Databricks should cap at 10000, got {}", capped);
|
||||||
|
assert!(capped >= SUMMARY_MIN_TOKENS, "Should respect minimum floor");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test that embedded provider gets lower token caps
|
||||||
|
#[test]
|
||||||
|
fn test_embedded_token_cap() {
|
||||||
|
let config = g3_config::Config::default();
|
||||||
|
|
||||||
|
let capped = calculate_capped_summary_tokens(&config, "embedded", 50000);
|
||||||
|
assert!(capped <= 3000, "Embedded should cap at 3000, got {}", capped);
|
||||||
|
assert!(capped >= SUMMARY_MIN_TOKENS, "Should respect minimum floor");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test that unknown providers get conservative caps
|
||||||
|
#[test]
|
||||||
|
fn test_unknown_provider_token_cap() {
|
||||||
|
let config = g3_config::Config::default();
|
||||||
|
|
||||||
|
let capped = calculate_capped_summary_tokens(&config, "unknown_provider", 50000);
|
||||||
|
assert!(capped <= 5000, "Unknown providers should cap at 5000, got {}", capped);
|
||||||
|
assert!(capped >= SUMMARY_MIN_TOKENS, "Should respect minimum floor");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test that small base tokens are preserved (not increased)
|
||||||
|
#[test]
|
||||||
|
fn test_small_base_tokens_preserved() {
|
||||||
|
let config = g3_config::Config::default();
|
||||||
|
|
||||||
|
// If base is already small, it should be preserved (but not below minimum)
|
||||||
|
let capped = calculate_capped_summary_tokens(&config, "anthropic", 2000);
|
||||||
|
assert_eq!(capped, 2000, "Small base tokens should be preserved");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test minimum floor is enforced
|
||||||
|
#[test]
|
||||||
|
fn test_minimum_floor_enforced() {
|
||||||
|
let config = g3_config::Config::default();
|
||||||
|
|
||||||
|
// Even with very small base, minimum should be enforced
|
||||||
|
let capped = calculate_capped_summary_tokens(&config, "anthropic", 100);
|
||||||
|
assert_eq!(capped, SUMMARY_MIN_TOKENS, "Minimum floor should be enforced");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Test: Thinking mode disable logic
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
mod thinking_mode_disable {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
/// Test that thinking mode is not disabled when no thinking config exists
|
||||||
|
#[test]
|
||||||
|
fn test_no_thinking_config_no_disable() {
|
||||||
|
let config = g3_config::Config::default();
|
||||||
|
|
||||||
|
// Without thinking config, should never disable
|
||||||
|
let should_disable = should_disable_thinking(&config, "anthropic", 5000);
|
||||||
|
assert!(!should_disable, "Should not disable thinking when no config exists");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test that non-Anthropic providers don't trigger thinking disable
|
||||||
|
#[test]
|
||||||
|
fn test_non_anthropic_no_thinking_disable() {
|
||||||
|
let config = g3_config::Config::default();
|
||||||
|
|
||||||
|
// Non-Anthropic providers don't have thinking mode
|
||||||
|
let should_disable = should_disable_thinking(&config, "databricks", 1000);
|
||||||
|
assert!(!should_disable, "Non-Anthropic providers should not disable thinking");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Test: Summary message building
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
mod summary_message_building {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
/// Test that summary messages are built correctly from conversation
|
||||||
|
#[test]
|
||||||
|
fn test_build_summary_messages_basic() {
|
||||||
|
let mut context = ContextWindow::new(10000);
|
||||||
|
|
||||||
|
// Add a simple conversation
|
||||||
|
context.add_message(Message::new(
|
||||||
|
MessageRole::System,
|
||||||
|
"You are a helpful assistant.".to_string(),
|
||||||
|
));
|
||||||
|
context.add_message(Message::new(
|
||||||
|
MessageRole::User,
|
||||||
|
"Hello, how are you?".to_string(),
|
||||||
|
));
|
||||||
|
context.add_message(Message::new(
|
||||||
|
MessageRole::Assistant,
|
||||||
|
"I'm doing well, thank you!".to_string(),
|
||||||
|
));
|
||||||
|
|
||||||
|
let messages = build_summary_messages(&context);
|
||||||
|
|
||||||
|
// Should have exactly 2 messages: system prompt and user request
|
||||||
|
assert_eq!(messages.len(), 2, "Should have system and user messages");
|
||||||
|
|
||||||
|
// First should be system message for summarization
|
||||||
|
assert!(matches!(messages[0].role, MessageRole::System));
|
||||||
|
assert!(messages[0].content.contains("concise summaries"));
|
||||||
|
|
||||||
|
// Second should be user message with conversation
|
||||||
|
assert!(matches!(messages[1].role, MessageRole::User));
|
||||||
|
assert!(messages[1].content.contains("Hello, how are you?"));
|
||||||
|
assert!(messages[1].content.contains("I'm doing well"));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test that empty conversation produces valid summary request
|
||||||
|
#[test]
|
||||||
|
fn test_build_summary_messages_empty_conversation() {
|
||||||
|
let context = ContextWindow::new(10000);
|
||||||
|
|
||||||
|
let messages = build_summary_messages(&context);
|
||||||
|
|
||||||
|
// Should still produce valid structure
|
||||||
|
assert_eq!(messages.len(), 2);
|
||||||
|
assert!(matches!(messages[0].role, MessageRole::System));
|
||||||
|
assert!(matches!(messages[1].role, MessageRole::User));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test that long conversations are included in summary request
|
||||||
|
#[test]
|
||||||
|
fn test_build_summary_messages_long_conversation() {
|
||||||
|
let mut context = ContextWindow::new(100000);
|
||||||
|
|
||||||
|
// Add many messages
|
||||||
|
for i in 0..50 {
|
||||||
|
context.add_message(Message::new(
|
||||||
|
MessageRole::User,
|
||||||
|
format!("User message number {}", i),
|
||||||
|
));
|
||||||
|
context.add_message(Message::new(
|
||||||
|
MessageRole::Assistant,
|
||||||
|
format!("Assistant response number {}", i),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let messages = build_summary_messages(&context);
|
||||||
|
|
||||||
|
// Should include all conversation content
|
||||||
|
let user_content = &messages[1].content;
|
||||||
|
assert!(user_content.contains("User message number 0"));
|
||||||
|
assert!(user_content.contains("User message number 49"));
|
||||||
|
assert!(user_content.contains("Assistant response number 49"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Test: CompactionResult behavior
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
mod compaction_result {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
/// Test success result creation
|
||||||
|
#[test]
|
||||||
|
fn test_success_result() {
|
||||||
|
let result = CompactionResult::success(5000);
|
||||||
|
|
||||||
|
assert!(result.success);
|
||||||
|
assert_eq!(result.chars_saved, 5000);
|
||||||
|
assert!(result.error.is_none());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test failure result creation
|
||||||
|
#[test]
|
||||||
|
fn test_failure_result() {
|
||||||
|
let result = CompactionResult::failure("API error".to_string());
|
||||||
|
|
||||||
|
assert!(!result.success);
|
||||||
|
assert_eq!(result.chars_saved, 0);
|
||||||
|
assert_eq!(result.error, Some("API error".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test zero chars saved is valid success
|
||||||
|
#[test]
|
||||||
|
fn test_zero_chars_saved_success() {
|
||||||
|
let result = CompactionResult::success(0);
|
||||||
|
|
||||||
|
assert!(result.success);
|
||||||
|
assert_eq!(result.chars_saved, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
356
crates/g3-core/tests/error_classification_test.rs
Normal file
356
crates/g3-core/tests/error_classification_test.rs
Normal file
@@ -0,0 +1,356 @@
|
|||||||
|
//! Error Classification Integration Tests
|
||||||
|
//!
|
||||||
|
//! CHARACTERIZATION: These tests verify the observable behavior of error
|
||||||
|
//! classification through stable public interfaces.
|
||||||
|
//!
|
||||||
|
//! What these tests protect:
|
||||||
|
//! - Error messages are correctly classified as recoverable/non-recoverable
|
||||||
|
//! - Specific error types (rate limit, timeout, server error) are detected
|
||||||
|
//! - Retry delay calculation produces reasonable values
|
||||||
|
//!
|
||||||
|
//! What these tests intentionally do NOT assert:
|
||||||
|
//! - Exact delay values (only ranges and relative behavior)
|
||||||
|
//! - Internal classification implementation details
|
||||||
|
|
||||||
|
use g3_core::error_handling::{
|
||||||
|
classify_error, calculate_retry_delay, ErrorType, RecoverableError,
|
||||||
|
};
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Test: Error classification for recoverable errors
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
mod recoverable_error_classification {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
/// Test rate limit errors are classified as recoverable
|
||||||
|
#[test]
|
||||||
|
fn test_rate_limit_detected() {
|
||||||
|
let error = anyhow::anyhow!("Rate limit exceeded");
|
||||||
|
let error_type = classify_error(&error);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
matches!(error_type, ErrorType::Recoverable(RecoverableError::RateLimit)),
|
||||||
|
"Rate limit should be recoverable: {:?}", error_type
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test 429 status code is classified as rate limit
|
||||||
|
#[test]
|
||||||
|
fn test_429_status_detected() {
|
||||||
|
let error = anyhow::anyhow!("HTTP 429 Too Many Requests");
|
||||||
|
let error_type = classify_error(&error);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
matches!(error_type, ErrorType::Recoverable(RecoverableError::RateLimit)),
|
||||||
|
"429 should be rate limit: {:?}", error_type
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test timeout errors are classified as recoverable
|
||||||
|
#[test]
|
||||||
|
fn test_timeout_detected() {
|
||||||
|
let error = anyhow::anyhow!("Request timed out");
|
||||||
|
let error_type = classify_error(&error);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
matches!(error_type, ErrorType::Recoverable(RecoverableError::Timeout)),
|
||||||
|
"Timeout should be recoverable: {:?}", error_type
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test server errors (5xx) are classified as recoverable
|
||||||
|
#[test]
|
||||||
|
fn test_server_error_500_detected() {
|
||||||
|
let error = anyhow::anyhow!("Server error 500");
|
||||||
|
let error_type = classify_error(&error);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
matches!(error_type, ErrorType::Recoverable(RecoverableError::ServerError)),
|
||||||
|
"500 should be server error: {:?}", error_type
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test 502 Bad Gateway is classified as server error
|
||||||
|
#[test]
|
||||||
|
fn test_server_error_502_detected() {
|
||||||
|
let error = anyhow::anyhow!("502 Bad Gateway");
|
||||||
|
let error_type = classify_error(&error);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
matches!(error_type, ErrorType::Recoverable(RecoverableError::ServerError)),
|
||||||
|
"502 should be server error: {:?}", error_type
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test 503 Service Unavailable is classified as server error
|
||||||
|
#[test]
|
||||||
|
fn test_server_error_503_detected() {
|
||||||
|
let error = anyhow::anyhow!("503 Service Unavailable");
|
||||||
|
let error_type = classify_error(&error);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
matches!(error_type, ErrorType::Recoverable(RecoverableError::ServerError)),
|
||||||
|
"503 should be server error: {:?}", error_type
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test network errors are classified as recoverable
|
||||||
|
#[test]
|
||||||
|
fn test_network_error_detected() {
|
||||||
|
let error = anyhow::anyhow!("Connection refused");
|
||||||
|
let error_type = classify_error(&error);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
matches!(error_type, ErrorType::Recoverable(RecoverableError::NetworkError)),
|
||||||
|
"Connection refused should be network error: {:?}", error_type
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test connection reset is classified as network error
|
||||||
|
#[test]
|
||||||
|
fn test_connection_reset_detected() {
|
||||||
|
let error = anyhow::anyhow!("Connection reset by peer");
|
||||||
|
let error_type = classify_error(&error);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
matches!(error_type, ErrorType::Recoverable(RecoverableError::NetworkError)),
|
||||||
|
"Connection reset should be network error: {:?}", error_type
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test "overloaded" is classified as busy
|
||||||
|
#[test]
|
||||||
|
fn test_model_busy_detected() {
|
||||||
|
let error = anyhow::anyhow!("Server is overloaded");
|
||||||
|
let error_type = classify_error(&error);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
matches!(error_type, ErrorType::Recoverable(RecoverableError::ModelBusy)),
|
||||||
|
"Overloaded should be model busy: {:?}", error_type
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test context length exceeded requires 400 status code
|
||||||
|
/// CHARACTERIZATION: The error must contain "400" or "bad request" along with
|
||||||
|
/// context length keywords to be classified as ContextLengthExceeded
|
||||||
|
#[test]
|
||||||
|
fn test_context_length_exceeded_detected() {
|
||||||
|
let error = anyhow::anyhow!("400 Bad Request: context_length_exceeded: too many tokens");
|
||||||
|
let error_type = classify_error(&error);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
matches!(error_type, ErrorType::Recoverable(RecoverableError::ContextLengthExceeded)),
|
||||||
|
"Context length exceeded should be detected: {:?}", error_type
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test token limit exceeded is classified correctly
|
||||||
|
/// CHARACTERIZATION: Must contain "token" AND ("limit" OR "exceeded")
|
||||||
|
#[test]
|
||||||
|
fn test_token_limit_detected() {
|
||||||
|
let error = anyhow::anyhow!("token limit exceeded");
|
||||||
|
let error_type = classify_error(&error);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
matches!(error_type, ErrorType::Recoverable(RecoverableError::TokenLimit)),
|
||||||
|
"Token limit should be detected: {:?}", error_type
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Test: Error classification for non-recoverable errors
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
mod non_recoverable_error_classification {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
/// Test invalid API key is non-recoverable
|
||||||
|
#[test]
|
||||||
|
fn test_invalid_api_key_non_recoverable() {
|
||||||
|
let error = anyhow::anyhow!("Invalid API key");
|
||||||
|
let error_type = classify_error(&error);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
matches!(error_type, ErrorType::NonRecoverable),
|
||||||
|
"Invalid API key should be non-recoverable: {:?}", error_type
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test authentication failure is non-recoverable
|
||||||
|
#[test]
|
||||||
|
fn test_auth_failure_non_recoverable() {
|
||||||
|
let error = anyhow::anyhow!("Authentication failed");
|
||||||
|
let error_type = classify_error(&error);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
matches!(error_type, ErrorType::NonRecoverable),
|
||||||
|
"Auth failure should be non-recoverable: {:?}", error_type
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test generic errors are non-recoverable
|
||||||
|
#[test]
|
||||||
|
fn test_generic_error_non_recoverable() {
|
||||||
|
let error = anyhow::anyhow!("Something went wrong");
|
||||||
|
let error_type = classify_error(&error);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
matches!(error_type, ErrorType::NonRecoverable),
|
||||||
|
"Generic error should be non-recoverable: {:?}", error_type
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test 401 Unauthorized is non-recoverable
|
||||||
|
#[test]
|
||||||
|
fn test_401_non_recoverable() {
|
||||||
|
let error = anyhow::anyhow!("401 Unauthorized");
|
||||||
|
let error_type = classify_error(&error);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
matches!(error_type, ErrorType::NonRecoverable),
|
||||||
|
"401 should be non-recoverable: {:?}", error_type
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test 403 Forbidden is non-recoverable
|
||||||
|
#[test]
|
||||||
|
fn test_403_non_recoverable() {
|
||||||
|
let error = anyhow::anyhow!("403 Forbidden");
|
||||||
|
let error_type = classify_error(&error);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
matches!(error_type, ErrorType::NonRecoverable),
|
||||||
|
"403 should be non-recoverable: {:?}", error_type
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Test: Retry delay calculation
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
mod retry_delay_calculation {
|
||||||
|
use super::*;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
/// Test first retry has reasonable delay
|
||||||
|
#[test]
|
||||||
|
fn test_first_retry_delay() {
|
||||||
|
let delay = calculate_retry_delay(1, false);
|
||||||
|
|
||||||
|
// First retry should be around 1-2 seconds (with jitter)
|
||||||
|
assert!(delay >= Duration::from_millis(500), "Delay should be at least 500ms: {:?}", delay);
|
||||||
|
assert!(delay <= Duration::from_secs(5), "Delay should be at most 5s: {:?}", delay);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test delays increase with retry count
|
||||||
|
#[test]
|
||||||
|
fn test_delays_increase() {
|
||||||
|
let delay1 = calculate_retry_delay(1, false);
|
||||||
|
let delay2 = calculate_retry_delay(2, false);
|
||||||
|
let delay3 = calculate_retry_delay(3, false);
|
||||||
|
|
||||||
|
// Later retries should generally have longer delays
|
||||||
|
// (accounting for jitter, we check the trend)
|
||||||
|
assert!(delay2 >= delay1 || delay3 >= delay2,
|
||||||
|
"Delays should generally increase: {:?} -> {:?} -> {:?}", delay1, delay2, delay3);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test autonomous mode has different delays
|
||||||
|
#[test]
|
||||||
|
fn test_autonomous_mode_delays() {
|
||||||
|
let default_delay = calculate_retry_delay(3, false);
|
||||||
|
let autonomous_delay = calculate_retry_delay(3, true);
|
||||||
|
|
||||||
|
// Autonomous mode should have longer delays (spread over 10 minutes)
|
||||||
|
// But with jitter, we just check they're both reasonable
|
||||||
|
assert!(default_delay <= Duration::from_secs(30),
|
||||||
|
"Default delay should be reasonable: {:?}", default_delay);
|
||||||
|
assert!(autonomous_delay <= Duration::from_secs(180),
|
||||||
|
"Autonomous delay should be reasonable: {:?}", autonomous_delay);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test delays are capped at maximum
|
||||||
|
#[test]
|
||||||
|
fn test_delay_cap() {
|
||||||
|
// Even with high retry count, delay should be capped
|
||||||
|
let delay = calculate_retry_delay(10, false);
|
||||||
|
|
||||||
|
assert!(delay <= Duration::from_secs(15),
|
||||||
|
"Default mode delay should be capped: {:?}", delay);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test autonomous mode delay cap
|
||||||
|
/// CHARACTERIZATION: Autonomous mode uses longer delays spread over 10 minutes
|
||||||
|
#[test]
|
||||||
|
fn test_autonomous_delay_cap() {
|
||||||
|
let delay = calculate_retry_delay(10, true);
|
||||||
|
|
||||||
|
// Autonomous mode has longer delays (up to ~200s + jitter)
|
||||||
|
assert!(delay <= Duration::from_secs(300),
|
||||||
|
"Autonomous delay should be capped: {:?}", delay);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Test: Edge cases and priority
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
mod edge_cases {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
/// Test error with multiple keywords uses correct priority
|
||||||
|
#[test]
|
||||||
|
fn test_rate_limit_priority_over_timeout() {
|
||||||
|
// Rate limit should take priority
|
||||||
|
let error = anyhow::anyhow!("Rate limit exceeded after timeout");
|
||||||
|
let error_type = classify_error(&error);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
matches!(error_type, ErrorType::Recoverable(RecoverableError::RateLimit)),
|
||||||
|
"Rate limit should take priority: {:?}", error_type
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test case insensitivity
|
||||||
|
#[test]
|
||||||
|
fn test_case_insensitive_detection() {
|
||||||
|
let error = anyhow::anyhow!("RATE LIMIT EXCEEDED");
|
||||||
|
let error_type = classify_error(&error);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
matches!(error_type, ErrorType::Recoverable(RecoverableError::RateLimit)),
|
||||||
|
"Should detect uppercase: {:?}", error_type
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test empty error message
|
||||||
|
#[test]
|
||||||
|
fn test_empty_error_message() {
|
||||||
|
let error = anyhow::anyhow!("");
|
||||||
|
let error_type = classify_error(&error);
|
||||||
|
|
||||||
|
// Empty message should be non-recoverable
|
||||||
|
assert!(
|
||||||
|
matches!(error_type, ErrorType::NonRecoverable),
|
||||||
|
"Empty error should be non-recoverable: {:?}", error_type
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test connection timeout is network error (not timeout)
|
||||||
|
/// Note: This documents the current behavior where "connection" keyword
|
||||||
|
/// takes priority over "timeout"
|
||||||
|
#[test]
|
||||||
|
fn test_connection_timeout_classification() {
|
||||||
|
let error = anyhow::anyhow!("Connection timeout");
|
||||||
|
let error_type = classify_error(&error);
|
||||||
|
|
||||||
|
// Per memory: "Connection timeout" classifies as NetworkError due to "connection" keyword priority
|
||||||
|
assert!(
|
||||||
|
matches!(error_type, ErrorType::Recoverable(RecoverableError::NetworkError)),
|
||||||
|
"Connection timeout should be network error (per priority): {:?}", error_type
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
300
crates/g3-core/tests/retry_behavior_test.rs
Normal file
300
crates/g3-core/tests/retry_behavior_test.rs
Normal file
@@ -0,0 +1,300 @@
|
|||||||
|
//! Retry Behavior Integration Tests
|
||||||
|
//!
|
||||||
|
//! CHARACTERIZATION: These tests verify the observable behavior of retry
|
||||||
|
//! infrastructure through stable public interfaces.
|
||||||
|
//!
|
||||||
|
//! What these tests protect:
|
||||||
|
//! - RetryConfig construction and presets
|
||||||
|
//! - RetryResult state transitions
|
||||||
|
//! - retry_operation behavior with simulated errors
|
||||||
|
//!
|
||||||
|
//! What these tests intentionally do NOT assert:
|
||||||
|
//! - Internal timing details (only that delays occur)
|
||||||
|
//! - Specific backoff calculations (only that they increase)
|
||||||
|
//! - Agent internals (tested via execute_with_retry separately)
|
||||||
|
|
||||||
|
use g3_core::retry::{RetryConfig, RetryResult, retry_operation};
|
||||||
|
use g3_core::ContextWindow;
|
||||||
|
use g3_core::TaskResult;
|
||||||
|
use std::sync::atomic::{AtomicU32, Ordering};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Test: RetryConfig presets and customization
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
mod retry_config_presets {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
/// Test default config values
|
||||||
|
#[test]
|
||||||
|
fn test_default_config() {
|
||||||
|
let config = RetryConfig::default();
|
||||||
|
|
||||||
|
assert_eq!(config.max_retries, 3);
|
||||||
|
assert!(!config.is_autonomous);
|
||||||
|
assert_eq!(config.role_name, "agent");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test player preset
|
||||||
|
#[test]
|
||||||
|
fn test_player_preset() {
|
||||||
|
let config = RetryConfig::player();
|
||||||
|
|
||||||
|
assert_eq!(config.max_retries, 3);
|
||||||
|
assert!(config.is_autonomous, "Player should be autonomous");
|
||||||
|
assert_eq!(config.role_name, "player");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test coach preset
|
||||||
|
#[test]
|
||||||
|
fn test_coach_preset() {
|
||||||
|
let config = RetryConfig::coach();
|
||||||
|
|
||||||
|
assert_eq!(config.max_retries, 3);
|
||||||
|
assert!(config.is_autonomous, "Coach should be autonomous");
|
||||||
|
assert_eq!(config.role_name, "coach");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test planning preset with custom role
|
||||||
|
#[test]
|
||||||
|
fn test_planning_preset() {
|
||||||
|
let config = RetryConfig::planning("reviewer");
|
||||||
|
|
||||||
|
assert_eq!(config.max_retries, 3);
|
||||||
|
assert!(config.is_autonomous, "Planning should be autonomous");
|
||||||
|
assert_eq!(config.role_name, "reviewer");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test custom max retries
|
||||||
|
#[test]
|
||||||
|
fn test_custom_max_retries() {
|
||||||
|
let config = RetryConfig::player().with_max_retries(10);
|
||||||
|
|
||||||
|
assert_eq!(config.max_retries, 10);
|
||||||
|
// Other fields should be preserved
|
||||||
|
assert!(config.is_autonomous);
|
||||||
|
assert_eq!(config.role_name, "player");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test chaining customizations
|
||||||
|
#[test]
|
||||||
|
fn test_chained_customization() {
|
||||||
|
let config = RetryConfig::default()
|
||||||
|
.with_max_retries(5);
|
||||||
|
|
||||||
|
assert_eq!(config.max_retries, 5);
|
||||||
|
assert!(!config.is_autonomous); // Default is not autonomous
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Test: RetryResult state handling
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
mod retry_result_states {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
/// Test success result
|
||||||
|
#[test]
|
||||||
|
fn test_success_is_success() {
|
||||||
|
let ctx = ContextWindow::new(1000);
|
||||||
|
let result = RetryResult::Success(TaskResult::new("done".to_string(), ctx));
|
||||||
|
|
||||||
|
assert!(result.is_success());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test max retries reached is not success
|
||||||
|
#[test]
|
||||||
|
fn test_max_retries_not_success() {
|
||||||
|
let result = RetryResult::MaxRetriesReached("timeout".to_string());
|
||||||
|
|
||||||
|
assert!(!result.is_success());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test context length exceeded is not success
|
||||||
|
#[test]
|
||||||
|
fn test_context_exceeded_not_success() {
|
||||||
|
let result = RetryResult::ContextLengthExceeded("too long".to_string());
|
||||||
|
|
||||||
|
assert!(!result.is_success());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test panic is not success
|
||||||
|
#[test]
|
||||||
|
fn test_panic_not_success() {
|
||||||
|
let result = RetryResult::Panic(anyhow::anyhow!("panic occurred"));
|
||||||
|
|
||||||
|
assert!(!result.is_success());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test into_result extracts TaskResult on success
|
||||||
|
#[test]
|
||||||
|
fn test_into_result_success() {
|
||||||
|
let ctx = ContextWindow::new(1000);
|
||||||
|
let result = RetryResult::Success(TaskResult::new("done".to_string(), ctx));
|
||||||
|
|
||||||
|
let task_result = result.into_result();
|
||||||
|
assert!(task_result.is_some());
|
||||||
|
assert_eq!(task_result.unwrap().response, "done");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test into_result returns None on failure
|
||||||
|
#[test]
|
||||||
|
fn test_into_result_failure() {
|
||||||
|
let result = RetryResult::MaxRetriesReached("error".to_string());
|
||||||
|
|
||||||
|
let task_result = result.into_result();
|
||||||
|
assert!(task_result.is_none());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Test: retry_operation behavior
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
mod retry_operation_behavior {
    use super::*;

    /// Test successful operation on first try.
    ///
    /// An operation that succeeds immediately must be invoked exactly once
    /// and its value returned unchanged.
    #[tokio::test]
    async fn test_success_first_try() {
        // Shared atomic counter observes how many times the closure runs.
        let call_count = Arc::new(AtomicU32::new(0));
        let call_count_clone = call_count.clone();

        let result = retry_operation(
            "test_op",
            || {
                let count = call_count_clone.clone();
                async move {
                    count.fetch_add(1, Ordering::SeqCst);
                    Ok::<_, anyhow::Error>("success")
                }
            },
            3,
            // NOTE(review): the meaning of this boolean flag is not visible
            // from this file — confirm against retry_operation's signature.
            false,
            // Progress/print callback — intentionally a no-op here.
            |_msg| {},
        ).await;

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), "success");
        assert_eq!(call_count.load(Ordering::SeqCst), 1, "Should only call once on success");
    }

    /// Test non-recoverable error fails immediately.
    ///
    /// Errors classified as non-recoverable (here: an invalid API key) must
    /// not consume any retry budget.
    #[tokio::test]
    async fn test_non_recoverable_fails_immediately() {
        let call_count = Arc::new(AtomicU32::new(0));
        let call_count_clone = call_count.clone();

        let result = retry_operation(
            "test_op",
            || {
                let count = call_count_clone.clone();
                async move {
                    count.fetch_add(1, Ordering::SeqCst);
                    Err::<String, _>(anyhow::anyhow!("Invalid API key"))
                }
            },
            3,
            false,
            |_msg| {},
        ).await;

        assert!(result.is_err());
        assert_eq!(call_count.load(Ordering::SeqCst), 1, "Non-recoverable should not retry");
    }

    /// Test recoverable error retries up to max.
    #[tokio::test]
    async fn test_recoverable_retries_to_max() {
        let call_count = Arc::new(AtomicU32::new(0));
        let call_count_clone = call_count.clone();

        let result = retry_operation(
            "test_op",
            || {
                let count = call_count_clone.clone();
                async move {
                    count.fetch_add(1, Ordering::SeqCst);
                    // Rate limit is a recoverable error
                    Err::<String, _>(anyhow::anyhow!("Rate limit exceeded"))
                }
            },
            3, // max retries
            false,
            |_msg| {},
        ).await;

        assert!(result.is_err());
        // NOTE(review): with max_retries = 3 this asserts 3 total attempts,
        // so the budget appears to INCLUDE the initial attempt. The original
        // comment claimed "initial + max_retries" (which would be 4) — that
        // contradicts the assertion; confirm retry_operation's counting.
        assert_eq!(call_count.load(Ordering::SeqCst), 3, "Should retry up to max");
    }

    /// Test recoverable error succeeds on retry.
    ///
    /// Two recoverable failures followed by a success must yield Ok after
    /// exactly three attempts, well within the budget of 5.
    #[tokio::test]
    async fn test_recoverable_succeeds_on_retry() {
        let call_count = Arc::new(AtomicU32::new(0));
        let call_count_clone = call_count.clone();

        let result = retry_operation(
            "test_op",
            || {
                let count = call_count_clone.clone();
                async move {
                    // fetch_add returns the PREVIOUS value: 0, 1, 2, ...
                    let current = count.fetch_add(1, Ordering::SeqCst);
                    if current < 2 {
                        // Fail first two times with recoverable error
                        Err(anyhow::anyhow!("Server error 500"))
                    } else {
                        // Succeed on third try
                        Ok("success after retry")
                    }
                }
            },
            5, // max retries
            false,
            |_msg| {},
        ).await;

        assert!(result.is_ok());
        assert_eq!(result.unwrap(), "success after retry");
        assert_eq!(call_count.load(Ordering::SeqCst), 3, "Should succeed on third try");
    }

    /// Test print function is called on retry.
    ///
    /// When a retry happens, the progress callback must receive at least one
    /// message that mentions the error category (rate limiting here).
    #[tokio::test]
    async fn test_print_fn_called_on_retry() {
        // Collect every callback message for inspection after the run.
        let messages = Arc::new(std::sync::Mutex::new(Vec::new()));
        let messages_clone = messages.clone();

        let call_count = Arc::new(AtomicU32::new(0));
        let call_count_clone = call_count.clone();

        let _ = retry_operation(
            "test_op",
            || {
                let count = call_count_clone.clone();
                async move {
                    let current = count.fetch_add(1, Ordering::SeqCst);
                    if current < 1 {
                        Err(anyhow::anyhow!("Rate limit exceeded"))
                    } else {
                        Ok("success")
                    }
                }
            },
            3,
            false,
            |msg| {
                messages_clone.lock().unwrap().push(msg.to_string());
            },
        ).await;

        let msgs = messages.lock().unwrap();
        assert!(!msgs.is_empty(), "Should have printed retry messages");
        // Should mention the error type; exact wording is not pinned down.
        assert!(msgs.iter().any(|m| m.contains("RateLimit") || m.contains("rate")),
                "Should mention rate limit in messages: {:?}", msgs);
    }
}
|
||||||
466
crates/g3-core/tests/tool_execution_roundtrip_test.rs
Normal file
466
crates/g3-core/tests/tool_execution_roundtrip_test.rs
Normal file
@@ -0,0 +1,466 @@
|
|||||||
|
//! Tool Execution Round-Trip Integration Tests
|
||||||
|
//!
|
||||||
|
//! CHARACTERIZATION: These tests verify that tools execute correctly through
|
||||||
|
//! the Agent interface, testing the full round-trip from tool call to result.
|
||||||
|
//!
|
||||||
|
//! What these tests protect:
|
||||||
|
//! - File operations (read, write, str_replace) work end-to-end
|
||||||
|
//! - Shell command execution produces expected output
|
||||||
|
//! - TODO operations persist correctly
|
||||||
|
//! - Error handling for invalid inputs
|
||||||
|
//!
|
||||||
|
//! What these tests intentionally do NOT assert:
|
||||||
|
//! - Internal implementation details of tools
|
||||||
|
//! - Specific formatting of success messages (only key content)
|
||||||
|
//! - UI writer behavior (uses NullUiWriter)
|
||||||
|
|
||||||
|
use g3_core::ui_writer::NullUiWriter;
|
||||||
|
use g3_core::{Agent, ToolCall};
|
||||||
|
use serial_test::serial;
|
||||||
|
use std::fs;
|
||||||
|
use tempfile::TempDir;
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Test Helpers
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
/// Create a test agent in a temporary directory.
///
/// NOTE(review): `set_current_dir` mutates process-global state, which is why
/// every test using this helper is marked `#[serial]` — parallel tests would
/// race on the working directory. The unwraps are acceptable in test setup:
/// a failure here is an environment problem, not a behavior under test.
async fn create_test_agent(temp_dir: &TempDir) -> Agent<NullUiWriter> {
    // Run the agent from inside the temp dir so relative paths (e.g. the
    // TODO file) land in the sandbox rather than the repo checkout.
    std::env::set_current_dir(temp_dir.path()).unwrap();
    let config = g3_config::Config::default();
    // NullUiWriter discards UI output; these tests assert on tool results only.
    let ui_writer = NullUiWriter;
    Agent::new(config, ui_writer).await.unwrap()
}
|
||||||
|
|
||||||
|
/// Create a ToolCall with the given tool name and arguments
|
||||||
|
fn make_tool_call(tool: &str, args: serde_json::Value) -> ToolCall {
|
||||||
|
ToolCall {
|
||||||
|
tool: tool.to_string(),
|
||||||
|
args,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Test: read_file tool execution
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
mod read_file_execution {
    use super::*;

    /// Test reading an existing file.
    ///
    /// The result must contain the full file content (all lines).
    #[tokio::test]
    #[serial]
    async fn test_read_existing_file() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.txt");
        fs::write(&test_file, "Hello, World!\nLine 2\nLine 3").unwrap();

        let mut agent = create_test_agent(&temp_dir).await;

        let tool_call = make_tool_call(
            "read_file",
            serde_json::json!({ "file_path": test_file.to_string_lossy() }),
        );

        let result = agent.execute_tool(&tool_call).await.unwrap();

        assert!(result.contains("Hello, World!"), "Should contain file content: {}", result);
        assert!(result.contains("Line 2"), "Should contain all lines: {}", result);
    }

    /// Test reading a non-existent file returns error.
    ///
    /// The tool may either return Err or an Ok payload containing an error
    /// message — both shapes are accepted by this characterization test.
    #[tokio::test]
    #[serial]
    async fn test_read_nonexistent_file() {
        let temp_dir = TempDir::new().unwrap();
        let mut agent = create_test_agent(&temp_dir).await;

        let tool_call = make_tool_call(
            "read_file",
            serde_json::json!({ "file_path": "/nonexistent/path/file.txt" }),
        );

        let result = agent.execute_tool(&tool_call).await;

        // Should return an error or error message
        assert!(
            result.is_err() || result.as_ref().unwrap().contains("error") || result.as_ref().unwrap().contains("not found") || result.as_ref().unwrap().contains("No such file"),
            "Should indicate file not found: {:?}", result
        );
    }

    /// Test reading with character range.
    ///
    /// NOTE(review): for input "0123456789ABCDEF", start=5/end=10 is expected
    /// to yield "56789" — i.e. a half-open [start, end) range. Whether the
    /// offsets are bytes or characters is not visible here (they coincide for
    /// this ASCII fixture); confirm against the read_file tool docs.
    #[tokio::test]
    #[serial]
    async fn test_read_file_with_range() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.txt");
        fs::write(&test_file, "0123456789ABCDEF").unwrap();

        let mut agent = create_test_agent(&temp_dir).await;

        let tool_call = make_tool_call(
            "read_file",
            serde_json::json!({
                "file_path": test_file.to_string_lossy(),
                "start": 5,
                "end": 10
            }),
        );

        let result = agent.execute_tool(&tool_call).await.unwrap();

        // Should contain the substring from position 5 to 10
        assert!(result.contains("56789"), "Should contain range content: {}", result);
    }
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Test: write_file tool execution
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
mod write_file_execution {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
/// Test writing a new file
|
||||||
|
#[tokio::test]
|
||||||
|
#[serial]
|
||||||
|
async fn test_write_new_file() {
|
||||||
|
let temp_dir = TempDir::new().unwrap();
|
||||||
|
let new_file = temp_dir.path().join("new_file.txt");
|
||||||
|
|
||||||
|
assert!(!new_file.exists(), "File should not exist initially");
|
||||||
|
|
||||||
|
let mut agent = create_test_agent(&temp_dir).await;
|
||||||
|
|
||||||
|
let tool_call = make_tool_call(
|
||||||
|
"write_file",
|
||||||
|
serde_json::json!({
|
||||||
|
"file_path": new_file.to_string_lossy(),
|
||||||
|
"content": "New content here"
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
let result = agent.execute_tool(&tool_call).await.unwrap();
|
||||||
|
|
||||||
|
// Should report success
|
||||||
|
assert!(result.contains("✅") || result.to_lowercase().contains("success") || result.to_lowercase().contains("wrote"),
|
||||||
|
"Should report success: {}", result);
|
||||||
|
|
||||||
|
// File should now exist with correct content
|
||||||
|
assert!(new_file.exists(), "File should exist after write");
|
||||||
|
let content = fs::read_to_string(&new_file).unwrap();
|
||||||
|
assert_eq!(content, "New content here");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test overwriting an existing file
|
||||||
|
#[tokio::test]
|
||||||
|
#[serial]
|
||||||
|
async fn test_overwrite_existing_file() {
|
||||||
|
let temp_dir = TempDir::new().unwrap();
|
||||||
|
let test_file = temp_dir.path().join("existing.txt");
|
||||||
|
fs::write(&test_file, "Original content").unwrap();
|
||||||
|
|
||||||
|
let mut agent = create_test_agent(&temp_dir).await;
|
||||||
|
|
||||||
|
let tool_call = make_tool_call(
|
||||||
|
"write_file",
|
||||||
|
serde_json::json!({
|
||||||
|
"file_path": test_file.to_string_lossy(),
|
||||||
|
"content": "Replaced content"
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
let result = agent.execute_tool(&tool_call).await.unwrap();
|
||||||
|
|
||||||
|
assert!(result.contains("✅") || result.to_lowercase().contains("success") || result.to_lowercase().contains("wrote"),
|
||||||
|
"Should report success: {}", result);
|
||||||
|
|
||||||
|
let content = fs::read_to_string(&test_file).unwrap();
|
||||||
|
assert_eq!(content, "Replaced content");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test writing creates parent directories
|
||||||
|
#[tokio::test]
|
||||||
|
#[serial]
|
||||||
|
async fn test_write_creates_parent_dirs() {
|
||||||
|
let temp_dir = TempDir::new().unwrap();
|
||||||
|
let nested_file = temp_dir.path().join("a/b/c/nested.txt");
|
||||||
|
|
||||||
|
let mut agent = create_test_agent(&temp_dir).await;
|
||||||
|
|
||||||
|
let tool_call = make_tool_call(
|
||||||
|
"write_file",
|
||||||
|
serde_json::json!({
|
||||||
|
"file_path": nested_file.to_string_lossy(),
|
||||||
|
"content": "Nested content"
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
let result = agent.execute_tool(&tool_call).await.unwrap();
|
||||||
|
|
||||||
|
assert!(result.contains("✅") || result.to_lowercase().contains("success") || result.to_lowercase().contains("wrote"),
|
||||||
|
"Should report success: {}", result);
|
||||||
|
|
||||||
|
assert!(nested_file.exists(), "Nested file should exist");
|
||||||
|
let content = fs::read_to_string(&nested_file).unwrap();
|
||||||
|
assert_eq!(content, "Nested content");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Test: shell tool execution
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
mod shell_execution {
    use super::*;

    // NOTE(review): all commands in this module (echo, ;-chaining, exit, pwd)
    // assume a POSIX shell; these tests are Unix-only as written.

    /// Test simple echo command.
    #[tokio::test]
    #[serial]
    async fn test_shell_echo() {
        let temp_dir = TempDir::new().unwrap();
        let mut agent = create_test_agent(&temp_dir).await;

        let tool_call = make_tool_call(
            "shell",
            serde_json::json!({ "command": "echo 'hello world'" }),
        );

        let result = agent.execute_tool(&tool_call).await.unwrap();

        assert!(result.contains("hello world"), "Should contain echo output: {}", result);
    }

    /// Test command that produces multi-line output.
    ///
    /// All three lines must appear in the captured result.
    #[tokio::test]
    #[serial]
    async fn test_shell_multiline_output() {
        let temp_dir = TempDir::new().unwrap();
        let mut agent = create_test_agent(&temp_dir).await;

        let tool_call = make_tool_call(
            "shell",
            serde_json::json!({ "command": "echo 'line1'; echo 'line2'; echo 'line3'" }),
        );

        let result = agent.execute_tool(&tool_call).await.unwrap();

        assert!(result.contains("line1"), "Should contain line1: {}", result);
        assert!(result.contains("line2"), "Should contain line2: {}", result);
        assert!(result.contains("line3"), "Should contain line3: {}", result);
    }

    /// Test command that fails.
    ///
    /// The tool may surface a non-zero exit either as Err or as an Ok payload
    /// describing the failure — both are accepted.
    #[tokio::test]
    #[serial]
    async fn test_shell_failing_command() {
        let temp_dir = TempDir::new().unwrap();
        let mut agent = create_test_agent(&temp_dir).await;

        let tool_call = make_tool_call(
            "shell",
            serde_json::json!({ "command": "exit 1" }),
        );

        let result = agent.execute_tool(&tool_call).await;

        // Should indicate failure (either error or non-zero exit)
        assert!(
            result.is_err() || result.as_ref().unwrap().contains("exit") || result.as_ref().unwrap().contains("failed") || result.as_ref().unwrap().contains("error"),
            "Should indicate command failure: {:?}", result
        );
    }

    /// Test command with working directory context.
    ///
    /// `pwd` should reflect the temp dir set by create_test_agent.
    #[tokio::test]
    #[serial]
    async fn test_shell_pwd() {
        let temp_dir = TempDir::new().unwrap();
        let mut agent = create_test_agent(&temp_dir).await;

        let tool_call = make_tool_call(
            "shell",
            serde_json::json!({ "command": "pwd" }),
        );

        let result = agent.execute_tool(&tool_call).await.unwrap();

        // Should show the temp directory path. The "private" fallback covers
        // macOS, where /tmp resolves to /private/tmp so pwd's output may not
        // match the unresolved TempDir path verbatim.
        let temp_path = temp_dir.path().to_string_lossy();
        assert!(result.contains(&*temp_path) || result.contains("private"),
                "Should show current directory: {} (expected to contain {})", result, temp_path);
    }
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Test: str_replace tool execution
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
mod str_replace_execution {
    use super::*;

    // NOTE(review): the str_replace tool consumes unified-diff hunks via a
    // "diff" argument; context lines are prefixed with a space, removals with
    // '-', additions with '+'. Inferred from the fixtures below — confirm
    // against the tool's schema.

    /// Test applying a simple diff.
    ///
    /// A one-line substitution must replace the old line and keep context.
    #[tokio::test]
    #[serial]
    async fn test_str_replace_simple() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.txt");
        fs::write(&test_file, "line 1\nold line\nline 3\n").unwrap();

        let mut agent = create_test_agent(&temp_dir).await;

        let diff = "@@ -1,3 +1,3 @@\n line 1\n-old line\n+new line\n line 3\n";

        let tool_call = make_tool_call(
            "str_replace",
            serde_json::json!({
                "file_path": test_file.to_string_lossy(),
                "diff": diff
            }),
        );

        let result = agent.execute_tool(&tool_call).await.unwrap();

        assert!(result.contains("✅") || result.to_lowercase().contains("applied") || result.to_lowercase().contains("success"),
                "Should report success: {}", result);

        let content = fs::read_to_string(&test_file).unwrap();
        assert!(content.contains("new line"), "Should contain new content: {}", content);
        assert!(!content.contains("old line"), "Should not contain old content: {}", content);
    }

    /// Test diff that adds lines.
    #[tokio::test]
    #[serial]
    async fn test_str_replace_add_lines() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.txt");
        fs::write(&test_file, "line 1\nline 3\n").unwrap();

        let mut agent = create_test_agent(&temp_dir).await;

        // Pure insertion hunk: "line 2" added between the two context lines.
        let diff = "@@ -1,2 +1,3 @@\n line 1\n+line 2\n line 3\n";

        let tool_call = make_tool_call(
            "str_replace",
            serde_json::json!({
                "file_path": test_file.to_string_lossy(),
                "diff": diff
            }),
        );

        let result = agent.execute_tool(&tool_call).await.unwrap();

        assert!(result.contains("✅") || result.to_lowercase().contains("applied"),
                "Should report success: {}", result);

        let content = fs::read_to_string(&test_file).unwrap();
        assert!(content.contains("line 2"), "Should contain added line: {}", content);
    }

    /// Test diff with pattern not found.
    ///
    /// A hunk whose removal line does not exist in the file must be rejected,
    /// either as Err or as an Ok payload describing the mismatch.
    #[tokio::test]
    #[serial]
    async fn test_str_replace_pattern_not_found() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.txt");
        fs::write(&test_file, "actual content\n").unwrap();

        let mut agent = create_test_agent(&temp_dir).await;

        let diff = "@@ -1,1 +1,1 @@\n-nonexistent pattern\n+replacement\n";

        let tool_call = make_tool_call(
            "str_replace",
            serde_json::json!({
                "file_path": test_file.to_string_lossy(),
                "diff": diff
            }),
        );

        let result = agent.execute_tool(&tool_call).await;

        // Should indicate pattern not found
        assert!(
            result.is_err() || result.as_ref().unwrap().to_lowercase().contains("not found") || result.as_ref().unwrap().to_lowercase().contains("pattern") || result.as_ref().unwrap().to_lowercase().contains("error"),
            "Should indicate pattern not found: {:?}", result
        );
    }
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Test: TODO tool execution
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
mod todo_execution {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
/// Test writing and reading TODO
|
||||||
|
#[tokio::test]
|
||||||
|
#[serial]
|
||||||
|
async fn test_todo_write_and_read() {
|
||||||
|
let temp_dir = TempDir::new().unwrap();
|
||||||
|
let mut agent = create_test_agent(&temp_dir).await;
|
||||||
|
|
||||||
|
// Write TODO
|
||||||
|
let write_call = make_tool_call(
|
||||||
|
"todo_write",
|
||||||
|
serde_json::json!({
|
||||||
|
"content": "- [ ] Task 1\n- [x] Task 2\n- [ ] Task 3"
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
let write_result = agent.execute_tool(&write_call).await.unwrap();
|
||||||
|
assert!(write_result.contains("✅") || write_result.to_lowercase().contains("success"),
|
||||||
|
"Write should succeed: {}", write_result);
|
||||||
|
|
||||||
|
// Read TODO
|
||||||
|
let read_call = make_tool_call("todo_read", serde_json::json!({}));
|
||||||
|
let read_result = agent.execute_tool(&read_call).await.unwrap();
|
||||||
|
|
||||||
|
assert!(read_result.contains("Task 1"), "Should contain Task 1: {}", read_result);
|
||||||
|
assert!(read_result.contains("Task 2"), "Should contain Task 2: {}", read_result);
|
||||||
|
assert!(read_result.contains("Task 3"), "Should contain Task 3: {}", read_result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test reading empty TODO
|
||||||
|
#[tokio::test]
|
||||||
|
#[serial]
|
||||||
|
async fn test_todo_read_empty() {
|
||||||
|
let temp_dir = TempDir::new().unwrap();
|
||||||
|
let mut agent = create_test_agent(&temp_dir).await;
|
||||||
|
|
||||||
|
let read_call = make_tool_call("todo_read", serde_json::json!({}));
|
||||||
|
let result = agent.execute_tool(&read_call).await.unwrap();
|
||||||
|
|
||||||
|
assert!(result.to_lowercase().contains("empty") || result.contains("no todo"),
|
||||||
|
"Should indicate empty: {}", result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test TODO persists to file
|
||||||
|
#[tokio::test]
|
||||||
|
#[serial]
|
||||||
|
async fn test_todo_persists_to_file() {
|
||||||
|
let temp_dir = TempDir::new().unwrap();
|
||||||
|
let todo_path = temp_dir.path().join("todo.g3.md");
|
||||||
|
|
||||||
|
{
|
||||||
|
let mut agent = create_test_agent(&temp_dir).await;
|
||||||
|
|
||||||
|
let write_call = make_tool_call(
|
||||||
|
"todo_write",
|
||||||
|
serde_json::json!({
|
||||||
|
"content": "- [ ] Persistent task"
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
agent.execute_tool(&write_call).await.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
// File should exist after agent is dropped
|
||||||
|
assert!(todo_path.exists(), "TODO file should persist");
|
||||||
|
let content = fs::read_to_string(&todo_path).unwrap();
|
||||||
|
assert!(content.contains("Persistent task"), "Content should persist: {}", content);
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user