add context window monitor
Writes the current context window to logs/current_context_window (uses a symlink to a session ID). Unfortunately, this PR was generated by a different LLM, which did a ton of superficial reformatting. It's actually a fairly small and benign change, but I don't want to roll back everything. Hope that's OK.
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
//! Inspect tree-sitter AST structure for Rust code
|
||||
|
||||
use tree_sitter::{Parser, Language};
|
||||
use tree_sitter::{Language, Parser};
|
||||
|
||||
fn print_tree(node: tree_sitter::Node, source: &str, indent: usize) {
|
||||
let indent_str = " ".repeat(indent);
|
||||
@@ -10,7 +10,7 @@ fn print_tree(node: tree_sitter::Node, source: &str, indent: usize) {
|
||||
} else {
|
||||
node_text.to_string()
|
||||
};
|
||||
|
||||
|
||||
println!(
|
||||
"{}{} [{}:{}] '{}'",
|
||||
indent_str,
|
||||
@@ -19,7 +19,7 @@ fn print_tree(node: tree_sitter::Node, source: &str, indent: usize) {
|
||||
node.start_position().column + 1,
|
||||
preview.replace('\n', "\\n")
|
||||
);
|
||||
|
||||
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
print_tree(child, source, indent + 1);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//! Inspect tree-sitter AST structure for Python code
|
||||
|
||||
use tree_sitter::{Parser, Language};
|
||||
use tree_sitter::{Language, Parser};
|
||||
|
||||
fn print_tree(node: tree_sitter::Node, source: &str, indent: usize) {
|
||||
let indent_str = " ".repeat(indent);
|
||||
@@ -10,7 +10,7 @@ fn print_tree(node: tree_sitter::Node, source: &str, indent: usize) {
|
||||
} else {
|
||||
node_text.to_string()
|
||||
};
|
||||
|
||||
|
||||
println!(
|
||||
"{}{} [{}:{}] '{}'",
|
||||
indent_str,
|
||||
@@ -19,7 +19,7 @@ fn print_tree(node: tree_sitter::Node, source: &str, indent: usize) {
|
||||
node.start_position().column + 1,
|
||||
preview.replace('\n', "\\n")
|
||||
);
|
||||
|
||||
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
print_tree(child, source, indent + 1);
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
//! Test Python async query
|
||||
|
||||
use tree_sitter::{Parser, Query, QueryCursor, Language};
|
||||
use streaming_iterator::StreamingIterator;
|
||||
use tree_sitter::{Language, Parser, Query, QueryCursor};
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let source_code = r#"
|
||||
|
||||
@@ -3,8 +3,8 @@ use anyhow::{anyhow, Result};
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use tree_sitter::{Language, Parser, Query, QueryCursor};
|
||||
use streaming_iterator::StreamingIterator;
|
||||
use tree_sitter::{Language, Parser, Query, QueryCursor};
|
||||
use walkdir::WalkDir;
|
||||
|
||||
pub struct TreeSitterSearcher {
|
||||
@@ -47,10 +47,11 @@ impl TreeSitterSearcher {
|
||||
.set_language(&language)
|
||||
.map_err(|e| anyhow!("Failed to set JavaScript language: {}", e))?;
|
||||
parsers.insert("javascript".to_string(), parser);
|
||||
|
||||
|
||||
// Create separate parser for "js" alias
|
||||
let mut parser_js = Parser::new();
|
||||
parser_js.set_language(&language)
|
||||
parser_js
|
||||
.set_language(&language)
|
||||
.map_err(|e| anyhow!("Failed to set JavaScript language: {}", e))?;
|
||||
parsers.insert("js".to_string(), parser_js);
|
||||
languages.insert("javascript".to_string(), language.clone());
|
||||
@@ -65,10 +66,11 @@ impl TreeSitterSearcher {
|
||||
.set_language(&language)
|
||||
.map_err(|e| anyhow!("Failed to set TypeScript language: {}", e))?;
|
||||
parsers.insert("typescript".to_string(), parser);
|
||||
|
||||
|
||||
// Create separate parser for "ts" alias
|
||||
let mut parser_ts = Parser::new();
|
||||
parser_ts.set_language(&language)
|
||||
parser_ts
|
||||
.set_language(&language)
|
||||
.map_err(|e| anyhow!("Failed to set TypeScript language: {}", e))?;
|
||||
parsers.insert("ts".to_string(), parser_ts);
|
||||
languages.insert("typescript".to_string(), language.clone());
|
||||
@@ -215,8 +217,8 @@ impl TreeSitterSearcher {
|
||||
.ok_or_else(|| anyhow!("Language not found: {}", spec.language))?;
|
||||
|
||||
// Parse query
|
||||
let query = Query::new(language, &spec.query)
|
||||
.map_err(|e| anyhow!("Invalid query: {}", e))?;
|
||||
let query =
|
||||
Query::new(language, &spec.query).map_err(|e| anyhow!("Invalid query: {}", e))?;
|
||||
|
||||
let mut matches = Vec::new();
|
||||
let mut files_searched = 0;
|
||||
@@ -255,11 +257,8 @@ impl TreeSitterSearcher {
|
||||
if let Ok(source_code) = fs::read_to_string(path) {
|
||||
if let Some(tree) = parser.parse(&source_code, None) {
|
||||
let mut cursor = QueryCursor::new();
|
||||
let mut query_matches = cursor.matches(
|
||||
&query,
|
||||
tree.root_node(),
|
||||
source_code.as_bytes(),
|
||||
);
|
||||
let mut query_matches =
|
||||
cursor.matches(&query, tree.root_node(), source_code.as_bytes());
|
||||
|
||||
query_matches.advance();
|
||||
while let Some(query_match) = query_matches.get() {
|
||||
@@ -308,7 +307,7 @@ impl TreeSitterSearcher {
|
||||
captures: captures_map,
|
||||
context,
|
||||
});
|
||||
|
||||
|
||||
query_matches.advance();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -106,15 +106,15 @@ impl ErrorContext {
|
||||
error!("Session ID: {:?}", self.session_id);
|
||||
error!("Context Tokens: {}", self.context_tokens);
|
||||
error!("Last Prompt: {}", self.last_prompt);
|
||||
|
||||
|
||||
if let Some(ref req) = self.raw_request {
|
||||
error!("Raw Request: {}", req);
|
||||
}
|
||||
|
||||
|
||||
if let Some(ref resp) = self.raw_response {
|
||||
error!("Raw Response: {}", resp);
|
||||
}
|
||||
|
||||
|
||||
error!("Stack Trace:\n{}", self.stack_trace);
|
||||
error!("=== END ERROR DETAILS ===");
|
||||
|
||||
@@ -191,23 +191,36 @@ pub fn classify_error(error: &anyhow::Error) -> ErrorType {
|
||||
let error_str = error.to_string().to_lowercase();
|
||||
|
||||
// Check for recoverable error patterns
|
||||
if error_str.contains("rate limit") || error_str.contains("rate_limit") || error_str.contains("429") {
|
||||
if error_str.contains("rate limit")
|
||||
|| error_str.contains("rate_limit")
|
||||
|| error_str.contains("429")
|
||||
{
|
||||
return ErrorType::Recoverable(RecoverableError::RateLimit);
|
||||
}
|
||||
|
||||
if error_str.contains("network") || error_str.contains("connection") ||
|
||||
error_str.contains("dns") || error_str.contains("refused") {
|
||||
if error_str.contains("network")
|
||||
|| error_str.contains("connection")
|
||||
|| error_str.contains("dns")
|
||||
|| error_str.contains("refused")
|
||||
{
|
||||
return ErrorType::Recoverable(RecoverableError::NetworkError);
|
||||
}
|
||||
|
||||
if error_str.contains("500") || error_str.contains("502") ||
|
||||
error_str.contains("503") || error_str.contains("504") ||
|
||||
error_str.contains("server error") || error_str.contains("internal error") {
|
||||
if error_str.contains("500")
|
||||
|| error_str.contains("502")
|
||||
|| error_str.contains("503")
|
||||
|| error_str.contains("504")
|
||||
|| error_str.contains("server error")
|
||||
|| error_str.contains("internal error")
|
||||
{
|
||||
return ErrorType::Recoverable(RecoverableError::ServerError);
|
||||
}
|
||||
|
||||
if error_str.contains("busy") || error_str.contains("overloaded") ||
|
||||
error_str.contains("capacity") || error_str.contains("unavailable") {
|
||||
if error_str.contains("busy")
|
||||
|| error_str.contains("overloaded")
|
||||
|| error_str.contains("capacity")
|
||||
|| error_str.contains("unavailable")
|
||||
{
|
||||
return ErrorType::Recoverable(RecoverableError::ModelBusy);
|
||||
}
|
||||
|
||||
@@ -216,18 +229,24 @@ pub fn classify_error(error: &anyhow::Error) -> ErrorType {
|
||||
error_str.contains("timed out") ||
|
||||
error_str.contains("operation timed out") ||
|
||||
error_str.contains("request or response body error") || // Common timeout pattern
|
||||
error_str.contains("stream error") && error_str.contains("timed out") {
|
||||
error_str.contains("stream error") && error_str.contains("timed out")
|
||||
{
|
||||
return ErrorType::Recoverable(RecoverableError::Timeout);
|
||||
}
|
||||
|
||||
// Check for context length exceeded errors (HTTP 400 with specific messages)
|
||||
if (error_str.contains("400") || error_str.contains("bad request")) &&
|
||||
(error_str.contains("context length") || error_str.contains("prompt is too long") ||
|
||||
error_str.contains("maximum context length") || error_str.contains("context_length_exceeded")) {
|
||||
if (error_str.contains("400") || error_str.contains("bad request"))
|
||||
&& (error_str.contains("context length")
|
||||
|| error_str.contains("prompt is too long")
|
||||
|| error_str.contains("maximum context length")
|
||||
|| error_str.contains("context_length_exceeded"))
|
||||
{
|
||||
return ErrorType::Recoverable(RecoverableError::ContextLengthExceeded);
|
||||
}
|
||||
|
||||
if error_str.contains("token") && (error_str.contains("limit") || error_str.contains("exceeded")) {
|
||||
if error_str.contains("token")
|
||||
&& (error_str.contains("limit") || error_str.contains("exceeded"))
|
||||
{
|
||||
return ErrorType::Recoverable(RecoverableError::TokenLimit);
|
||||
}
|
||||
|
||||
@@ -239,12 +258,14 @@ pub fn classify_error(error: &anyhow::Error) -> ErrorType {
|
||||
fn calculate_autonomous_retry_delay(attempt: u32) -> Duration {
|
||||
use rand::Rng;
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
|
||||
// Distribute 6 retries over 10 minutes (600 seconds)
|
||||
// Base delays: 10s, 30s, 60s, 120s, 180s, 200s = 600s total
|
||||
let base_delays_ms = [10000, 30000, 60000, 120000, 180000, 200000];
|
||||
let base_delay = base_delays_ms.get(attempt.saturating_sub(1) as usize).unwrap_or(&200000);
|
||||
|
||||
let base_delay = base_delays_ms
|
||||
.get(attempt.saturating_sub(1) as usize)
|
||||
.unwrap_or(&200000);
|
||||
|
||||
// Add jitter of ±30% to prevent thundering herd
|
||||
let jitter = (*base_delay as f64 * 0.3 * rng.gen::<f64>()) as u64;
|
||||
let final_delay = if rng.gen_bool(0.5) {
|
||||
@@ -252,7 +273,7 @@ fn calculate_autonomous_retry_delay(attempt: u32) -> Duration {
|
||||
} else {
|
||||
base_delay.saturating_sub(jitter)
|
||||
};
|
||||
|
||||
|
||||
Duration::from_millis(final_delay)
|
||||
}
|
||||
|
||||
@@ -261,14 +282,18 @@ pub fn calculate_retry_delay(attempt: u32, is_autonomous: bool) -> Duration {
|
||||
if is_autonomous {
|
||||
return calculate_autonomous_retry_delay(attempt);
|
||||
}
|
||||
|
||||
|
||||
use rand::Rng;
|
||||
let max_retry_delay_ms = if is_autonomous { AUTONOMOUS_MAX_RETRY_DELAY_MS } else { DEFAULT_MAX_RETRY_DELAY_MS };
|
||||
|
||||
let max_retry_delay_ms = if is_autonomous {
|
||||
AUTONOMOUS_MAX_RETRY_DELAY_MS
|
||||
} else {
|
||||
DEFAULT_MAX_RETRY_DELAY_MS
|
||||
};
|
||||
|
||||
// Exponential backoff: delay = base * 2^attempt
|
||||
let base_delay = BASE_RETRY_DELAY_MS * (2_u64.pow(attempt.saturating_sub(1)));
|
||||
let capped_delay = base_delay.min(max_retry_delay_ms);
|
||||
|
||||
|
||||
// Add jitter to prevent thundering herd
|
||||
let mut rng = rand::thread_rng();
|
||||
let jitter = (capped_delay as f64 * JITTER_FACTOR * rng.gen::<f64>()) as u64;
|
||||
@@ -277,7 +302,7 @@ pub fn calculate_retry_delay(attempt: u32, is_autonomous: bool) -> Duration {
|
||||
} else {
|
||||
capped_delay.saturating_sub(jitter)
|
||||
};
|
||||
|
||||
|
||||
Duration::from_millis(final_delay)
|
||||
}
|
||||
|
||||
@@ -298,7 +323,7 @@ where
|
||||
|
||||
loop {
|
||||
attempt += 1;
|
||||
|
||||
|
||||
match operation().await {
|
||||
Ok(result) => {
|
||||
if attempt > 1 {
|
||||
@@ -321,19 +346,19 @@ where
|
||||
context.clone().log_error(&error);
|
||||
return Err(error);
|
||||
}
|
||||
|
||||
|
||||
let delay = calculate_retry_delay(attempt, is_autonomous);
|
||||
warn!(
|
||||
"Recoverable error ({:?}) in '{}' (attempt {}/{}). Retrying in {:?}...",
|
||||
recoverable_type, operation_name, attempt, max_attempts, delay
|
||||
);
|
||||
warn!("Error details: {}", error);
|
||||
|
||||
|
||||
// Special handling for token limit errors
|
||||
if matches!(recoverable_type, RecoverableError::TokenLimit) {
|
||||
info!("Token limit error detected. Consider triggering summarization.");
|
||||
}
|
||||
|
||||
|
||||
tokio::time::sleep(delay).await;
|
||||
_last_error = Some(error);
|
||||
}
|
||||
@@ -359,18 +384,22 @@ fn truncate_for_logging(s: &str, max_len: usize) -> String {
|
||||
// Find a safe UTF-8 boundary to truncate at
|
||||
// We need to ensure we don't cut in the middle of a multi-byte character
|
||||
let mut truncate_at = max_len;
|
||||
|
||||
|
||||
// Walk backwards from max_len to find a character boundary
|
||||
while truncate_at > 0 && !s.is_char_boundary(truncate_at) {
|
||||
truncate_at -= 1;
|
||||
}
|
||||
|
||||
|
||||
// If we couldn't find a boundary (shouldn't happen), use a safe default
|
||||
if truncate_at == 0 {
|
||||
truncate_at = max_len.min(s.len());
|
||||
}
|
||||
|
||||
format!("{}... (truncated, {} total bytes)", &s[..truncate_at], s.len())
|
||||
|
||||
format!(
|
||||
"{}... (truncated, {} total bytes)",
|
||||
&s[..truncate_at],
|
||||
s.len()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -398,42 +427,69 @@ mod tests {
|
||||
fn test_error_classification() {
|
||||
// Rate limit errors
|
||||
let error = anyhow!("Rate limit exceeded");
|
||||
assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::RateLimit));
|
||||
|
||||
assert_eq!(
|
||||
classify_error(&error),
|
||||
ErrorType::Recoverable(RecoverableError::RateLimit)
|
||||
);
|
||||
|
||||
let error = anyhow!("HTTP 429 Too Many Requests");
|
||||
assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::RateLimit));
|
||||
|
||||
assert_eq!(
|
||||
classify_error(&error),
|
||||
ErrorType::Recoverable(RecoverableError::RateLimit)
|
||||
);
|
||||
|
||||
// Network errors
|
||||
let error = anyhow!("Network connection failed");
|
||||
assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::NetworkError));
|
||||
|
||||
assert_eq!(
|
||||
classify_error(&error),
|
||||
ErrorType::Recoverable(RecoverableError::NetworkError)
|
||||
);
|
||||
|
||||
// Server errors
|
||||
let error = anyhow!("HTTP 503 Service Unavailable");
|
||||
assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::ServerError));
|
||||
|
||||
assert_eq!(
|
||||
classify_error(&error),
|
||||
ErrorType::Recoverable(RecoverableError::ServerError)
|
||||
);
|
||||
|
||||
// Model busy
|
||||
let error = anyhow!("Model is busy, please try again");
|
||||
assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::ModelBusy));
|
||||
|
||||
assert_eq!(
|
||||
classify_error(&error),
|
||||
ErrorType::Recoverable(RecoverableError::ModelBusy)
|
||||
);
|
||||
|
||||
// Timeout
|
||||
let error = anyhow!("Request timed out");
|
||||
assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::Timeout));
|
||||
|
||||
assert_eq!(
|
||||
classify_error(&error),
|
||||
ErrorType::Recoverable(RecoverableError::Timeout)
|
||||
);
|
||||
|
||||
// Token limit
|
||||
let error = anyhow!("Token limit exceeded");
|
||||
assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::TokenLimit));
|
||||
|
||||
assert_eq!(
|
||||
classify_error(&error),
|
||||
ErrorType::Recoverable(RecoverableError::TokenLimit)
|
||||
);
|
||||
|
||||
// Context length exceeded
|
||||
let error = anyhow!("HTTP 400 Bad Request: context length exceeded");
|
||||
assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::ContextLengthExceeded));
|
||||
|
||||
assert_eq!(
|
||||
classify_error(&error),
|
||||
ErrorType::Recoverable(RecoverableError::ContextLengthExceeded)
|
||||
);
|
||||
|
||||
let error = anyhow!("Error 400: prompt is too long");
|
||||
assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::ContextLengthExceeded));
|
||||
|
||||
assert_eq!(
|
||||
classify_error(&error),
|
||||
ErrorType::Recoverable(RecoverableError::ContextLengthExceeded)
|
||||
);
|
||||
|
||||
// Non-recoverable
|
||||
let error = anyhow!("Invalid API key");
|
||||
assert_eq!(classify_error(&error), ErrorType::NonRecoverable);
|
||||
|
||||
|
||||
let error = anyhow!("Malformed request");
|
||||
assert_eq!(classify_error(&error), ErrorType::NonRecoverable);
|
||||
}
|
||||
@@ -444,17 +500,17 @@ mod tests {
|
||||
let delay1 = calculate_retry_delay(1, false);
|
||||
let delay2 = calculate_retry_delay(2, false);
|
||||
let delay3 = calculate_retry_delay(3, false);
|
||||
|
||||
|
||||
// Due to jitter, we can't test exact values, but the base should increase
|
||||
assert!(delay1.as_millis() >= (BASE_RETRY_DELAY_MS as f64 * 0.7) as u128);
|
||||
assert!(delay1.as_millis() <= (BASE_RETRY_DELAY_MS as f64 * 1.3) as u128);
|
||||
|
||||
|
||||
// Delay 2 should be roughly 2x delay 1 (minus jitter)
|
||||
assert!(delay2.as_millis() >= delay1.as_millis());
|
||||
|
||||
|
||||
// Delay 3 should be roughly 2x delay 2 (minus jitter)
|
||||
assert!(delay3.as_millis() >= delay2.as_millis());
|
||||
|
||||
|
||||
// Test max cap
|
||||
let delay_max = calculate_retry_delay(10, false);
|
||||
assert!(delay_max.as_millis() <= (DEFAULT_MAX_RETRY_DELAY_MS as f64 * 1.3) as u128);
|
||||
@@ -469,7 +525,7 @@ mod tests {
|
||||
let delay4 = calculate_retry_delay(4, true);
|
||||
let delay5 = calculate_retry_delay(5, true);
|
||||
let delay6 = calculate_retry_delay(6, true);
|
||||
|
||||
|
||||
// Base delays should be around: 10s, 30s, 60s, 120s, 180s, 200s
|
||||
// With ±30% jitter
|
||||
assert!(delay1.as_millis() >= 7000 && delay1.as_millis() <= 13000);
|
||||
@@ -484,14 +540,14 @@ mod tests {
|
||||
fn test_truncate_for_logging() {
|
||||
let short_text = "Hello, world!";
|
||||
assert_eq!(truncate_for_logging(short_text, 20), "Hello, world!");
|
||||
|
||||
|
||||
let long_text = "This is a very long text that should be truncated for logging purposes";
|
||||
let truncated = truncate_for_logging(long_text, 20);
|
||||
assert!(truncated.starts_with("This is a very long "));
|
||||
assert!(truncated.contains("truncated"));
|
||||
assert!(truncated.contains("total bytes"));
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_truncate_with_multibyte_chars() {
|
||||
// Test with multi-byte UTF-8 characters
|
||||
@@ -499,7 +555,7 @@ mod tests {
|
||||
let truncated = truncate_for_logging(text_with_emoji, 10);
|
||||
// Should truncate at a valid UTF-8 boundary
|
||||
assert!(truncated.starts_with("Hello "));
|
||||
|
||||
|
||||
// Test with box-drawing characters like the one causing the panic
|
||||
let text_with_box = "Some text ┌─────┐ more text";
|
||||
let truncated = truncate_for_logging(text_with_box, 12);
|
||||
|
||||
@@ -17,7 +17,7 @@ mod tests {
|
||||
"test prompt".to_string(),
|
||||
None,
|
||||
100,
|
||||
false, // quiet parameter
|
||||
false, // quiet parameter
|
||||
);
|
||||
|
||||
let result = retry_with_backoff(
|
||||
@@ -57,7 +57,7 @@ mod tests {
|
||||
"test prompt".to_string(),
|
||||
None,
|
||||
100,
|
||||
false, // quiet parameter
|
||||
false, // quiet parameter
|
||||
);
|
||||
|
||||
let result: Result<&str, _> = retry_with_backoff(
|
||||
@@ -91,7 +91,7 @@ mod tests {
|
||||
"test prompt".to_string(),
|
||||
None,
|
||||
100,
|
||||
false, // quiet parameter
|
||||
false, // quiet parameter
|
||||
);
|
||||
|
||||
let result: Result<&str, _> = retry_with_backoff(
|
||||
@@ -124,7 +124,7 @@ mod tests {
|
||||
long_prompt,
|
||||
None,
|
||||
100,
|
||||
false, // quiet parameter
|
||||
false, // quiet parameter
|
||||
);
|
||||
|
||||
// The prompt should be truncated to 1000 chars
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
// 4. Return everything else as the final filtered string
|
||||
|
||||
//! JSON tool call filtering for streaming LLM responses.
|
||||
//!
|
||||
//!
|
||||
//! This module filters out JSON tool calls from LLM output streams while preserving
|
||||
//! regular text content. It uses a state machine to handle streaming chunks.
|
||||
|
||||
@@ -29,7 +29,7 @@ struct FixedJsonToolState {
|
||||
brace_depth: i32,
|
||||
buffer: String,
|
||||
json_start_in_buffer: Option<usize>, // Position where confirmed JSON tool call starts
|
||||
content_returned_up_to: usize, // Track how much content we've already returned
|
||||
content_returned_up_to: usize, // Track how much content we've already returned
|
||||
potential_json_start: Option<usize>, // Where the potential JSON started
|
||||
}
|
||||
|
||||
|
||||
@@ -358,8 +358,8 @@ More text"#;
|
||||
// 2. Then the same complete JSON appears
|
||||
let chunks = vec![
|
||||
"Some text\n",
|
||||
r#"{"tool": "str_replace", "args": {"diff":"...","file_path":"./crates/g3-cli"#, // Truncated
|
||||
r#"{"tool": "str_replace", "args": {"diff":"...","file_path":"./crates/g3-cli/src/lib.rs"}}"#, // Complete
|
||||
r#"{"tool": "str_replace", "args": {"diff":"...","file_path":"./crates/g3-cli"#, // Truncated
|
||||
r#"{"tool": "str_replace", "args": {"diff":"...","file_path":"./crates/g3-cli/src/lib.rs"}}"#, // Complete
|
||||
"\nMore text",
|
||||
];
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -7,19 +7,19 @@ use std::path::{Path, PathBuf};
|
||||
pub struct Project {
|
||||
/// The workspace directory for the project
|
||||
pub workspace_dir: PathBuf,
|
||||
|
||||
|
||||
/// Path to the requirements document (for autonomous mode)
|
||||
pub requirements_path: Option<PathBuf>,
|
||||
|
||||
|
||||
/// Override requirements text (takes precedence over requirements_path)
|
||||
pub requirements_text: Option<String>,
|
||||
|
||||
|
||||
/// Whether the project is in autonomous mode
|
||||
pub autonomous: bool,
|
||||
|
||||
|
||||
/// Project name (derived from workspace directory name)
|
||||
pub name: String,
|
||||
|
||||
|
||||
/// Session ID for tracking
|
||||
pub session_id: Option<String>,
|
||||
}
|
||||
@@ -32,7 +32,7 @@ impl Project {
|
||||
.and_then(|n| n.to_str())
|
||||
.unwrap_or("unnamed")
|
||||
.to_string();
|
||||
|
||||
|
||||
Self {
|
||||
workspace_dir,
|
||||
requirements_path: None,
|
||||
@@ -42,33 +42,36 @@ impl Project {
|
||||
session_id: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Create a project for autonomous mode
|
||||
pub fn new_autonomous(workspace_dir: PathBuf) -> Result<Self> {
|
||||
let mut project = Self::new(workspace_dir.clone());
|
||||
project.autonomous = true;
|
||||
|
||||
|
||||
// Look for requirements.md in the workspace directory
|
||||
let requirements_path = workspace_dir.join("requirements.md");
|
||||
if requirements_path.exists() {
|
||||
project.requirements_path = Some(requirements_path);
|
||||
}
|
||||
|
||||
|
||||
Ok(project)
|
||||
}
|
||||
|
||||
|
||||
/// Create a project for autonomous mode with requirements text override
|
||||
pub fn new_autonomous_with_requirements(workspace_dir: PathBuf, requirements_text: String) -> Result<Self> {
|
||||
pub fn new_autonomous_with_requirements(
|
||||
workspace_dir: PathBuf,
|
||||
requirements_text: String,
|
||||
) -> Result<Self> {
|
||||
let mut project = Self::new(workspace_dir.clone());
|
||||
project.autonomous = true;
|
||||
project.requirements_text = Some(requirements_text);
|
||||
|
||||
|
||||
// Don't look for requirements.md file when text is provided
|
||||
// The text override takes precedence
|
||||
|
||||
|
||||
Ok(project)
|
||||
}
|
||||
|
||||
|
||||
/// Set the workspace directory and update related paths
|
||||
pub fn set_workspace(&mut self, workspace_dir: PathBuf) {
|
||||
self.workspace_dir = workspace_dir.clone();
|
||||
@@ -77,7 +80,7 @@ impl Project {
|
||||
.and_then(|n| n.to_str())
|
||||
.unwrap_or("unnamed")
|
||||
.to_string();
|
||||
|
||||
|
||||
// Update requirements path if in autonomous mode
|
||||
if self.autonomous {
|
||||
let requirements_path = workspace_dir.join("requirements.md");
|
||||
@@ -86,18 +89,18 @@ impl Project {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Get the workspace directory
|
||||
pub fn workspace(&self) -> &Path {
|
||||
&self.workspace_dir
|
||||
}
|
||||
|
||||
|
||||
/// Check if requirements file exists
|
||||
pub fn has_requirements(&self) -> bool {
|
||||
// Has requirements if either text override is provided or requirements file exists
|
||||
self.requirements_text.is_some() || self.requirements_path.is_some()
|
||||
}
|
||||
|
||||
|
||||
/// Read the requirements file content
|
||||
pub fn read_requirements(&self) -> Result<Option<String>> {
|
||||
// Prioritize requirements text override
|
||||
@@ -110,7 +113,7 @@ impl Project {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Create the workspace directory if it doesn't exist
|
||||
pub fn ensure_workspace_exists(&self) -> Result<()> {
|
||||
if !self.workspace_dir.exists() {
|
||||
@@ -118,18 +121,18 @@ impl Project {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
/// Change to the workspace directory
|
||||
pub fn enter_workspace(&self) -> Result<()> {
|
||||
std::env::set_current_dir(&self.workspace_dir)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
/// Get the logs directory for the project
|
||||
pub fn logs_dir(&self) -> PathBuf {
|
||||
self.workspace_dir.join("logs")
|
||||
}
|
||||
|
||||
|
||||
/// Ensure the logs directory exists
|
||||
pub fn ensure_logs_dir(&self) -> Result<()> {
|
||||
let logs_dir = self.logs_dir();
|
||||
|
||||
@@ -189,7 +189,7 @@ Do not explain what you're going to do - just do it by calling the tools.
|
||||
";
|
||||
|
||||
pub const SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE: &'static str =
|
||||
concatcp!(SYSTEM_NATIVE_TOOL_CALLS, CODING_STYLE);
|
||||
concatcp!(SYSTEM_NATIVE_TOOL_CALLS, CODING_STYLE);
|
||||
|
||||
/// Generate system prompt based on whether multiple tool calls are allowed
|
||||
pub fn get_system_prompt_for_native(allow_multiple: bool) -> String {
|
||||
|
||||
@@ -30,7 +30,7 @@ impl TaskResult {
|
||||
// Look for the final_output marker pattern
|
||||
// The final_output content typically appears after the tool is called
|
||||
// and is the substantive content that follows
|
||||
|
||||
|
||||
// First, try to find if there's a clear final_output section
|
||||
// This would be the content after the last tool execution
|
||||
if let Some(final_output_pos) = content_without_timing.rfind("final_output") {
|
||||
@@ -39,7 +39,7 @@ impl TaskResult {
|
||||
if let Some(content_start) = content_without_timing[final_output_pos..].find('\n') {
|
||||
let start_pos = final_output_pos + content_start + 1;
|
||||
let final_content = &content_without_timing[start_pos..];
|
||||
|
||||
|
||||
// Trim and return the complete content
|
||||
let trimmed = final_content.trim();
|
||||
if !trimmed.is_empty() {
|
||||
@@ -47,7 +47,7 @@ impl TaskResult {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Fallback to the original extract_last_block behavior if we can't find final_output
|
||||
// This maintains backward compatibility
|
||||
self.extract_last_block()
|
||||
@@ -62,12 +62,13 @@ impl TaskResult {
|
||||
} else {
|
||||
&self.response
|
||||
};
|
||||
|
||||
|
||||
// Split by double newlines to find the last substantial block
|
||||
let blocks: Vec<&str> = content_without_timing.split("\n\n").collect();
|
||||
|
||||
|
||||
// Find the last non-empty block that isn't just whitespace
|
||||
blocks.iter()
|
||||
blocks
|
||||
.iter()
|
||||
.rev()
|
||||
.find(|block| !block.trim().is_empty())
|
||||
.map(|block| block.trim().to_string())
|
||||
@@ -79,7 +80,8 @@ impl TaskResult {
|
||||
|
||||
/// Check if the response contains an approval (for autonomous mode)
|
||||
pub fn is_approved(&self) -> bool {
|
||||
self.extract_final_output().contains("IMPLEMENTATION_APPROVED")
|
||||
self.extract_final_output()
|
||||
.contains("IMPLEMENTATION_APPROVED")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -91,20 +93,21 @@ mod tests {
|
||||
fn test_extract_last_block() {
|
||||
// Test case 1: Response with timing info
|
||||
let context_window = ContextWindow::new(1000);
|
||||
let response_with_timing = "Some initial content\n\nFinal block content\n\n⏱️ 2.3s | 💭 1.2s".to_string();
|
||||
let response_with_timing =
|
||||
"Some initial content\n\nFinal block content\n\n⏱️ 2.3s | 💭 1.2s".to_string();
|
||||
let result = TaskResult::new(response_with_timing, context_window.clone());
|
||||
assert_eq!(result.extract_last_block(), "Final block content");
|
||||
|
||||
|
||||
// Test case 2: Response without timing
|
||||
let response_no_timing = "Some initial content\n\nFinal block content".to_string();
|
||||
let result = TaskResult::new(response_no_timing, context_window.clone());
|
||||
assert_eq!(result.extract_last_block(), "Final block content");
|
||||
|
||||
|
||||
// Test case 3: Response with IMPLEMENTATION_APPROVED
|
||||
let response_approved = "Some content\n\nIMPLEMENTATION_APPROVED".to_string();
|
||||
let result = TaskResult::new(response_approved, context_window.clone());
|
||||
assert!(result.is_approved());
|
||||
|
||||
|
||||
// Test case 4: Response without approval
|
||||
let response_not_approved = "Some content\n\nNeeds more work".to_string();
|
||||
let result = TaskResult::new(response_not_approved, context_window);
|
||||
@@ -114,17 +117,17 @@ mod tests {
|
||||
#[test]
|
||||
fn test_extract_last_block_edge_cases() {
|
||||
let context_window = ContextWindow::new(1000);
|
||||
|
||||
|
||||
// Test empty response
|
||||
let empty_response = "".to_string();
|
||||
let result = TaskResult::new(empty_response, context_window.clone());
|
||||
assert_eq!(result.extract_last_block(), "");
|
||||
|
||||
|
||||
// Test single block
|
||||
let single_block = "Just one block".to_string();
|
||||
let result = TaskResult::new(single_block, context_window.clone());
|
||||
assert_eq!(result.extract_last_block(), "Just one block");
|
||||
|
||||
|
||||
// Test multiple empty blocks
|
||||
let multiple_empty = "\n\n\n\nSome content\n\n\n\n".to_string();
|
||||
let result = TaskResult::new(multiple_empty, context_window);
|
||||
@@ -134,18 +137,22 @@ mod tests {
|
||||
#[test]
|
||||
fn test_extract_final_output() {
|
||||
let context_window = ContextWindow::new(1000);
|
||||
|
||||
|
||||
// Test case 1: Response with final_output tool call
|
||||
let response_with_final_output = "Analyzing files...\n\nCalling final_output\n\nThis is the complete feedback\nwith multiple lines\nand important details\n\n⏱️ 2.3s".to_string();
|
||||
let result = TaskResult::new(response_with_final_output, context_window.clone());
|
||||
assert_eq!(result.extract_final_output(), "This is the complete feedback\nwith multiple lines\nand important details");
|
||||
|
||||
assert_eq!(
|
||||
result.extract_final_output(),
|
||||
"This is the complete feedback\nwith multiple lines\nand important details"
|
||||
);
|
||||
|
||||
// Test case 2: Response with IMPLEMENTATION_APPROVED in final_output
|
||||
let response_approved = "Review complete\n\nfinal_output called\n\nIMPLEMENTATION_APPROVED".to_string();
|
||||
let response_approved =
|
||||
"Review complete\n\nfinal_output called\n\nIMPLEMENTATION_APPROVED".to_string();
|
||||
let result = TaskResult::new(response_approved, context_window.clone());
|
||||
assert_eq!(result.extract_final_output(), "IMPLEMENTATION_APPROVED");
|
||||
assert!(result.is_approved());
|
||||
|
||||
|
||||
// Test case 3: Response with detailed feedback in final_output
|
||||
let response_feedback = "Checking implementation...\n\nfinal_output\n\nThe following issues need to be addressed:\n1. Missing error handling in main.rs\n2. Tests are not comprehensive\n3. Documentation needs improvement\n\nPlease fix these issues.".to_string();
|
||||
let result = TaskResult::new(response_feedback, context_window.clone());
|
||||
@@ -154,12 +161,12 @@ mod tests {
|
||||
assert!(extracted.contains("1. Missing error handling"));
|
||||
assert!(extracted.contains("Please fix these issues."));
|
||||
assert!(!result.is_approved());
|
||||
|
||||
|
||||
// Test case 4: Response without final_output (fallback to extract_last_block)
|
||||
let response_no_final_output = "Some analysis\n\nFinal thoughts here".to_string();
|
||||
let result = TaskResult::new(response_no_final_output, context_window.clone());
|
||||
assert_eq!(result.extract_final_output(), "Final thoughts here");
|
||||
|
||||
|
||||
// Test case 5: Empty response
|
||||
let empty_response = "".to_string();
|
||||
let result = TaskResult::new(empty_response, context_window);
|
||||
|
||||
@@ -6,15 +6,19 @@ use std::sync::Arc;
|
||||
fn test_task_result_basic_functionality() {
|
||||
// Create a context window with some messages
|
||||
let mut context = ContextWindow::new(10000);
|
||||
context.add_message(Message::new(MessageRole::User, "Test message 1".to_string())
|
||||
);
|
||||
context.add_message(Message::new(MessageRole::Assistant, "Response 1".to_string())
|
||||
);
|
||||
|
||||
context.add_message(Message::new(
|
||||
MessageRole::User,
|
||||
"Test message 1".to_string(),
|
||||
));
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
"Response 1".to_string(),
|
||||
));
|
||||
|
||||
// Create a TaskResult
|
||||
let response = "This is the response\n\nFinal output block".to_string();
|
||||
let result = TaskResult::new(response.clone(), context.clone());
|
||||
|
||||
|
||||
// Test basic properties
|
||||
assert_eq!(result.response, response);
|
||||
assert_eq!(result.context_window.conversation_history.len(), 2);
|
||||
@@ -24,32 +28,32 @@ fn test_task_result_basic_functionality() {
|
||||
#[test]
|
||||
fn test_extract_last_block_various_formats() {
|
||||
let context = ContextWindow::new(1000);
|
||||
|
||||
|
||||
// Test 1: Standard format with multiple blocks
|
||||
let response1 = "First block\n\nSecond block\n\nThird block".to_string();
|
||||
let result1 = TaskResult::new(response1, context.clone());
|
||||
assert_eq!(result1.extract_last_block(), "Third block");
|
||||
|
||||
|
||||
// Test 2: With timing information
|
||||
let response2 = "Content\n\nFinal block\n\n⏱️ 2.3s | 💭 1.2s".to_string();
|
||||
let result2 = TaskResult::new(response2, context.clone());
|
||||
assert_eq!(result2.extract_last_block(), "Final block");
|
||||
|
||||
|
||||
// Test 3: Single line response
|
||||
let response3 = "Single line response".to_string();
|
||||
let result3 = TaskResult::new(response3, context.clone());
|
||||
assert_eq!(result3.extract_last_block(), "Single line response");
|
||||
|
||||
|
||||
// Test 4: Empty response
|
||||
let response4 = "".to_string();
|
||||
let result4 = TaskResult::new(response4, context.clone());
|
||||
assert_eq!(result4.extract_last_block(), "");
|
||||
|
||||
|
||||
// Test 5: Only whitespace
|
||||
let response5 = "\n\n\n \n\n".to_string();
|
||||
let result5 = TaskResult::new(response5, context.clone());
|
||||
assert_eq!(result5.extract_last_block(), "");
|
||||
|
||||
|
||||
// Test 6: Multiple blocks with empty ones
|
||||
let response6 = "First\n\n\n\n\n\nLast block here".to_string();
|
||||
let result6 = TaskResult::new(response6, context.clone());
|
||||
@@ -59,7 +63,7 @@ fn test_extract_last_block_various_formats() {
|
||||
#[test]
|
||||
fn test_is_approved_detection() {
|
||||
let context = ContextWindow::new(1000);
|
||||
|
||||
|
||||
// Test approved cases
|
||||
let approved_responses = vec![
|
||||
"Analysis complete\n\nIMPLEMENTATION_APPROVED",
|
||||
@@ -67,12 +71,16 @@ fn test_is_approved_detection() {
|
||||
"IMPLEMENTATION_APPROVED",
|
||||
"Review done\n\n✅ IMPLEMENTATION_APPROVED - All tests pass",
|
||||
];
|
||||
|
||||
|
||||
for response in approved_responses {
|
||||
let result = TaskResult::new(response.to_string(), context.clone());
|
||||
assert!(result.is_approved(), "Failed to detect approval in: {}", response);
|
||||
assert!(
|
||||
result.is_approved(),
|
||||
"Failed to detect approval in: {}",
|
||||
response
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
// Test not approved cases
|
||||
let not_approved_responses = vec![
|
||||
"Needs more work",
|
||||
@@ -81,10 +89,14 @@ fn test_is_approved_detection() {
|
||||
"Almost there but not APPROVED",
|
||||
"",
|
||||
];
|
||||
|
||||
|
||||
for response in not_approved_responses {
|
||||
let result = TaskResult::new(response.to_string(), context.clone());
|
||||
assert!(!result.is_approved(), "Incorrectly detected approval in: {}", response);
|
||||
assert!(
|
||||
!result.is_approved(),
|
||||
"Incorrectly detected approval in: {}",
|
||||
response
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -93,33 +105,46 @@ fn test_context_window_preservation() {
|
||||
// Create a context window with specific state
|
||||
let mut context = ContextWindow::new(5000);
|
||||
context.used_tokens = 1234;
|
||||
|
||||
|
||||
// Add some messages
|
||||
for i in 0..5 {
|
||||
context.add_message(Message::new(if i % 2 == 0 { MessageRole::User } else { MessageRole::Assistant }, format!("Message {}", i)));
|
||||
context.add_message(Message::new(
|
||||
if i % 2 == 0 {
|
||||
MessageRole::User
|
||||
} else {
|
||||
MessageRole::Assistant
|
||||
},
|
||||
format!("Message {}", i),
|
||||
));
|
||||
}
|
||||
|
||||
|
||||
// Create TaskResult
|
||||
let result = TaskResult::new("Response".to_string(), context.clone());
|
||||
|
||||
|
||||
// Verify context is preserved
|
||||
assert_eq!(result.context_window.total_tokens, 5000);
|
||||
assert!(result.context_window.used_tokens > 1234); // Should have increased
|
||||
assert_eq!(result.context_window.conversation_history.len(), 5);
|
||||
|
||||
|
||||
// Verify messages are preserved correctly
|
||||
for i in 0..5 {
|
||||
let is_user = matches!(result.context_window.conversation_history[i].role, MessageRole::User);
|
||||
let is_user = matches!(
|
||||
result.context_window.conversation_history[i].role,
|
||||
MessageRole::User
|
||||
);
|
||||
let expected_is_user = i % 2 == 0;
|
||||
assert_eq!(is_user, expected_is_user, "Message {} has wrong role", i);
|
||||
assert_eq!(result.context_window.conversation_history[i].content, format!("Message {}", i));
|
||||
assert_eq!(
|
||||
result.context_window.conversation_history[i].content,
|
||||
format!("Message {}", i)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_coach_feedback_extraction_scenarios() {
|
||||
let context = ContextWindow::new(1000);
|
||||
|
||||
|
||||
// Scenario 1: Coach feedback with file operations and analysis
|
||||
let coach_response = r#"Reading file: src/main.rs
|
||||
📄 File content (23 lines):
|
||||
@@ -133,13 +158,13 @@ The implementation needs the following fixes:
|
||||
1. Add error handling
|
||||
2. Implement missing functions
|
||||
3. Add tests"#;
|
||||
|
||||
|
||||
let result = TaskResult::new(coach_response.to_string(), context.clone());
|
||||
let feedback = result.extract_last_block();
|
||||
assert!(feedback.contains("Add error handling"));
|
||||
assert!(feedback.contains("Implement missing functions"));
|
||||
assert!(feedback.contains("Add tests"));
|
||||
|
||||
|
||||
// Scenario 2: Coach approval
|
||||
let approval_response = r#"Checking compilation...
|
||||
✅ Build successful
|
||||
@@ -148,11 +173,11 @@ Running tests...
|
||||
✅ All tests pass
|
||||
|
||||
IMPLEMENTATION_APPROVED"#;
|
||||
|
||||
|
||||
let result = TaskResult::new(approval_response.to_string(), context.clone());
|
||||
assert!(result.is_approved());
|
||||
assert_eq!(result.extract_last_block(), "IMPLEMENTATION_APPROVED");
|
||||
|
||||
|
||||
// Scenario 3: Complex feedback with timing
|
||||
let complex_response = r#"Tool execution log...
|
||||
|
||||
@@ -163,7 +188,7 @@ The following issues were found:
|
||||
- Missing input validation
|
||||
|
||||
⏱️ 5.2s | 💭 2.1s"#;
|
||||
|
||||
|
||||
let result = TaskResult::new(complex_response.to_string(), context.clone());
|
||||
let feedback = result.extract_last_block();
|
||||
assert!(feedback.contains("Memory leak"));
|
||||
@@ -174,17 +199,18 @@ The following issues were found:
|
||||
#[test]
|
||||
fn test_edge_cases_and_special_characters() {
|
||||
let context = ContextWindow::new(1000);
|
||||
|
||||
|
||||
// Test with special characters and emojis
|
||||
let response_with_emojis = "First part 🚀\n\n✅ Final part with emojis 🎉".to_string();
|
||||
let result = TaskResult::new(response_with_emojis, context.clone());
|
||||
assert_eq!(result.extract_last_block(), "✅ Final part with emojis 🎉");
|
||||
|
||||
|
||||
// Test with code blocks
|
||||
let response_with_code = "Explanation\n\n```rust\nfn main() {}\n```\n\nFinal comment".to_string();
|
||||
let response_with_code =
|
||||
"Explanation\n\n```rust\nfn main() {}\n```\n\nFinal comment".to_string();
|
||||
let result = TaskResult::new(response_with_code, context.clone());
|
||||
assert_eq!(result.extract_last_block(), "Final comment");
|
||||
|
||||
|
||||
// Test with mixed newlines
|
||||
let mixed_newlines = "Part 1\r\n\r\nPart 2\n\nPart 3".to_string();
|
||||
let result = TaskResult::new(mixed_newlines, context.clone());
|
||||
@@ -194,30 +220,33 @@ fn test_edge_cases_and_special_characters() {
|
||||
/// Builds a response of 100 numbered blocks, each followed by a blank line,
/// appends one final block, and asserts that `extract_last_block` yields
/// exactly that final block.
#[test]
fn test_large_response_handling() {
    const FINAL_BLOCK: &str = "This is the final block after 100 other blocks";

    // Concatenate the 100 filler blocks in one pass, then append the block
    // the extraction is expected to return.
    let mut body: String = (0..100)
        .map(|n| format!("Block {} with some content\n\n", n))
        .collect();
    body.push_str(FINAL_BLOCK);

    let task_result = TaskResult::new(body, ContextWindow::new(100000));
    assert_eq!(task_result.extract_last_block(), FINAL_BLOCK);
}
|
||||
|
||||
#[test]
|
||||
fn test_concurrent_access() {
|
||||
use std::thread;
|
||||
|
||||
|
||||
let context = ContextWindow::new(1000);
|
||||
let result = Arc::new(TaskResult::new(
|
||||
"Concurrent test\n\nFinal block".to_string(),
|
||||
context,
|
||||
));
|
||||
|
||||
|
||||
let mut handles = vec![];
|
||||
|
||||
|
||||
// Spawn multiple threads to access the TaskResult
|
||||
for _ in 0..10 {
|
||||
let result_clone = Arc::clone(&result);
|
||||
@@ -225,16 +254,15 @@ fn test_concurrent_access() {
|
||||
// Each thread extracts the last block
|
||||
let block = result_clone.extract_last_block();
|
||||
assert_eq!(block, "Final block");
|
||||
|
||||
|
||||
// Check approval status
|
||||
assert!(!result_clone.is_approved());
|
||||
});
|
||||
handles.push(handle);
|
||||
}
|
||||
|
||||
|
||||
// Wait for all threads to complete
|
||||
for handle in handles {
|
||||
handle.join().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -7,10 +7,10 @@ mod tilde_expansion_tests {
|
||||
// Test that shellexpand works
|
||||
let path_with_tilde = "~/test.txt";
|
||||
let expanded = shellexpand::tilde(path_with_tilde);
|
||||
|
||||
|
||||
// Get the actual home directory
|
||||
let home = env::var("HOME").expect("HOME environment variable not set");
|
||||
|
||||
|
||||
// Verify expansion happened
|
||||
assert_eq!(expanded.as_ref(), format!("{}/test.txt", home));
|
||||
assert!(!expanded.contains("~"));
|
||||
@@ -20,9 +20,9 @@ mod tilde_expansion_tests {
|
||||
fn test_tilde_expansion_with_subdirs() {
|
||||
let path_with_tilde = "~/Documents/test.txt";
|
||||
let expanded = shellexpand::tilde(path_with_tilde);
|
||||
|
||||
|
||||
let home = env::var("HOME").expect("HOME environment variable not set");
|
||||
|
||||
|
||||
assert_eq!(expanded.as_ref(), format!("{}/Documents/test.txt", home));
|
||||
}
|
||||
|
||||
@@ -30,7 +30,7 @@ mod tilde_expansion_tests {
|
||||
fn test_no_tilde_unchanged() {
|
||||
let path_without_tilde = "/absolute/path/test.txt";
|
||||
let expanded = shellexpand::tilde(path_without_tilde);
|
||||
|
||||
|
||||
assert_eq!(expanded.as_ref(), path_without_tilde);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,58 +4,60 @@
|
||||
pub trait UiWriter: Send + Sync {
|
||||
/// Print a simple message
|
||||
fn print(&self, message: &str);
|
||||
|
||||
|
||||
/// Print a message with a newline
|
||||
fn println(&self, message: &str);
|
||||
|
||||
|
||||
/// Print without newline (for progress indicators)
|
||||
fn print_inline(&self, message: &str);
|
||||
|
||||
|
||||
/// Print a system prompt section
|
||||
fn print_system_prompt(&self, prompt: &str);
|
||||
|
||||
|
||||
/// Print a context window status message
|
||||
fn print_context_status(&self, message: &str);
|
||||
|
||||
|
||||
/// Print a context thinning success message with highlight and animation
|
||||
fn print_context_thinning(&self, message: &str);
|
||||
|
||||
|
||||
/// Print a tool execution header
|
||||
fn print_tool_header(&self, tool_name: &str);
|
||||
|
||||
|
||||
/// Print a tool argument
|
||||
fn print_tool_arg(&self, key: &str, value: &str);
|
||||
|
||||
|
||||
/// Print tool output header
|
||||
fn print_tool_output_header(&self);
|
||||
|
||||
|
||||
/// Update the current tool output line (replaces previous line)
|
||||
fn update_tool_output_line(&self, line: &str);
|
||||
|
||||
|
||||
/// Print a tool output line
|
||||
fn print_tool_output_line(&self, line: &str);
|
||||
|
||||
|
||||
/// Print tool output summary (when output is truncated)
|
||||
fn print_tool_output_summary(&self, hidden_count: usize);
|
||||
|
||||
|
||||
/// Print tool execution timing
|
||||
fn print_tool_timing(&self, duration_str: &str);
|
||||
|
||||
|
||||
/// Print the agent prompt indicator
|
||||
fn print_agent_prompt(&self);
|
||||
|
||||
|
||||
/// Print agent response inline (for streaming)
|
||||
fn print_agent_response(&self, content: &str);
|
||||
|
||||
|
||||
/// Notify that an SSE event was received (including pings)
|
||||
fn notify_sse_received(&self);
|
||||
|
||||
|
||||
/// Flush any buffered output
|
||||
fn flush(&self);
|
||||
|
||||
|
||||
/// Returns true if this UI writer wants full, untruncated output
|
||||
/// Default is false (truncate for human readability)
|
||||
fn wants_full_output(&self) -> bool { false }
|
||||
fn wants_full_output(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
/// Prompt the user for a yes/no confirmation
|
||||
fn prompt_user_yes_no(&self, message: &str) -> bool;
|
||||
@@ -86,7 +88,13 @@ impl UiWriter for NullUiWriter {
|
||||
fn print_agent_response(&self, _content: &str) {}
|
||||
fn notify_sse_received(&self) {}
|
||||
fn flush(&self) {}
|
||||
fn wants_full_output(&self) -> bool { false }
|
||||
fn prompt_user_yes_no(&self, _message: &str) -> bool { true }
|
||||
fn prompt_user_choice(&self, _message: &str, _options: &[&str]) -> usize { 0 }
|
||||
}
|
||||
fn wants_full_output(&self) -> bool {
|
||||
false
|
||||
}
|
||||
fn prompt_user_yes_no(&self, _message: &str) -> bool {
|
||||
true
|
||||
}
|
||||
fn prompt_user_choice(&self, _message: &str, _options: &[&str]) -> usize {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@ async fn test_find_async_functions() {
|
||||
// Create a temporary test file
|
||||
let test_dir = std::env::temp_dir().join("g3_test_code_search");
|
||||
fs::create_dir_all(&test_dir).unwrap();
|
||||
|
||||
|
||||
let test_file = test_dir.join("test.rs");
|
||||
fs::write(
|
||||
&test_file,
|
||||
@@ -47,7 +47,10 @@ pub async fn another_async(x: i32) -> Result<(), ()> {
|
||||
assert_eq!(response.searches.len(), 1);
|
||||
let search_result = &response.searches[0];
|
||||
assert_eq!(search_result.name, "find_async_functions");
|
||||
assert_eq!(search_result.match_count, 2, "Should find 2 async functions");
|
||||
assert_eq!(
|
||||
search_result.match_count, 2,
|
||||
"Should find 2 async functions"
|
||||
);
|
||||
assert!(search_result.error.is_none());
|
||||
|
||||
// Check that we found the right functions
|
||||
@@ -69,7 +72,7 @@ async fn test_find_all_functions() {
|
||||
// Create a temporary test file
|
||||
let test_dir = std::env::temp_dir().join("g3_test_code_search_2");
|
||||
fs::create_dir_all(&test_dir).unwrap();
|
||||
|
||||
|
||||
let test_file = test_dir.join("test.rs");
|
||||
fs::write(
|
||||
&test_file,
|
||||
@@ -107,7 +110,10 @@ pub async fn another_async(x: i32) -> Result<(), ()> {
|
||||
assert_eq!(response.searches.len(), 1);
|
||||
let search_result = &response.searches[0];
|
||||
assert_eq!(search_result.name, "find_all_functions");
|
||||
assert_eq!(search_result.match_count, 3, "Should find 3 functions total");
|
||||
assert_eq!(
|
||||
search_result.match_count, 3,
|
||||
"Should find 3 functions total"
|
||||
);
|
||||
assert!(search_result.error.is_none());
|
||||
|
||||
// Check that we found all functions
|
||||
@@ -130,7 +136,7 @@ async fn test_find_structs() {
|
||||
// Create a temporary test file
|
||||
let test_dir = std::env::temp_dir().join("g3_test_code_search_3");
|
||||
fs::create_dir_all(&test_dir).unwrap();
|
||||
|
||||
|
||||
let test_file = test_dir.join("test.rs");
|
||||
fs::write(
|
||||
&test_file,
|
||||
@@ -188,7 +194,7 @@ async fn test_context_lines() {
|
||||
// Create a temporary test file
|
||||
let test_dir = std::env::temp_dir().join("g3_test_code_search_4");
|
||||
fs::create_dir_all(&test_dir).unwrap();
|
||||
|
||||
|
||||
let test_file = test_dir.join("test.rs");
|
||||
fs::write(
|
||||
&test_file,
|
||||
@@ -223,16 +229,22 @@ pub fn target_function() {
|
||||
assert_eq!(response.searches.len(), 1);
|
||||
let search_result = &response.searches[0];
|
||||
assert_eq!(search_result.match_count, 1);
|
||||
|
||||
|
||||
let match_result = &search_result.matches[0];
|
||||
assert!(match_result.context.is_some());
|
||||
|
||||
|
||||
let context = match_result.context.as_ref().unwrap();
|
||||
assert!(context.contains("Line 2"), "Should include 2 lines before");
|
||||
assert!(context.contains("target_function"), "Should include the function");
|
||||
assert!(
|
||||
context.contains("target_function"),
|
||||
"Should include the function"
|
||||
);
|
||||
// Note: context_lines=2 means 2 lines before and after the match line (line 4)
|
||||
// So we get lines 2-6, which includes up to println but not the closing brace
|
||||
assert!(context.contains("println"), "Should include 2 lines after the match");
|
||||
assert!(
|
||||
context.contains("println"),
|
||||
"Should include 2 lines after the match"
|
||||
);
|
||||
|
||||
// Cleanup
|
||||
fs::remove_dir_all(&test_dir).ok();
|
||||
@@ -243,7 +255,7 @@ async fn test_multiple_searches() {
|
||||
// Create a temporary test file
|
||||
let test_dir = std::env::temp_dir().join("g3_test_code_search_5");
|
||||
fs::create_dir_all(&test_dir).unwrap();
|
||||
|
||||
|
||||
let test_file = test_dir.join("test.rs");
|
||||
fs::write(
|
||||
&test_file,
|
||||
@@ -301,7 +313,7 @@ async fn test_python_search() {
|
||||
// Create a temporary Python test file
|
||||
let test_dir = std::env::temp_dir().join("g3_test_code_search_python");
|
||||
fs::create_dir_all(&test_dir).unwrap();
|
||||
|
||||
|
||||
let test_file = test_dir.join("test.py");
|
||||
fs::write(
|
||||
&test_file,
|
||||
@@ -338,14 +350,17 @@ class MyClass:
|
||||
|
||||
assert_eq!(response.searches.len(), 1);
|
||||
let search_result = &response.searches[0];
|
||||
assert_eq!(search_result.match_count, 3, "Should find 3 functions in Python (2 regular + 1 async + 1 method)");
|
||||
|
||||
assert_eq!(
|
||||
search_result.match_count, 3,
|
||||
"Should find 3 functions in Python (2 regular + 1 async + 1 method)"
|
||||
);
|
||||
|
||||
let function_names: Vec<String> = search_result
|
||||
.matches
|
||||
.iter()
|
||||
.filter_map(|m| m.captures.get("name").cloned())
|
||||
.collect();
|
||||
|
||||
|
||||
assert!(function_names.contains(&"regular_function".to_string()));
|
||||
assert!(function_names.contains(&"async_function".to_string()));
|
||||
assert!(function_names.contains(&"method".to_string()));
|
||||
@@ -359,7 +374,7 @@ async fn test_javascript_search() {
|
||||
// Create a temporary JavaScript test file
|
||||
let test_dir = std::env::temp_dir().join("g3_test_code_search_js");
|
||||
fs::create_dir_all(&test_dir).unwrap();
|
||||
|
||||
|
||||
let test_file = test_dir.join("test.js");
|
||||
fs::write(
|
||||
&test_file,
|
||||
@@ -396,14 +411,17 @@ class MyClass {
|
||||
|
||||
assert_eq!(response.searches.len(), 1);
|
||||
let search_result = &response.searches[0];
|
||||
assert_eq!(search_result.match_count, 2, "Should find 2 functions in JavaScript");
|
||||
|
||||
assert_eq!(
|
||||
search_result.match_count, 2,
|
||||
"Should find 2 functions in JavaScript"
|
||||
);
|
||||
|
||||
let function_names: Vec<String> = search_result
|
||||
.matches
|
||||
.iter()
|
||||
.filter_map(|m| m.captures.get("name").cloned())
|
||||
.collect();
|
||||
|
||||
|
||||
assert!(function_names.contains(&"regularFunction".to_string()));
|
||||
assert!(function_names.contains(&"asyncFunction".to_string()));
|
||||
|
||||
@@ -420,7 +438,7 @@ async fn test_go_search() {
|
||||
.and_then(|p| p.parent())
|
||||
.unwrap();
|
||||
let test_code_path = workspace_root.join("examples/test_code");
|
||||
|
||||
|
||||
let request = CodeSearchRequest {
|
||||
searches: vec![SearchSpec {
|
||||
name: "go_functions".to_string(),
|
||||
@@ -435,14 +453,19 @@ async fn test_go_search() {
|
||||
|
||||
let response = execute_code_search(request).await.unwrap();
|
||||
assert_eq!(response.searches.len(), 1);
|
||||
|
||||
|
||||
eprintln!("Go search result: {:?}", response.searches[0]);
|
||||
eprintln!("Match count: {}", response.searches[0].matches.len());
|
||||
eprintln!("Error: {:?}", response.searches[0].error);
|
||||
assert!(response.searches[0].matches.len() > 0, "No matches found for Go search");
|
||||
|
||||
assert!(
|
||||
response.searches[0].matches.len() > 0,
|
||||
"No matches found for Go search"
|
||||
);
|
||||
|
||||
// Should find main and greet functions
|
||||
let names: Vec<&str> = response.searches[0].matches.iter()
|
||||
let names: Vec<&str> = response.searches[0]
|
||||
.matches
|
||||
.iter()
|
||||
.filter_map(|m| m.captures.get("name").map(|s| s.as_str()))
|
||||
.collect();
|
||||
assert!(names.contains(&"main"));
|
||||
@@ -458,7 +481,7 @@ async fn test_java_search() {
|
||||
.and_then(|p| p.parent())
|
||||
.unwrap();
|
||||
let test_code_path = workspace_root.join("examples/test_code");
|
||||
|
||||
|
||||
let request = CodeSearchRequest {
|
||||
searches: vec![SearchSpec {
|
||||
name: "java_classes".to_string(),
|
||||
@@ -474,9 +497,11 @@ async fn test_java_search() {
|
||||
let response = execute_code_search(request).await.unwrap();
|
||||
assert_eq!(response.searches.len(), 1);
|
||||
assert!(response.searches[0].matches.len() > 0);
|
||||
|
||||
|
||||
// Should find Example class
|
||||
let names: Vec<&str> = response.searches[0].matches.iter()
|
||||
let names: Vec<&str> = response.searches[0]
|
||||
.matches
|
||||
.iter()
|
||||
.filter_map(|m| m.captures.get("name").map(|s| s.as_str()))
|
||||
.collect();
|
||||
assert!(names.contains(&"Example"));
|
||||
@@ -491,7 +516,7 @@ async fn test_c_search() {
|
||||
.and_then(|p| p.parent())
|
||||
.unwrap();
|
||||
let test_code_path = workspace_root.join("examples/test_code");
|
||||
|
||||
|
||||
let request = CodeSearchRequest {
|
||||
searches: vec![SearchSpec {
|
||||
name: "c_functions".to_string(),
|
||||
@@ -507,9 +532,11 @@ async fn test_c_search() {
|
||||
let response = execute_code_search(request).await.unwrap();
|
||||
assert_eq!(response.searches.len(), 1);
|
||||
assert!(response.searches[0].matches.len() > 0);
|
||||
|
||||
|
||||
// Should find greet, add, and main functions
|
||||
let names: Vec<&str> = response.searches[0].matches.iter()
|
||||
let names: Vec<&str> = response.searches[0]
|
||||
.matches
|
||||
.iter()
|
||||
.filter_map(|m| m.captures.get("name").map(|s| s.as_str()))
|
||||
.collect();
|
||||
assert!(names.contains(&"greet"));
|
||||
@@ -526,7 +553,7 @@ async fn test_cpp_search() {
|
||||
.and_then(|p| p.parent())
|
||||
.unwrap();
|
||||
let test_code_path = workspace_root.join("examples/test_code");
|
||||
|
||||
|
||||
let request = CodeSearchRequest {
|
||||
searches: vec![SearchSpec {
|
||||
name: "cpp_classes".to_string(),
|
||||
@@ -542,9 +569,11 @@ async fn test_cpp_search() {
|
||||
let response = execute_code_search(request).await.unwrap();
|
||||
assert_eq!(response.searches.len(), 1);
|
||||
assert!(response.searches[0].matches.len() > 0);
|
||||
|
||||
|
||||
// Should find Person class
|
||||
let names: Vec<&str> = response.searches[0].matches.iter()
|
||||
let names: Vec<&str> = response.searches[0]
|
||||
.matches
|
||||
.iter()
|
||||
.filter_map(|m| m.captures.get("name").map(|s| s.as_str()))
|
||||
.collect();
|
||||
assert!(names.contains(&"Person"));
|
||||
@@ -568,9 +597,11 @@ async fn test_kotlin_search() {
|
||||
let response = execute_code_search(request).await.unwrap();
|
||||
assert_eq!(response.searches.len(), 1);
|
||||
assert!(response.searches[0].matches.len() > 0);
|
||||
|
||||
|
||||
// Should find Person class
|
||||
let names: Vec<&str> = response.searches[0].matches.iter()
|
||||
let names: Vec<&str> = response.searches[0]
|
||||
.matches
|
||||
.iter()
|
||||
.filter_map(|m| m.captures.get("name").map(|s| s.as_str()))
|
||||
.collect();
|
||||
assert!(names.contains(&"Person"));
|
||||
|
||||
@@ -4,35 +4,35 @@ use g3_providers::{Message, MessageRole};
|
||||
#[test]
|
||||
fn test_thinning_thresholds() {
|
||||
let mut context = ContextWindow::new(10000);
|
||||
|
||||
|
||||
// At 0%, should not thin
|
||||
assert!(!context.should_thin());
|
||||
|
||||
|
||||
// Simulate reaching 50% usage
|
||||
context.used_tokens = 5000;
|
||||
assert!(context.should_thin());
|
||||
|
||||
|
||||
// After thinning at 50%, should not thin again until next threshold
|
||||
context.last_thinning_percentage = 50;
|
||||
assert!(!context.should_thin());
|
||||
|
||||
|
||||
// At 60%, should thin again
|
||||
context.used_tokens = 6000;
|
||||
assert!(context.should_thin());
|
||||
|
||||
|
||||
// After thinning at 60%, should not thin
|
||||
context.last_thinning_percentage = 60;
|
||||
assert!(!context.should_thin());
|
||||
|
||||
|
||||
// At 70%, should thin
|
||||
context.used_tokens = 7000;
|
||||
assert!(context.should_thin());
|
||||
|
||||
|
||||
// At 80%, should thin
|
||||
context.last_thinning_percentage = 70;
|
||||
context.used_tokens = 8000;
|
||||
assert!(context.should_thin());
|
||||
|
||||
|
||||
// After 80%, should not thin (compaction takes over)
|
||||
context.last_thinning_percentage = 80;
|
||||
context.used_tokens = 8500;
|
||||
@@ -42,7 +42,7 @@ fn test_thinning_thresholds() {
|
||||
#[test]
|
||||
fn test_thin_context_basic() {
|
||||
let mut context = ContextWindow::new(10000);
|
||||
|
||||
|
||||
// Add some messages to the first third
|
||||
for i in 0..9 {
|
||||
if i % 2 == 0 {
|
||||
@@ -62,24 +62,25 @@ fn test_thin_context_basic() {
|
||||
// Small tool result (< 1000 chars)
|
||||
format!("Tool result: small result {}", i)
|
||||
};
|
||||
|
||||
context.add_message(Message::new(
|
||||
MessageRole::User,
|
||||
content,
|
||||
));
|
||||
|
||||
context.add_message(Message::new(MessageRole::User, content));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Trigger thinning at 50%
|
||||
context.used_tokens = 5000;
|
||||
let (summary, _chars_saved) = context.thin_context();
|
||||
|
||||
|
||||
println!("Thinning summary: {}", summary);
|
||||
|
||||
|
||||
// Should have thinned at least 1 large tool result in the first third
|
||||
assert!(summary.contains("1 tool result"), "Summary was: {}", summary);
|
||||
assert!(
|
||||
summary.contains("1 tool result"),
|
||||
"Summary was: {}",
|
||||
summary
|
||||
);
|
||||
assert!(summary.contains("50%"));
|
||||
|
||||
|
||||
// Check that the large tool results were replaced
|
||||
let first_third_end = context.conversation_history.len() / 3;
|
||||
for i in 0..first_third_end {
|
||||
@@ -96,13 +97,13 @@ fn test_thin_context_basic() {
|
||||
#[test]
|
||||
fn test_thin_write_file_tool_calls() {
|
||||
let mut context = ContextWindow::new(10000);
|
||||
|
||||
|
||||
// Add some messages including a write_file tool call with large content
|
||||
context.add_message(Message::new(
|
||||
MessageRole::User,
|
||||
"Please create a large file".to_string(),
|
||||
));
|
||||
|
||||
|
||||
// Add an assistant message with a write_file tool call containing large content
|
||||
let large_content = "x".repeat(1500);
|
||||
let tool_call_json = format!(
|
||||
@@ -113,12 +114,12 @@ fn test_thin_write_file_tool_calls() {
|
||||
MessageRole::Assistant,
|
||||
format!("I'll create that file.\n\n{}", tool_call_json),
|
||||
));
|
||||
|
||||
|
||||
context.add_message(Message::new(
|
||||
MessageRole::User,
|
||||
"Tool result: ✅ Successfully wrote 1500 lines".to_string(),
|
||||
));
|
||||
|
||||
|
||||
// Add more messages to ensure we have enough for "first third" logic
|
||||
for i in 0..6 {
|
||||
context.add_message(Message::new(
|
||||
@@ -126,16 +127,16 @@ fn test_thin_write_file_tool_calls() {
|
||||
format!("Response {}", i),
|
||||
));
|
||||
}
|
||||
|
||||
|
||||
// Trigger thinning at 50%
|
||||
context.used_tokens = 5000;
|
||||
let (summary, _chars_saved) = context.thin_context();
|
||||
|
||||
|
||||
println!("Thinning summary: {}", summary);
|
||||
|
||||
|
||||
// Should have thinned the write_file tool call
|
||||
assert!(summary.contains("tool call") || summary.contains("chars saved"));
|
||||
|
||||
|
||||
// Check that the large content was replaced with a file reference
|
||||
let first_third_end = context.conversation_history.len() / 3;
|
||||
for i in 0..first_third_end {
|
||||
@@ -152,15 +153,19 @@ fn test_thin_write_file_tool_calls() {
|
||||
#[test]
|
||||
fn test_thin_str_replace_tool_calls() {
|
||||
let mut context = ContextWindow::new(10000);
|
||||
|
||||
|
||||
// Add some messages including a str_replace tool call with large diff
|
||||
context.add_message(Message::new(
|
||||
MessageRole::User,
|
||||
"Please update the file".to_string(),
|
||||
));
|
||||
|
||||
|
||||
// Add an assistant message with a str_replace tool call containing large diff
|
||||
let large_diff = format!("--- old\n{}\n+++ new\n{}", "-old line\n".repeat(100), "+new line\n".repeat(100));
|
||||
let large_diff = format!(
|
||||
"--- old\n{}\n+++ new\n{}",
|
||||
"-old line\n".repeat(100),
|
||||
"+new line\n".repeat(100)
|
||||
);
|
||||
let tool_call_json = format!(
|
||||
r#"{{"tool": "str_replace", "args": {{"file_path": "test.txt", "diff": "{}"}}}}"#,
|
||||
large_diff.replace('\n', "\\n")
|
||||
@@ -169,12 +174,12 @@ fn test_thin_str_replace_tool_calls() {
|
||||
MessageRole::Assistant,
|
||||
format!("I'll update that file.\n\n{}", tool_call_json),
|
||||
));
|
||||
|
||||
|
||||
context.add_message(Message::new(
|
||||
MessageRole::User,
|
||||
"Tool result: ✅ applied unified diff".to_string(),
|
||||
));
|
||||
|
||||
|
||||
// Add more messages to ensure we have enough for "first third" logic
|
||||
for i in 0..6 {
|
||||
context.add_message(Message::new(
|
||||
@@ -182,16 +187,16 @@ fn test_thin_str_replace_tool_calls() {
|
||||
format!("Response {}", i),
|
||||
));
|
||||
}
|
||||
|
||||
|
||||
// Trigger thinning at 50%
|
||||
context.used_tokens = 5000;
|
||||
let (summary, _chars_saved) = context.thin_context();
|
||||
|
||||
|
||||
println!("Thinning summary: {}", summary);
|
||||
|
||||
|
||||
// Should have thinned the str_replace tool call
|
||||
assert!(summary.contains("tool call") || summary.contains("chars saved"));
|
||||
|
||||
|
||||
// Check that the large diff was replaced with a file reference
|
||||
let first_third_end = context.conversation_history.len() / 3;
|
||||
for i in 0..first_third_end {
|
||||
@@ -209,7 +214,7 @@ fn test_thin_str_replace_tool_calls() {
|
||||
#[test]
|
||||
fn test_thin_context_no_large_results() {
|
||||
let mut context = ContextWindow::new(10000);
|
||||
|
||||
|
||||
// Add only small messages
|
||||
for i in 0..9 {
|
||||
context.add_message(Message::new(
|
||||
@@ -217,10 +222,10 @@ fn test_thin_context_no_large_results() {
|
||||
format!("Tool result: small {}", i),
|
||||
));
|
||||
}
|
||||
|
||||
|
||||
context.used_tokens = 5000;
|
||||
let (summary, _chars_saved) = context.thin_context();
|
||||
|
||||
|
||||
// Should report no large results found
|
||||
assert!(summary.contains("no large tool results or tool calls found"));
|
||||
}
|
||||
@@ -228,7 +233,7 @@ fn test_thin_context_no_large_results() {
|
||||
#[test]
|
||||
fn test_thin_context_only_affects_first_third() {
|
||||
let mut context = ContextWindow::new(10000);
|
||||
|
||||
|
||||
// Add 12 messages (first third = 4 messages)
|
||||
for i in 0..12 {
|
||||
let content = if i % 2 == 1 {
|
||||
@@ -237,23 +242,23 @@ fn test_thin_context_only_affects_first_third() {
|
||||
} else {
|
||||
format!("Assistant message {}", i)
|
||||
};
|
||||
|
||||
|
||||
let role = if i % 2 == 1 {
|
||||
MessageRole::User
|
||||
} else {
|
||||
MessageRole::Assistant
|
||||
};
|
||||
|
||||
|
||||
context.add_message(Message::new(role, content));
|
||||
}
|
||||
|
||||
|
||||
context.used_tokens = 5000;
|
||||
let (summary, _chars_saved) = context.thin_context();
|
||||
|
||||
|
||||
// First third is 4 messages (indices 0-3), so only indices 1 and 3 should be thinned
|
||||
// That's 2 tool results
|
||||
assert!(summary.contains("2 tool results"));
|
||||
|
||||
|
||||
// Check that messages after the first third are NOT thinned
|
||||
let first_third_end = context.conversation_history.len() / 3;
|
||||
for i in first_third_end..context.conversation_history.len() {
|
||||
@@ -261,8 +266,11 @@ fn test_thin_context_only_affects_first_third() {
|
||||
if matches!(msg.role, MessageRole::User) && msg.content.starts_with("Tool result:") {
|
||||
// These should still be large (not thinned)
|
||||
if i % 2 == 1 {
|
||||
assert!(msg.content.len() > 1000,
|
||||
"Message at index {} should not have been thinned", i);
|
||||
assert!(
|
||||
msg.content.len() > 1000,
|
||||
"Message at index {} should not have been thinned",
|
||||
i
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,28 +6,34 @@ use serial_test::serial;
|
||||
#[serial]
|
||||
fn test_todo_read_results_not_thinned() {
|
||||
let mut context = ContextWindow::new(10000);
|
||||
|
||||
|
||||
// Add a todo_read tool call
|
||||
context.add_message(Message::new(MessageRole::Assistant, r#"{"tool": "todo_read", "args": {}}"#.to_string()));
|
||||
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
r#"{"tool": "todo_read", "args": {}}"#.to_string(),
|
||||
));
|
||||
|
||||
// Add a large TODO result (> 500 chars)
|
||||
let large_todo_result = format!(
|
||||
"Tool result: 📝 TODO list:\n{}",
|
||||
"- [ ] Task with long description\n".repeat(50)
|
||||
);
|
||||
context.add_message(Message::new(MessageRole::User, large_todo_result.clone()));
|
||||
|
||||
|
||||
// Add more messages to ensure we have enough for "first third" logic
|
||||
for i in 0..6 {
|
||||
context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)))
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
format!("Response {}", i),
|
||||
))
|
||||
}
|
||||
|
||||
|
||||
// Trigger thinning at 50%
|
||||
context.used_tokens = 5000;
|
||||
let (summary, _chars_saved) = context.thin_context();
|
||||
|
||||
|
||||
println!("Thinning summary: {}", summary);
|
||||
|
||||
|
||||
// Check that the TODO result was NOT thinned
|
||||
let first_third_end = context.conversation_history.len() / 3;
|
||||
for i in 0..first_third_end {
|
||||
@@ -53,29 +59,38 @@ fn test_todo_read_results_not_thinned() {
|
||||
#[serial]
|
||||
fn test_todo_write_results_not_thinned() {
|
||||
let mut context = ContextWindow::new(10000);
|
||||
|
||||
|
||||
// Add a todo_write tool call
|
||||
let large_content = "- [ ] Task\n".repeat(100);
|
||||
context.add_message(Message::new(MessageRole::Assistant, format!(r#"{{"tool": "todo_write", "args": {{"content": "{}"}}}}"#, large_content)));
|
||||
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
format!(
|
||||
r#"{{"tool": "todo_write", "args": {{"content": "{}"}}}}"#,
|
||||
large_content
|
||||
),
|
||||
));
|
||||
|
||||
// Add a large TODO write result
|
||||
let large_todo_result = format!(
|
||||
"Tool result: ✅ TODO list updated ({} chars) and saved to todo.g3.md",
|
||||
large_content.len()
|
||||
);
|
||||
context.add_message(Message::new(MessageRole::User, large_todo_result.clone()));
|
||||
|
||||
|
||||
// Add more messages
|
||||
for i in 0..6 {
|
||||
context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)))
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
format!("Response {}", i),
|
||||
))
|
||||
}
|
||||
|
||||
|
||||
// Trigger thinning at 50%
|
||||
context.used_tokens = 5000;
|
||||
let (summary, _chars_saved) = context.thin_context();
|
||||
|
||||
|
||||
println!("Thinning summary: {}", summary);
|
||||
|
||||
|
||||
// Check that the TODO write result was NOT thinned
|
||||
let first_third_end = context.conversation_history.len() / 3;
|
||||
for i in 0..first_third_end {
|
||||
@@ -99,31 +114,37 @@ fn test_todo_write_results_not_thinned() {
|
||||
#[serial]
|
||||
fn test_non_todo_results_still_thinned() {
|
||||
let mut context = ContextWindow::new(10000);
|
||||
|
||||
|
||||
// Add a non-TODO tool call (e.g., read_file)
|
||||
context.add_message(Message::new(MessageRole::Assistant, r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string()));
|
||||
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string(),
|
||||
));
|
||||
|
||||
// Add a large read_file result (> 500 chars)
|
||||
let large_result = format!("Tool result: {}", "x".repeat(1500));
|
||||
context.add_message(Message::new(MessageRole::User, large_result));
|
||||
|
||||
|
||||
// Add more messages
|
||||
for i in 0..6 {
|
||||
context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)))
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
format!("Response {}", i),
|
||||
))
|
||||
}
|
||||
|
||||
|
||||
// Trigger thinning at 50%
|
||||
context.used_tokens = 5000;
|
||||
let (summary, _chars_saved) = context.thin_context();
|
||||
|
||||
|
||||
println!("Thinning summary: {}", summary);
|
||||
|
||||
|
||||
// Should have thinned the non-TODO result
|
||||
assert!(
|
||||
summary.contains("1 tool result") || summary.contains("chars saved"),
|
||||
"Non-TODO results should be thinned"
|
||||
);
|
||||
|
||||
|
||||
// Check that the result was actually thinned
|
||||
let first_third_end = context.conversation_history.len() / 3;
|
||||
for i in 0..first_third_end {
|
||||
@@ -143,26 +164,29 @@ fn test_non_todo_results_still_thinned() {
|
||||
#[serial]
|
||||
fn test_todo_read_with_spaces_in_tool_name() {
|
||||
let mut context = ContextWindow::new(10000);
|
||||
|
||||
|
||||
// Add a todo_read tool call with spaces (JSON formatting variation)
|
||||
context.add_message(Message::new(MessageRole::Assistant, r#"{"tool": "todo_read", "args": {}}"#.to_string()));
|
||||
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
r#"{"tool": "todo_read", "args": {}}"#.to_string(),
|
||||
));
|
||||
|
||||
// Add a large TODO result
|
||||
let large_todo_result = format!(
|
||||
"Tool result: 📝 TODO list:\n{}",
|
||||
"- [ ] Task\n".repeat(50)
|
||||
);
|
||||
let large_todo_result = format!("Tool result: 📝 TODO list:\n{}", "- [ ] Task\n".repeat(50));
|
||||
context.add_message(Message::new(MessageRole::User, large_todo_result.clone()));
|
||||
|
||||
|
||||
// Add more messages
|
||||
for i in 0..6 {
|
||||
context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)))
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
format!("Response {}", i),
|
||||
))
|
||||
}
|
||||
|
||||
|
||||
// Trigger thinning
|
||||
context.used_tokens = 5000;
|
||||
let (_summary, _chars_saved) = context.thin_context();
|
||||
|
||||
|
||||
// Verify TODO result was not thinned
|
||||
let first_third_end = context.conversation_history.len() / 3;
|
||||
for i in 0..first_third_end {
|
||||
|
||||
@@ -1,20 +1,19 @@
|
||||
use g3_core::Agent;
|
||||
use g3_core::ui_writer::NullUiWriter;
|
||||
use g3_core::Agent;
|
||||
use serial_test::serial;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
use tempfile::TempDir;
|
||||
|
||||
|
||||
/// Helper to create a test agent in a temporary directory
|
||||
async fn create_test_agent_in_dir(temp_dir: &TempDir) -> Agent<NullUiWriter> {
|
||||
// Change to temp directory
|
||||
std::env::set_current_dir(temp_dir.path()).unwrap();
|
||||
|
||||
|
||||
// Create a minimal config
|
||||
let config = g3_config::Config::default();
|
||||
let ui_writer = NullUiWriter;
|
||||
|
||||
|
||||
Agent::new(config, ui_writer).await.unwrap()
|
||||
}
|
||||
|
||||
@@ -29,10 +28,10 @@ async fn test_todo_write_creates_file() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let mut agent = create_test_agent_in_dir(&temp_dir).await;
|
||||
let todo_path = get_todo_path(&temp_dir);
|
||||
|
||||
|
||||
// Initially, todo.g3.md should not exist
|
||||
assert!(!todo_path.exists(), "todo.g3.md should not exist initially");
|
||||
|
||||
|
||||
// Create a tool call to write TODO
|
||||
let tool_call = g3_core::ToolCall {
|
||||
tool: "todo_write".to_string(),
|
||||
@@ -40,17 +39,21 @@ async fn test_todo_write_creates_file() {
|
||||
"content": "- [ ] Task 1\n- [ ] Task 2\n- [x] Task 3"
|
||||
}),
|
||||
};
|
||||
|
||||
|
||||
// Execute the tool
|
||||
let result = agent.execute_tool(&tool_call).await.unwrap();
|
||||
|
||||
|
||||
// Should report success
|
||||
assert!(result.contains("✅"), "Should report success: {}", result);
|
||||
assert!(result.contains("todo.g3.md"), "Should mention todo.g3.md: {}", result);
|
||||
|
||||
assert!(
|
||||
result.contains("todo.g3.md"),
|
||||
"Should mention todo.g3.md: {}",
|
||||
result
|
||||
);
|
||||
|
||||
// File should now exist
|
||||
assert!(todo_path.exists(), "todo.g3.md should exist after write");
|
||||
|
||||
|
||||
// File should contain the correct content
|
||||
let content = fs::read_to_string(&todo_path).unwrap();
|
||||
assert_eq!(content, "- [ ] Task 1\n- [ ] Task 2\n- [x] Task 3");
|
||||
@@ -61,27 +64,39 @@ async fn test_todo_write_creates_file() {
|
||||
async fn test_todo_read_from_file() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let todo_path = get_todo_path(&temp_dir);
|
||||
|
||||
|
||||
// Pre-create a todo.g3.md file
|
||||
let test_content = "# My TODO\n\n- [ ] First task\n- [x] Completed task";
|
||||
fs::write(&todo_path, test_content).unwrap();
|
||||
|
||||
|
||||
// Create agent (should load from file)
|
||||
let mut agent = create_test_agent_in_dir(&temp_dir).await;
|
||||
|
||||
|
||||
// Create a tool call to read TODO
|
||||
let tool_call = g3_core::ToolCall {
|
||||
tool: "todo_read".to_string(),
|
||||
args: serde_json::json!({}),
|
||||
};
|
||||
|
||||
|
||||
// Execute the tool
|
||||
let result = agent.execute_tool(&tool_call).await.unwrap();
|
||||
|
||||
|
||||
// Should contain the TODO content
|
||||
assert!(result.contains("📝 TODO list:"), "Should have TODO list header: {}", result);
|
||||
assert!(result.contains("First task"), "Should contain first task: {}", result);
|
||||
assert!(result.contains("Completed task"), "Should contain completed task: {}", result);
|
||||
assert!(
|
||||
result.contains("📝 TODO list:"),
|
||||
"Should have TODO list header: {}",
|
||||
result
|
||||
);
|
||||
assert!(
|
||||
result.contains("First task"),
|
||||
"Should contain first task: {}",
|
||||
result
|
||||
);
|
||||
assert!(
|
||||
result.contains("Completed task"),
|
||||
"Should contain completed task: {}",
|
||||
result
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -89,16 +104,16 @@ async fn test_todo_read_from_file() {
|
||||
async fn test_todo_read_empty_file() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let mut agent = create_test_agent_in_dir(&temp_dir).await;
|
||||
|
||||
|
||||
// Create a tool call to read TODO (file doesn't exist)
|
||||
let tool_call = g3_core::ToolCall {
|
||||
tool: "todo_read".to_string(),
|
||||
args: serde_json::json!({}),
|
||||
};
|
||||
|
||||
|
||||
// Execute the tool
|
||||
let result = agent.execute_tool(&tool_call).await.unwrap();
|
||||
|
||||
|
||||
// Should report empty
|
||||
assert!(result.contains("empty"), "Should report empty: {}", result);
|
||||
}
|
||||
@@ -108,7 +123,7 @@ async fn test_todo_read_empty_file() {
|
||||
async fn test_todo_persistence_across_agents() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let todo_path = get_todo_path(&temp_dir);
|
||||
|
||||
|
||||
// Agent 1: Write TODO
|
||||
{
|
||||
let mut agent = create_test_agent_in_dir(&temp_dir).await;
|
||||
@@ -120,10 +135,13 @@ async fn test_todo_persistence_across_agents() {
|
||||
};
|
||||
agent.execute_tool(&tool_call).await.unwrap();
|
||||
}
|
||||
|
||||
|
||||
// Verify file exists
|
||||
assert!(todo_path.exists(), "todo.g3.md should persist after agent drops");
|
||||
|
||||
assert!(
|
||||
todo_path.exists(),
|
||||
"todo.g3.md should persist after agent drops"
|
||||
);
|
||||
|
||||
// Agent 2: Read TODO (new agent instance)
|
||||
{
|
||||
let mut agent = create_test_agent_in_dir(&temp_dir).await;
|
||||
@@ -132,10 +150,18 @@ async fn test_todo_persistence_across_agents() {
|
||||
args: serde_json::json!({}),
|
||||
};
|
||||
let result = agent.execute_tool(&tool_call).await.unwrap();
|
||||
|
||||
|
||||
// Should read the persisted content
|
||||
assert!(result.contains("Persistent task"), "Should read persisted task: {}", result);
|
||||
assert!(result.contains("Done task"), "Should read done task: {}", result);
|
||||
assert!(
|
||||
result.contains("Persistent task"),
|
||||
"Should read persisted task: {}",
|
||||
result
|
||||
);
|
||||
assert!(
|
||||
result.contains("Done task"),
|
||||
"Should read done task: {}",
|
||||
result
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -145,7 +171,7 @@ async fn test_todo_update_preserves_file() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let mut agent = create_test_agent_in_dir(&temp_dir).await;
|
||||
let todo_path = get_todo_path(&temp_dir);
|
||||
|
||||
|
||||
// Write initial TODO
|
||||
let write_call = g3_core::ToolCall {
|
||||
tool: "todo_write".to_string(),
|
||||
@@ -154,7 +180,7 @@ async fn test_todo_update_preserves_file() {
|
||||
}),
|
||||
};
|
||||
agent.execute_tool(&write_call).await.unwrap();
|
||||
|
||||
|
||||
// Update TODO
|
||||
let update_call = g3_core::ToolCall {
|
||||
tool: "todo_write".to_string(),
|
||||
@@ -163,7 +189,7 @@ async fn test_todo_update_preserves_file() {
|
||||
}),
|
||||
};
|
||||
agent.execute_tool(&update_call).await.unwrap();
|
||||
|
||||
|
||||
// Verify file has updated content
|
||||
let content = fs::read_to_string(&todo_path).unwrap();
|
||||
assert_eq!(content, "- [x] Task 1\n- [ ] Task 2\n- [ ] Task 3");
|
||||
@@ -175,23 +201,30 @@ async fn test_todo_handles_large_content() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let mut agent = create_test_agent_in_dir(&temp_dir).await;
|
||||
let todo_path = get_todo_path(&temp_dir);
|
||||
|
||||
|
||||
// Create a large TODO (but under the 50k limit)
|
||||
let mut large_content = String::from("# Large TODO\n\n");
|
||||
for i in 0..100 {
|
||||
large_content.push_str(&format!("- [ ] Task {} with a long description that exceeds normal line lengths\n", i));
|
||||
large_content.push_str(&format!(
|
||||
"- [ ] Task {} with a long description that exceeds normal line lengths\n",
|
||||
i
|
||||
));
|
||||
}
|
||||
|
||||
|
||||
let tool_call = g3_core::ToolCall {
|
||||
tool: "todo_write".to_string(),
|
||||
args: serde_json::json!({
|
||||
"content": large_content
|
||||
}),
|
||||
};
|
||||
|
||||
|
||||
let result = agent.execute_tool(&tool_call).await.unwrap();
|
||||
assert!(result.contains("✅"), "Should handle large content: {}", result);
|
||||
|
||||
assert!(
|
||||
result.contains("✅"),
|
||||
"Should handle large content: {}",
|
||||
result
|
||||
);
|
||||
|
||||
// Verify file contains all content
|
||||
let file_content = fs::read_to_string(&todo_path).unwrap();
|
||||
assert_eq!(file_content, large_content);
|
||||
@@ -203,22 +236,30 @@ async fn test_todo_handles_large_content() {
|
||||
async fn test_todo_respects_size_limit() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let mut agent = create_test_agent_in_dir(&temp_dir).await;
|
||||
|
||||
|
||||
// Create content that exceeds the default 50k limit
|
||||
let huge_content = "x".repeat(60_000);
|
||||
|
||||
|
||||
let tool_call = g3_core::ToolCall {
|
||||
tool: "todo_write".to_string(),
|
||||
args: serde_json::json!({
|
||||
"content": huge_content
|
||||
}),
|
||||
};
|
||||
|
||||
|
||||
let result = agent.execute_tool(&tool_call).await.unwrap();
|
||||
|
||||
|
||||
// Should reject content that's too large
|
||||
assert!(result.contains("❌"), "Should reject oversized content: {}", result);
|
||||
assert!(result.contains("too large"), "Should mention size limit: {}", result);
|
||||
assert!(
|
||||
result.contains("❌"),
|
||||
"Should reject oversized content: {}",
|
||||
result
|
||||
);
|
||||
assert!(
|
||||
result.contains("too large"),
|
||||
"Should mention size limit: {}",
|
||||
result
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -226,22 +267,26 @@ async fn test_todo_respects_size_limit() {
|
||||
async fn test_todo_agent_initialization_loads_file() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let todo_path = get_todo_path(&temp_dir);
|
||||
|
||||
|
||||
// Pre-create todo.g3.md before agent initialization
|
||||
let initial_content = "- [ ] Pre-existing task";
|
||||
fs::write(&todo_path, initial_content).unwrap();
|
||||
|
||||
|
||||
// Create agent - should load the file during initialization
|
||||
let mut agent = create_test_agent_in_dir(&temp_dir).await;
|
||||
|
||||
|
||||
// Read TODO - should return the pre-existing content
|
||||
let tool_call = g3_core::ToolCall {
|
||||
tool: "todo_read".to_string(),
|
||||
args: serde_json::json!({}),
|
||||
};
|
||||
|
||||
|
||||
let result = agent.execute_tool(&tool_call).await.unwrap();
|
||||
assert!(result.contains("Pre-existing task"), "Should load file on init: {}", result);
|
||||
assert!(
|
||||
result.contains("Pre-existing task"),
|
||||
"Should load file on init: {}",
|
||||
result
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -250,33 +295,41 @@ async fn test_todo_handles_unicode_content() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let mut agent = create_test_agent_in_dir(&temp_dir).await;
|
||||
let todo_path = get_todo_path(&temp_dir);
|
||||
|
||||
|
||||
// Create TODO with unicode characters
|
||||
let unicode_content = "- [ ] 日本語タスク\n- [ ] Émoji task 🚀\n- [x] Ελληνικά task";
|
||||
|
||||
|
||||
let tool_call = g3_core::ToolCall {
|
||||
tool: "todo_write".to_string(),
|
||||
args: serde_json::json!({
|
||||
"content": unicode_content
|
||||
}),
|
||||
};
|
||||
|
||||
|
||||
agent.execute_tool(&tool_call).await.unwrap();
|
||||
|
||||
|
||||
// Verify file preserves unicode
|
||||
let file_content = fs::read_to_string(&todo_path).unwrap();
|
||||
assert_eq!(file_content, unicode_content);
|
||||
|
||||
|
||||
// Verify reading back works
|
||||
let read_call = g3_core::ToolCall {
|
||||
tool: "todo_read".to_string(),
|
||||
args: serde_json::json!({}),
|
||||
};
|
||||
|
||||
|
||||
let result = agent.execute_tool(&read_call).await.unwrap();
|
||||
assert!(result.contains("日本語"), "Should preserve Japanese: {}", result);
|
||||
assert!(
|
||||
result.contains("日本語"),
|
||||
"Should preserve Japanese: {}",
|
||||
result
|
||||
);
|
||||
assert!(result.contains("🚀"), "Should preserve emoji: {}", result);
|
||||
assert!(result.contains("Ελληνικά"), "Should preserve Greek: {}", result);
|
||||
assert!(
|
||||
result.contains("Ελληνικά"),
|
||||
"Should preserve Greek: {}",
|
||||
result
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -285,7 +338,7 @@ async fn test_todo_empty_content_creates_empty_file() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let mut agent = create_test_agent_in_dir(&temp_dir).await;
|
||||
let todo_path = get_todo_path(&temp_dir);
|
||||
|
||||
|
||||
// Write empty TODO
|
||||
let tool_call = g3_core::ToolCall {
|
||||
tool: "todo_write".to_string(),
|
||||
@@ -293,9 +346,9 @@ async fn test_todo_empty_content_creates_empty_file() {
|
||||
"content": ""
|
||||
}),
|
||||
};
|
||||
|
||||
|
||||
agent.execute_tool(&tool_call).await.unwrap();
|
||||
|
||||
|
||||
// File should exist but be empty
|
||||
assert!(todo_path.exists(), "Empty todo.g3.md should create file");
|
||||
let content = fs::read_to_string(&todo_path).unwrap();
|
||||
@@ -307,7 +360,7 @@ async fn test_todo_empty_content_creates_empty_file() {
|
||||
async fn test_todo_whitespace_only_content() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let mut agent = create_test_agent_in_dir(&temp_dir).await;
|
||||
|
||||
|
||||
// Write whitespace-only TODO
|
||||
let tool_call = g3_core::ToolCall {
|
||||
tool: "todo_write".to_string(),
|
||||
@@ -315,17 +368,21 @@ async fn test_todo_whitespace_only_content() {
|
||||
"content": " \n\n \t \n"
|
||||
}),
|
||||
};
|
||||
|
||||
|
||||
agent.execute_tool(&tool_call).await.unwrap();
|
||||
|
||||
|
||||
// Read it back
|
||||
let read_call = g3_core::ToolCall {
|
||||
tool: "todo_read".to_string(),
|
||||
args: serde_json::json!({}),
|
||||
};
|
||||
|
||||
|
||||
let result = agent.execute_tool(&read_call).await.unwrap();
|
||||
|
||||
|
||||
// Should report as empty (whitespace is trimmed)
|
||||
assert!(result.contains("empty"), "Whitespace-only should be empty: {}", result);
|
||||
assert!(
|
||||
result.contains("empty"),
|
||||
"Whitespace-only should be empty: {}",
|
||||
result
|
||||
);
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@ use g3_providers::Usage;
|
||||
#[test]
|
||||
fn test_token_accumulation() {
|
||||
let mut window = ContextWindow::new(10000);
|
||||
|
||||
|
||||
// First API call: 100 prompt + 50 completion = 150 total
|
||||
let usage1 = Usage {
|
||||
prompt_tokens: 100,
|
||||
@@ -22,7 +22,10 @@ fn test_token_accumulation() {
|
||||
total_tokens: 275,
|
||||
};
|
||||
window.update_usage_from_response(&usage2);
|
||||
assert_eq!(window.used_tokens, 425, "Second call should accumulate to 425 tokens");
|
||||
assert_eq!(
|
||||
window.used_tokens, 425,
|
||||
"Second call should accumulate to 425 tokens"
|
||||
);
|
||||
assert_eq!(window.cumulative_tokens, 425, "Cumulative should be 425");
|
||||
|
||||
// Third API call with SMALLER token count: 50 prompt + 25 completion = 75 total
|
||||
@@ -32,27 +35,33 @@ fn test_token_accumulation() {
|
||||
total_tokens: 75,
|
||||
};
|
||||
window.update_usage_from_response(&usage3);
|
||||
assert_eq!(window.used_tokens, 500, "Third call should accumulate to 500 tokens");
|
||||
assert_eq!(
|
||||
window.used_tokens, 500,
|
||||
"Third call should accumulate to 500 tokens"
|
||||
);
|
||||
assert_eq!(window.cumulative_tokens, 500, "Cumulative should be 500");
|
||||
|
||||
|
||||
// Verify tokens never decrease
|
||||
assert!(window.used_tokens >= 425, "Token count should never decrease!");
|
||||
assert!(
|
||||
window.used_tokens >= 425,
|
||||
"Token count should never decrease!"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_add_streaming_tokens() {
|
||||
let mut window = ContextWindow::new(10000);
|
||||
|
||||
|
||||
// Add some streaming tokens
|
||||
window.add_streaming_tokens(100);
|
||||
assert_eq!(window.used_tokens, 100);
|
||||
assert_eq!(window.cumulative_tokens, 100);
|
||||
|
||||
|
||||
// Add more
|
||||
window.add_streaming_tokens(50);
|
||||
assert_eq!(window.used_tokens, 150);
|
||||
assert_eq!(window.cumulative_tokens, 150);
|
||||
|
||||
|
||||
// Now update from provider response
|
||||
let usage = Usage {
|
||||
prompt_tokens: 80,
|
||||
@@ -60,7 +69,7 @@ fn test_add_streaming_tokens() {
|
||||
total_tokens: 120,
|
||||
};
|
||||
window.update_usage_from_response(&usage);
|
||||
|
||||
|
||||
// Should ADD to existing, not replace
|
||||
assert_eq!(window.used_tokens, 270, "Should add 120 to existing 150");
|
||||
assert_eq!(window.cumulative_tokens, 270);
|
||||
@@ -69,7 +78,7 @@ fn test_add_streaming_tokens() {
|
||||
#[test]
|
||||
fn test_percentage_calculation() {
|
||||
let mut window = ContextWindow::new(1000);
|
||||
|
||||
|
||||
// Add tokens via provider response
|
||||
let usage = Usage {
|
||||
prompt_tokens: 150,
|
||||
@@ -77,10 +86,10 @@ fn test_percentage_calculation() {
|
||||
total_tokens: 250,
|
||||
};
|
||||
window.update_usage_from_response(&usage);
|
||||
|
||||
|
||||
assert_eq!(window.percentage_used(), 25.0);
|
||||
assert_eq!(window.remaining_tokens(), 750);
|
||||
|
||||
|
||||
// Add more tokens
|
||||
let usage2 = Usage {
|
||||
prompt_tokens: 300,
|
||||
@@ -88,7 +97,7 @@ fn test_percentage_calculation() {
|
||||
total_tokens: 500,
|
||||
};
|
||||
window.update_usage_from_response(&usage2);
|
||||
|
||||
|
||||
assert_eq!(window.percentage_used(), 75.0);
|
||||
assert_eq!(window.remaining_tokens(), 250);
|
||||
}
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use g3_core::{Agent, ToolCall};
|
||||
use g3_core::ui_writer::UiWriter;
|
||||
use g3_config::Config;
|
||||
use g3_core::ui_writer::UiWriter;
|
||||
use g3_core::{Agent, ToolCall};
|
||||
use serial_test::serial;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use tempfile::TempDir;
|
||||
use serial_test::serial;
|
||||
|
||||
// Mock UI Writer for testing
|
||||
#[derive(Clone)]
|
||||
@@ -47,7 +47,10 @@ impl UiWriter for MockUiWriter {
|
||||
}
|
||||
fn print_system_prompt(&self, _prompt: &str) {}
|
||||
fn print_context_status(&self, message: &str) {
|
||||
self.output.lock().unwrap().push(format!("STATUS: {}", message));
|
||||
self.output
|
||||
.lock()
|
||||
.unwrap()
|
||||
.push(format!("STATUS: {}", message));
|
||||
}
|
||||
fn print_context_thinning(&self, _message: &str) {}
|
||||
fn print_tool_header(&self, _tool_name: &str) {}
|
||||
@@ -61,13 +64,21 @@ impl UiWriter for MockUiWriter {
|
||||
fn print_agent_response(&self, _content: &str) {}
|
||||
fn notify_sse_received(&self) {}
|
||||
fn flush(&self) {}
|
||||
fn wants_full_output(&self) -> bool { false }
|
||||
fn wants_full_output(&self) -> bool {
|
||||
false
|
||||
}
|
||||
fn prompt_user_yes_no(&self, message: &str) -> bool {
|
||||
self.output.lock().unwrap().push(format!("PROMPT: {}", message));
|
||||
self.output
|
||||
.lock()
|
||||
.unwrap()
|
||||
.push(format!("PROMPT: {}", message));
|
||||
self.prompt_responses.lock().unwrap().pop().unwrap_or(true)
|
||||
}
|
||||
fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize {
|
||||
self.output.lock().unwrap().push(format!("CHOICE: {} Options: {:?}", message, options));
|
||||
self.output
|
||||
.lock()
|
||||
.unwrap()
|
||||
.push(format!("CHOICE: {} Options: {:?}", message, options));
|
||||
self.choice_responses.lock().unwrap().pop().unwrap_or(0)
|
||||
}
|
||||
}
|
||||
@@ -80,7 +91,10 @@ async fn test_todo_staleness_check_matching_sha() {
|
||||
std::env::set_current_dir(&temp_dir).unwrap();
|
||||
|
||||
let sha = "abc123hash";
|
||||
let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha);
|
||||
let content = format!(
|
||||
"{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1",
|
||||
sha
|
||||
);
|
||||
std::fs::write(&todo_path, content).unwrap();
|
||||
|
||||
let mut config = Config::default();
|
||||
@@ -109,7 +123,10 @@ async fn test_todo_staleness_check_mismatch_sha_ignore() {
|
||||
|
||||
let sha_file = "old_sha";
|
||||
let sha_req = "new_sha";
|
||||
let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha_file);
|
||||
let content = format!(
|
||||
"{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1",
|
||||
sha_file
|
||||
);
|
||||
std::fs::write(&todo_path, content).unwrap();
|
||||
|
||||
let mut config = Config::default();
|
||||
@@ -139,7 +156,10 @@ async fn test_todo_staleness_check_mismatch_sha_mark_stale() {
|
||||
|
||||
let sha_file = "old_sha";
|
||||
let sha_req = "new_sha";
|
||||
let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha_file);
|
||||
let content = format!(
|
||||
"{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1",
|
||||
sha_file
|
||||
);
|
||||
std::fs::write(&todo_path, content).unwrap();
|
||||
|
||||
let mut config = Config::default();
|
||||
@@ -173,7 +193,10 @@ async fn test_todo_staleness_check_disabled() {
|
||||
|
||||
let sha_file = "old_sha";
|
||||
let sha_req = "new_sha";
|
||||
let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha_file);
|
||||
let content = format!(
|
||||
"{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1",
|
||||
sha_file
|
||||
);
|
||||
std::fs::write(&todo_path, content).unwrap();
|
||||
|
||||
let mut config = Config::default();
|
||||
|
||||
Reference in New Issue
Block a user