add context window monitor
Writes the current context window to logs/current_context_window (uses a symlink to a session ID). Unfortunately, this PR was generated by a different LLM, which did a ton of superficial reformatting. The actual change is fairly small and benign, but I don't want to roll back everything. Hope that's OK.
This commit is contained in:
@@ -106,15 +106,15 @@ impl ErrorContext {
|
||||
error!("Session ID: {:?}", self.session_id);
|
||||
error!("Context Tokens: {}", self.context_tokens);
|
||||
error!("Last Prompt: {}", self.last_prompt);
|
||||
|
||||
|
||||
if let Some(ref req) = self.raw_request {
|
||||
error!("Raw Request: {}", req);
|
||||
}
|
||||
|
||||
|
||||
if let Some(ref resp) = self.raw_response {
|
||||
error!("Raw Response: {}", resp);
|
||||
}
|
||||
|
||||
|
||||
error!("Stack Trace:\n{}", self.stack_trace);
|
||||
error!("=== END ERROR DETAILS ===");
|
||||
|
||||
@@ -191,23 +191,36 @@ pub fn classify_error(error: &anyhow::Error) -> ErrorType {
|
||||
let error_str = error.to_string().to_lowercase();
|
||||
|
||||
// Check for recoverable error patterns
|
||||
if error_str.contains("rate limit") || error_str.contains("rate_limit") || error_str.contains("429") {
|
||||
if error_str.contains("rate limit")
|
||||
|| error_str.contains("rate_limit")
|
||||
|| error_str.contains("429")
|
||||
{
|
||||
return ErrorType::Recoverable(RecoverableError::RateLimit);
|
||||
}
|
||||
|
||||
if error_str.contains("network") || error_str.contains("connection") ||
|
||||
error_str.contains("dns") || error_str.contains("refused") {
|
||||
if error_str.contains("network")
|
||||
|| error_str.contains("connection")
|
||||
|| error_str.contains("dns")
|
||||
|| error_str.contains("refused")
|
||||
{
|
||||
return ErrorType::Recoverable(RecoverableError::NetworkError);
|
||||
}
|
||||
|
||||
if error_str.contains("500") || error_str.contains("502") ||
|
||||
error_str.contains("503") || error_str.contains("504") ||
|
||||
error_str.contains("server error") || error_str.contains("internal error") {
|
||||
if error_str.contains("500")
|
||||
|| error_str.contains("502")
|
||||
|| error_str.contains("503")
|
||||
|| error_str.contains("504")
|
||||
|| error_str.contains("server error")
|
||||
|| error_str.contains("internal error")
|
||||
{
|
||||
return ErrorType::Recoverable(RecoverableError::ServerError);
|
||||
}
|
||||
|
||||
if error_str.contains("busy") || error_str.contains("overloaded") ||
|
||||
error_str.contains("capacity") || error_str.contains("unavailable") {
|
||||
if error_str.contains("busy")
|
||||
|| error_str.contains("overloaded")
|
||||
|| error_str.contains("capacity")
|
||||
|| error_str.contains("unavailable")
|
||||
{
|
||||
return ErrorType::Recoverable(RecoverableError::ModelBusy);
|
||||
}
|
||||
|
||||
@@ -216,18 +229,24 @@ pub fn classify_error(error: &anyhow::Error) -> ErrorType {
|
||||
error_str.contains("timed out") ||
|
||||
error_str.contains("operation timed out") ||
|
||||
error_str.contains("request or response body error") || // Common timeout pattern
|
||||
error_str.contains("stream error") && error_str.contains("timed out") {
|
||||
error_str.contains("stream error") && error_str.contains("timed out")
|
||||
{
|
||||
return ErrorType::Recoverable(RecoverableError::Timeout);
|
||||
}
|
||||
|
||||
// Check for context length exceeded errors (HTTP 400 with specific messages)
|
||||
if (error_str.contains("400") || error_str.contains("bad request")) &&
|
||||
(error_str.contains("context length") || error_str.contains("prompt is too long") ||
|
||||
error_str.contains("maximum context length") || error_str.contains("context_length_exceeded")) {
|
||||
if (error_str.contains("400") || error_str.contains("bad request"))
|
||||
&& (error_str.contains("context length")
|
||||
|| error_str.contains("prompt is too long")
|
||||
|| error_str.contains("maximum context length")
|
||||
|| error_str.contains("context_length_exceeded"))
|
||||
{
|
||||
return ErrorType::Recoverable(RecoverableError::ContextLengthExceeded);
|
||||
}
|
||||
|
||||
if error_str.contains("token") && (error_str.contains("limit") || error_str.contains("exceeded")) {
|
||||
if error_str.contains("token")
|
||||
&& (error_str.contains("limit") || error_str.contains("exceeded"))
|
||||
{
|
||||
return ErrorType::Recoverable(RecoverableError::TokenLimit);
|
||||
}
|
||||
|
||||
@@ -239,12 +258,14 @@ pub fn classify_error(error: &anyhow::Error) -> ErrorType {
|
||||
fn calculate_autonomous_retry_delay(attempt: u32) -> Duration {
|
||||
use rand::Rng;
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
|
||||
// Distribute 6 retries over 10 minutes (600 seconds)
|
||||
// Base delays: 10s, 30s, 60s, 120s, 180s, 200s = 600s total
|
||||
let base_delays_ms = [10000, 30000, 60000, 120000, 180000, 200000];
|
||||
let base_delay = base_delays_ms.get(attempt.saturating_sub(1) as usize).unwrap_or(&200000);
|
||||
|
||||
let base_delay = base_delays_ms
|
||||
.get(attempt.saturating_sub(1) as usize)
|
||||
.unwrap_or(&200000);
|
||||
|
||||
// Add jitter of ±30% to prevent thundering herd
|
||||
let jitter = (*base_delay as f64 * 0.3 * rng.gen::<f64>()) as u64;
|
||||
let final_delay = if rng.gen_bool(0.5) {
|
||||
@@ -252,7 +273,7 @@ fn calculate_autonomous_retry_delay(attempt: u32) -> Duration {
|
||||
} else {
|
||||
base_delay.saturating_sub(jitter)
|
||||
};
|
||||
|
||||
|
||||
Duration::from_millis(final_delay)
|
||||
}
|
||||
|
||||
@@ -261,14 +282,18 @@ pub fn calculate_retry_delay(attempt: u32, is_autonomous: bool) -> Duration {
|
||||
if is_autonomous {
|
||||
return calculate_autonomous_retry_delay(attempt);
|
||||
}
|
||||
|
||||
|
||||
use rand::Rng;
|
||||
let max_retry_delay_ms = if is_autonomous { AUTONOMOUS_MAX_RETRY_DELAY_MS } else { DEFAULT_MAX_RETRY_DELAY_MS };
|
||||
|
||||
let max_retry_delay_ms = if is_autonomous {
|
||||
AUTONOMOUS_MAX_RETRY_DELAY_MS
|
||||
} else {
|
||||
DEFAULT_MAX_RETRY_DELAY_MS
|
||||
};
|
||||
|
||||
// Exponential backoff: delay = base * 2^attempt
|
||||
let base_delay = BASE_RETRY_DELAY_MS * (2_u64.pow(attempt.saturating_sub(1)));
|
||||
let capped_delay = base_delay.min(max_retry_delay_ms);
|
||||
|
||||
|
||||
// Add jitter to prevent thundering herd
|
||||
let mut rng = rand::thread_rng();
|
||||
let jitter = (capped_delay as f64 * JITTER_FACTOR * rng.gen::<f64>()) as u64;
|
||||
@@ -277,7 +302,7 @@ pub fn calculate_retry_delay(attempt: u32, is_autonomous: bool) -> Duration {
|
||||
} else {
|
||||
capped_delay.saturating_sub(jitter)
|
||||
};
|
||||
|
||||
|
||||
Duration::from_millis(final_delay)
|
||||
}
|
||||
|
||||
@@ -298,7 +323,7 @@ where
|
||||
|
||||
loop {
|
||||
attempt += 1;
|
||||
|
||||
|
||||
match operation().await {
|
||||
Ok(result) => {
|
||||
if attempt > 1 {
|
||||
@@ -321,19 +346,19 @@ where
|
||||
context.clone().log_error(&error);
|
||||
return Err(error);
|
||||
}
|
||||
|
||||
|
||||
let delay = calculate_retry_delay(attempt, is_autonomous);
|
||||
warn!(
|
||||
"Recoverable error ({:?}) in '{}' (attempt {}/{}). Retrying in {:?}...",
|
||||
recoverable_type, operation_name, attempt, max_attempts, delay
|
||||
);
|
||||
warn!("Error details: {}", error);
|
||||
|
||||
|
||||
// Special handling for token limit errors
|
||||
if matches!(recoverable_type, RecoverableError::TokenLimit) {
|
||||
info!("Token limit error detected. Consider triggering summarization.");
|
||||
}
|
||||
|
||||
|
||||
tokio::time::sleep(delay).await;
|
||||
_last_error = Some(error);
|
||||
}
|
||||
@@ -359,18 +384,22 @@ fn truncate_for_logging(s: &str, max_len: usize) -> String {
|
||||
// Find a safe UTF-8 boundary to truncate at
|
||||
// We need to ensure we don't cut in the middle of a multi-byte character
|
||||
let mut truncate_at = max_len;
|
||||
|
||||
|
||||
// Walk backwards from max_len to find a character boundary
|
||||
while truncate_at > 0 && !s.is_char_boundary(truncate_at) {
|
||||
truncate_at -= 1;
|
||||
}
|
||||
|
||||
|
||||
// If we couldn't find a boundary (shouldn't happen), use a safe default
|
||||
if truncate_at == 0 {
|
||||
truncate_at = max_len.min(s.len());
|
||||
}
|
||||
|
||||
format!("{}... (truncated, {} total bytes)", &s[..truncate_at], s.len())
|
||||
|
||||
format!(
|
||||
"{}... (truncated, {} total bytes)",
|
||||
&s[..truncate_at],
|
||||
s.len()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -398,42 +427,69 @@ mod tests {
|
||||
fn test_error_classification() {
|
||||
// Rate limit errors
|
||||
let error = anyhow!("Rate limit exceeded");
|
||||
assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::RateLimit));
|
||||
|
||||
assert_eq!(
|
||||
classify_error(&error),
|
||||
ErrorType::Recoverable(RecoverableError::RateLimit)
|
||||
);
|
||||
|
||||
let error = anyhow!("HTTP 429 Too Many Requests");
|
||||
assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::RateLimit));
|
||||
|
||||
assert_eq!(
|
||||
classify_error(&error),
|
||||
ErrorType::Recoverable(RecoverableError::RateLimit)
|
||||
);
|
||||
|
||||
// Network errors
|
||||
let error = anyhow!("Network connection failed");
|
||||
assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::NetworkError));
|
||||
|
||||
assert_eq!(
|
||||
classify_error(&error),
|
||||
ErrorType::Recoverable(RecoverableError::NetworkError)
|
||||
);
|
||||
|
||||
// Server errors
|
||||
let error = anyhow!("HTTP 503 Service Unavailable");
|
||||
assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::ServerError));
|
||||
|
||||
assert_eq!(
|
||||
classify_error(&error),
|
||||
ErrorType::Recoverable(RecoverableError::ServerError)
|
||||
);
|
||||
|
||||
// Model busy
|
||||
let error = anyhow!("Model is busy, please try again");
|
||||
assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::ModelBusy));
|
||||
|
||||
assert_eq!(
|
||||
classify_error(&error),
|
||||
ErrorType::Recoverable(RecoverableError::ModelBusy)
|
||||
);
|
||||
|
||||
// Timeout
|
||||
let error = anyhow!("Request timed out");
|
||||
assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::Timeout));
|
||||
|
||||
assert_eq!(
|
||||
classify_error(&error),
|
||||
ErrorType::Recoverable(RecoverableError::Timeout)
|
||||
);
|
||||
|
||||
// Token limit
|
||||
let error = anyhow!("Token limit exceeded");
|
||||
assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::TokenLimit));
|
||||
|
||||
assert_eq!(
|
||||
classify_error(&error),
|
||||
ErrorType::Recoverable(RecoverableError::TokenLimit)
|
||||
);
|
||||
|
||||
// Context length exceeded
|
||||
let error = anyhow!("HTTP 400 Bad Request: context length exceeded");
|
||||
assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::ContextLengthExceeded));
|
||||
|
||||
assert_eq!(
|
||||
classify_error(&error),
|
||||
ErrorType::Recoverable(RecoverableError::ContextLengthExceeded)
|
||||
);
|
||||
|
||||
let error = anyhow!("Error 400: prompt is too long");
|
||||
assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::ContextLengthExceeded));
|
||||
|
||||
assert_eq!(
|
||||
classify_error(&error),
|
||||
ErrorType::Recoverable(RecoverableError::ContextLengthExceeded)
|
||||
);
|
||||
|
||||
// Non-recoverable
|
||||
let error = anyhow!("Invalid API key");
|
||||
assert_eq!(classify_error(&error), ErrorType::NonRecoverable);
|
||||
|
||||
|
||||
let error = anyhow!("Malformed request");
|
||||
assert_eq!(classify_error(&error), ErrorType::NonRecoverable);
|
||||
}
|
||||
@@ -444,17 +500,17 @@ mod tests {
|
||||
let delay1 = calculate_retry_delay(1, false);
|
||||
let delay2 = calculate_retry_delay(2, false);
|
||||
let delay3 = calculate_retry_delay(3, false);
|
||||
|
||||
|
||||
// Due to jitter, we can't test exact values, but the base should increase
|
||||
assert!(delay1.as_millis() >= (BASE_RETRY_DELAY_MS as f64 * 0.7) as u128);
|
||||
assert!(delay1.as_millis() <= (BASE_RETRY_DELAY_MS as f64 * 1.3) as u128);
|
||||
|
||||
|
||||
// Delay 2 should be roughly 2x delay 1 (minus jitter)
|
||||
assert!(delay2.as_millis() >= delay1.as_millis());
|
||||
|
||||
|
||||
// Delay 3 should be roughly 2x delay 2 (minus jitter)
|
||||
assert!(delay3.as_millis() >= delay2.as_millis());
|
||||
|
||||
|
||||
// Test max cap
|
||||
let delay_max = calculate_retry_delay(10, false);
|
||||
assert!(delay_max.as_millis() <= (DEFAULT_MAX_RETRY_DELAY_MS as f64 * 1.3) as u128);
|
||||
@@ -469,7 +525,7 @@ mod tests {
|
||||
let delay4 = calculate_retry_delay(4, true);
|
||||
let delay5 = calculate_retry_delay(5, true);
|
||||
let delay6 = calculate_retry_delay(6, true);
|
||||
|
||||
|
||||
// Base delays should be around: 10s, 30s, 60s, 120s, 180s, 200s
|
||||
// With ±30% jitter
|
||||
assert!(delay1.as_millis() >= 7000 && delay1.as_millis() <= 13000);
|
||||
@@ -484,14 +540,14 @@ mod tests {
|
||||
fn test_truncate_for_logging() {
|
||||
let short_text = "Hello, world!";
|
||||
assert_eq!(truncate_for_logging(short_text, 20), "Hello, world!");
|
||||
|
||||
|
||||
let long_text = "This is a very long text that should be truncated for logging purposes";
|
||||
let truncated = truncate_for_logging(long_text, 20);
|
||||
assert!(truncated.starts_with("This is a very long "));
|
||||
assert!(truncated.contains("truncated"));
|
||||
assert!(truncated.contains("total bytes"));
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_truncate_with_multibyte_chars() {
|
||||
// Test with multi-byte UTF-8 characters
|
||||
@@ -499,7 +555,7 @@ mod tests {
|
||||
let truncated = truncate_for_logging(text_with_emoji, 10);
|
||||
// Should truncate at a valid UTF-8 boundary
|
||||
assert!(truncated.starts_with("Hello "));
|
||||
|
||||
|
||||
// Test with box-drawing characters like the one causing the panic
|
||||
let text_with_box = "Some text ┌─────┐ more text";
|
||||
let truncated = truncate_for_logging(text_with_box, 12);
|
||||
|
||||
Reference in New Issue
Block a user