add context window monitor

Writes the current context window to logs/current_context_window (uses a symlink to a session ID). This PR was unfortunately generated by a different LLM, which did a ton of superficial reformatting. It's actually a fairly small and benign change, but I don't want to roll back everything. Hope that's OK.
2025-11-27 21:00:02 +11:00
parent 93dc4acf86
commit 52f78653b4
89 changed files with 4040 additions and 2576 deletions
--- a/crates/g3-core/src/error_handling.rs
+++ b/crates/g3-core/src/error_handling.rs
@@ -106,15 +106,15 @@ impl ErrorContext {
        error!("Session ID: {:?}", self.session_id);
        error!("Context Tokens: {}", self.context_tokens);
        error!("Last Prompt: {}", self.last_prompt);
-        
+
        if let Some(ref req) = self.raw_request {
            error!("Raw Request: {}", req);
        }
-        
+
        if let Some(ref resp) = self.raw_response {
            error!("Raw Response: {}", resp);
        }
-        
+
        error!("Stack Trace:\n{}", self.stack_trace);
        error!("=== END ERROR DETAILS ===");

@@ -191,23 +191,36 @@ pub fn classify_error(error: &anyhow::Error) -> ErrorType {
    let error_str = error.to_string().to_lowercase();

    // Check for recoverable error patterns
-    if error_str.contains("rate limit") || error_str.contains("rate_limit") || error_str.contains("429") {
+    if error_str.contains("rate limit")
+        || error_str.contains("rate_limit")
+        || error_str.contains("429")
+    {
        return ErrorType::Recoverable(RecoverableError::RateLimit);
    }

-    if error_str.contains("network") || error_str.contains("connection") || 
-       error_str.contains("dns") || error_str.contains("refused") {
+    if error_str.contains("network")
+        || error_str.contains("connection")
+        || error_str.contains("dns")
+        || error_str.contains("refused")
+    {
        return ErrorType::Recoverable(RecoverableError::NetworkError);
    }

-    if error_str.contains("500") || error_str.contains("502") || 
-       error_str.contains("503") || error_str.contains("504") ||
-       error_str.contains("server error") || error_str.contains("internal error") {
+    if error_str.contains("500")
+        || error_str.contains("502")
+        || error_str.contains("503")
+        || error_str.contains("504")
+        || error_str.contains("server error")
+        || error_str.contains("internal error")
+    {
        return ErrorType::Recoverable(RecoverableError::ServerError);
    }

-    if error_str.contains("busy") || error_str.contains("overloaded") || 
-       error_str.contains("capacity") || error_str.contains("unavailable") {
+    if error_str.contains("busy")
+        || error_str.contains("overloaded")
+        || error_str.contains("capacity")
+        || error_str.contains("unavailable")
+    {
        return ErrorType::Recoverable(RecoverableError::ModelBusy);
    }

@@ -216,18 +229,24 @@ pub fn classify_error(error: &anyhow::Error) -> ErrorType {
       error_str.contains("timed out") || 
       error_str.contains("operation timed out") ||
       error_str.contains("request or response body error") ||  // Common timeout pattern
-       error_str.contains("stream error") && error_str.contains("timed out") {
+       error_str.contains("stream error") && error_str.contains("timed out")
+    {
        return ErrorType::Recoverable(RecoverableError::Timeout);
    }

    // Check for context length exceeded errors (HTTP 400 with specific messages)
-    if (error_str.contains("400") || error_str.contains("bad request")) &&
-       (error_str.contains("context length") || error_str.contains("prompt is too long") ||
-        error_str.contains("maximum context length") || error_str.contains("context_length_exceeded")) {
+    if (error_str.contains("400") || error_str.contains("bad request"))
+        && (error_str.contains("context length")
+            || error_str.contains("prompt is too long")
+            || error_str.contains("maximum context length")
+            || error_str.contains("context_length_exceeded"))
+    {
        return ErrorType::Recoverable(RecoverableError::ContextLengthExceeded);
    }

-    if error_str.contains("token") && (error_str.contains("limit") || error_str.contains("exceeded")) {
+    if error_str.contains("token")
+        && (error_str.contains("limit") || error_str.contains("exceeded"))
+    {
        return ErrorType::Recoverable(RecoverableError::TokenLimit);
    }

@@ -239,12 +258,14 @@ pub fn classify_error(error: &anyhow::Error) -> ErrorType {
 fn calculate_autonomous_retry_delay(attempt: u32) -> Duration {
    use rand::Rng;
    let mut rng = rand::thread_rng();
-    
+
    // Distribute 6 retries over 10 minutes (600 seconds)
    // Base delays: 10s, 30s, 60s, 120s, 180s, 200s = 600s total
    let base_delays_ms = [10000, 30000, 60000, 120000, 180000, 200000];
-    let base_delay = base_delays_ms.get(attempt.saturating_sub(1) as usize).unwrap_or(&200000);
-    
+    let base_delay = base_delays_ms
+        .get(attempt.saturating_sub(1) as usize)
+        .unwrap_or(&200000);
+
    // Add jitter of ±30% to prevent thundering herd
    let jitter = (*base_delay as f64 * 0.3 * rng.gen::<f64>()) as u64;
    let final_delay = if rng.gen_bool(0.5) {
@@ -252,7 +273,7 @@ fn calculate_autonomous_retry_delay(attempt: u32) -> Duration {
    } else {
        base_delay.saturating_sub(jitter)
    };
-    
+
    Duration::from_millis(final_delay)
 }

@@ -261,14 +282,18 @@ pub fn calculate_retry_delay(attempt: u32, is_autonomous: bool) -> Duration {
    if is_autonomous {
        return calculate_autonomous_retry_delay(attempt);
    }
-    
+
    use rand::Rng;
-    let max_retry_delay_ms = if is_autonomous { AUTONOMOUS_MAX_RETRY_DELAY_MS } else { DEFAULT_MAX_RETRY_DELAY_MS };
-    
+    let max_retry_delay_ms = if is_autonomous {
+        AUTONOMOUS_MAX_RETRY_DELAY_MS
+    } else {
+        DEFAULT_MAX_RETRY_DELAY_MS
+    };
+
    // Exponential backoff: delay = base * 2^attempt
    let base_delay = BASE_RETRY_DELAY_MS * (2_u64.pow(attempt.saturating_sub(1)));
    let capped_delay = base_delay.min(max_retry_delay_ms);
-    
+
    // Add jitter to prevent thundering herd
    let mut rng = rand::thread_rng();
    let jitter = (capped_delay as f64 * JITTER_FACTOR * rng.gen::<f64>()) as u64;
@@ -277,7 +302,7 @@ pub fn calculate_retry_delay(attempt: u32, is_autonomous: bool) -> Duration {
    } else {
        capped_delay.saturating_sub(jitter)
    };
-    
+
    Duration::from_millis(final_delay)
 }

@@ -298,7 +323,7 @@ where

    loop {
        attempt += 1;
-        
+
        match operation().await {
            Ok(result) => {
                if attempt > 1 {
@@ -321,19 +346,19 @@ where
                            context.clone().log_error(&error);
                            return Err(error);
                        }
-                        
+
                        let delay = calculate_retry_delay(attempt, is_autonomous);
                        warn!(
                            "Recoverable error ({:?}) in '{}' (attempt {}/{}). Retrying in {:?}...",
                            recoverable_type, operation_name, attempt, max_attempts, delay
                        );
                        warn!("Error details: {}", error);
-                        
+
                        // Special handling for token limit errors
                        if matches!(recoverable_type, RecoverableError::TokenLimit) {
                            info!("Token limit error detected. Consider triggering summarization.");
                        }
-                        
+
                        tokio::time::sleep(delay).await;
                        _last_error = Some(error);
                    }
@@ -359,18 +384,22 @@ fn truncate_for_logging(s: &str, max_len: usize) -> String {
        // Find a safe UTF-8 boundary to truncate at
        // We need to ensure we don't cut in the middle of a multi-byte character
        let mut truncate_at = max_len;
-        
+
        // Walk backwards from max_len to find a character boundary
        while truncate_at > 0 && !s.is_char_boundary(truncate_at) {
            truncate_at -= 1;
        }
-        
+
        // If we couldn't find a boundary (shouldn't happen), use a safe default
        if truncate_at == 0 {
            truncate_at = max_len.min(s.len());
        }
-        
-        format!("{}... (truncated, {} total bytes)", &s[..truncate_at], s.len())
+
+        format!(
+            "{}... (truncated, {} total bytes)",
+            &s[..truncate_at],
+            s.len()
+        )
    }
 }

@@ -398,42 +427,69 @@ mod tests {
    fn test_error_classification() {
        // Rate limit errors
        let error = anyhow!("Rate limit exceeded");
-        assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::RateLimit));
-        
+        assert_eq!(
+            classify_error(&error),
+            ErrorType::Recoverable(RecoverableError::RateLimit)
+        );
+
        let error = anyhow!("HTTP 429 Too Many Requests");
-        assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::RateLimit));
-        
+        assert_eq!(
+            classify_error(&error),
+            ErrorType::Recoverable(RecoverableError::RateLimit)
+        );
+
        // Network errors
        let error = anyhow!("Network connection failed");
-        assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::NetworkError));
-        
+        assert_eq!(
+            classify_error(&error),
+            ErrorType::Recoverable(RecoverableError::NetworkError)
+        );
+
        // Server errors
        let error = anyhow!("HTTP 503 Service Unavailable");
-        assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::ServerError));
-        
+        assert_eq!(
+            classify_error(&error),
+            ErrorType::Recoverable(RecoverableError::ServerError)
+        );
+
        // Model busy
        let error = anyhow!("Model is busy, please try again");
-        assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::ModelBusy));
-        
+        assert_eq!(
+            classify_error(&error),
+            ErrorType::Recoverable(RecoverableError::ModelBusy)
+        );
+
        // Timeout
        let error = anyhow!("Request timed out");
-        assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::Timeout));
-        
+        assert_eq!(
+            classify_error(&error),
+            ErrorType::Recoverable(RecoverableError::Timeout)
+        );
+
        // Token limit
        let error = anyhow!("Token limit exceeded");
-        assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::TokenLimit));
-        
+        assert_eq!(
+            classify_error(&error),
+            ErrorType::Recoverable(RecoverableError::TokenLimit)
+        );
+
        // Context length exceeded
        let error = anyhow!("HTTP 400 Bad Request: context length exceeded");
-        assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::ContextLengthExceeded));
-        
+        assert_eq!(
+            classify_error(&error),
+            ErrorType::Recoverable(RecoverableError::ContextLengthExceeded)
+        );
+
        let error = anyhow!("Error 400: prompt is too long");
-        assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::ContextLengthExceeded));
-        
+        assert_eq!(
+            classify_error(&error),
+            ErrorType::Recoverable(RecoverableError::ContextLengthExceeded)
+        );
+
        // Non-recoverable
        let error = anyhow!("Invalid API key");
        assert_eq!(classify_error(&error), ErrorType::NonRecoverable);
-        
+
        let error = anyhow!("Malformed request");
        assert_eq!(classify_error(&error), ErrorType::NonRecoverable);
    }
@@ -444,17 +500,17 @@ mod tests {
        let delay1 = calculate_retry_delay(1, false);
        let delay2 = calculate_retry_delay(2, false);
        let delay3 = calculate_retry_delay(3, false);
-        
+
        // Due to jitter, we can't test exact values, but the base should increase
        assert!(delay1.as_millis() >= (BASE_RETRY_DELAY_MS as f64 * 0.7) as u128);
        assert!(delay1.as_millis() <= (BASE_RETRY_DELAY_MS as f64 * 1.3) as u128);
-        
+
        // Delay 2 should be roughly 2x delay 1 (minus jitter)
        assert!(delay2.as_millis() >= delay1.as_millis());
-        
+
        // Delay 3 should be roughly 2x delay 2 (minus jitter)
        assert!(delay3.as_millis() >= delay2.as_millis());
-        
+
        // Test max cap
        let delay_max = calculate_retry_delay(10, false);
        assert!(delay_max.as_millis() <= (DEFAULT_MAX_RETRY_DELAY_MS as f64 * 1.3) as u128);
@@ -469,7 +525,7 @@ mod tests {
        let delay4 = calculate_retry_delay(4, true);
        let delay5 = calculate_retry_delay(5, true);
        let delay6 = calculate_retry_delay(6, true);
-        
+
        // Base delays should be around: 10s, 30s, 60s, 120s, 180s, 200s
        // With ±30% jitter
        assert!(delay1.as_millis() >= 7000 && delay1.as_millis() <= 13000);
@@ -484,14 +540,14 @@ mod tests {
    fn test_truncate_for_logging() {
        let short_text = "Hello, world!";
        assert_eq!(truncate_for_logging(short_text, 20), "Hello, world!");
-        
+
        let long_text = "This is a very long text that should be truncated for logging purposes";
        let truncated = truncate_for_logging(long_text, 20);
        assert!(truncated.starts_with("This is a very long "));
        assert!(truncated.contains("truncated"));
        assert!(truncated.contains("total bytes"));
    }
-    
+
    #[test]
    fn test_truncate_with_multibyte_chars() {
        // Test with multi-byte UTF-8 characters
@@ -499,7 +555,7 @@ mod tests {
        let truncated = truncate_for_logging(text_with_emoji, 10);
        // Should truncate at a valid UTF-8 boundary
        assert!(truncated.starts_with("Hello "));
-        
+
        // Test with box-drawing characters like the one causing the panic
        let text_with_box = "Some text ┌─────┐ more text";
        let truncated = truncate_for_logging(text_with_box, 12);