Add comprehensive end-of-turn behavior tests for g3-core

Agent: hopper

Adds 56 new integration tests covering the observable end-of-turn behaviors in the streaming module:

- Timing footer formatting (5 tests): verifies user-facing timing display with various durations, token counts, and context percentages
- Tool call duplicate detection (6 tests): ensures identical sequential tool calls are detected while different tools/args are not
- Empty response detection (9 tests): validates detection of empty, whitespace-only, and timing-only responses that trigger auto-continue
- Connection error classification (5 tests): verifies EOF, connection, chunk, and body errors are correctly identified for graceful recovery
- Tool output summary formatting (16 tests): covers read_file, write_file, str_replace, remember, screenshot, coverage, and rehydrate summaries
- Duration formatting (4 tests): milliseconds, seconds, minutes, zero
- Text truncation (4 tests): short/long strings, multiline, flag behavior
- LLM token cleaning (3 tests): removal of stop tokens like <|im_end|>
- Edge cases (4 tests): empty inputs, unicode handling, large numbers

All tests are blackbox/characterization style: they test observable outputs through stable public interfaces without encoding internal implementation details, so they remain stable under refactoring that preserves behavior.
2026-01-12 21:17:32 +05:30
parent d164c97ad2
commit 6f50d01ab6
1 changed files with 613 additions and 0 deletions
--- a/crates/g3-core/tests/end_of_turn_behavior_test.rs
+++ b/crates/g3-core/tests/end_of_turn_behavior_test.rs
@@ -0,0 +1,613 @@
+//! End-of-Turn Behavior Integration Tests
+//!
+//! CHARACTERIZATION: These tests verify the observable behavior of end-of-turn
+//! logic through stable public interfaces.
+//!
+//! What these tests protect:
+//! - Timing footer formatting (observable output)
+//! - Tool call duplicate detection (prevents stuttering)
+//! - Empty response detection (triggers auto-continue)
+//! - Connection error classification (enables graceful recovery)
+//! - Tool output summary formatting (user-facing display)
+//!
+//! What these tests intentionally do NOT assert:
+//! - Internal streaming state machine transitions
+//! - Specific iteration counts or loop behavior
+//! - Internal parser buffer management
+//! - Provider-specific response handling
+
+use g3_core::streaming::{
+    are_tool_calls_duplicate, format_timing_footer, is_connection_error, is_empty_response,
+    format_read_file_summary, format_write_file_result, format_str_replace_summary,
+    format_remember_summary, format_screenshot_summary, format_coverage_summary,
+    format_rehydrate_summary, format_duration, truncate_for_display, truncate_line,
+    clean_llm_tokens,
+};
+use g3_core::ToolCall;
+use std::time::Duration;
+
+// =============================================================================
+// Test: Timing Footer Formatting
+// =============================================================================
+
+mod timing_footer {
+    use super::*;
+
+    /// A footer built from all four inputs must surface elapsed time, TTFT, tokens and context %.
+    #[test]
+    fn test_format_timing_footer_complete() {
+        let elapsed = Duration::from_secs(5);
+        let ttft = Duration::from_millis(500);
+        let turn_tokens = Some(1500);
+        let context_pct = 45.5;
+
+        let footer = format_timing_footer(elapsed, ttft, turn_tokens, context_pct);
+
+        // Both durations must be visible in the footer text
+        assert!(footer.contains("5.0s"), "Should show elapsed time: {}", footer);
+        assert!(footer.contains("500ms"), "Should show TTFT: {}", footer);
+        // Token count may be printed verbatim or abbreviated; accept either
+        assert!(footer.contains("1500") || footer.contains("1.5k"), "Should show tokens: {}", footer);
+        // 45.5 may be truncated or rounded when displayed; accept both neighbours
+        assert!(footer.contains("45") || footer.contains("46"), "Should show context %: {}", footer);
+    }
+
+    /// With `turn_tokens == None` a footer is still produced and still carries the elapsed time.
+    #[test]
+    fn test_format_timing_footer_no_tokens() {
+        let elapsed = Duration::from_secs(10);
+        let ttft = Duration::from_secs(1);
+        let turn_tokens = None;
+        let context_pct = 30.0;
+
+        let footer = format_timing_footer(elapsed, ttft, turn_tokens, context_pct);
+
+        // Should still have timing
+        assert!(footer.contains("10"), "Should show elapsed time: {}", footer);
+        // Should handle missing tokens gracefully
+        assert!(!footer.is_empty(), "Footer should not be empty");
+    }
+
+    /// Sub-second durations must render either in milliseconds or as a fractional value.
+    #[test]
+    fn test_format_timing_footer_short_times() {
+        let elapsed = Duration::from_millis(250);
+        let ttft = Duration::from_millis(50);
+        let turn_tokens = Some(100);
+        let context_pct = 5.0;
+
+        let footer = format_timing_footer(elapsed, ttft, turn_tokens, context_pct);
+
+        // The exact unit is not pinned: "ms" or a leading "0." are both acceptable
+        assert!(footer.contains("ms") || footer.contains("0."), "Should handle ms times: {}", footer);
+    }
+
+    /// Durations above one minute must render with a minute marker or as raw seconds.
+    #[test]
+    fn test_format_timing_footer_long_times() {
+        let elapsed = Duration::from_secs(125); // 2m 5s
+        let ttft = Duration::from_secs(3);
+        let turn_tokens = Some(50000);
+        let context_pct = 85.0;
+
+        let footer = format_timing_footer(elapsed, ttft, turn_tokens, context_pct);
+
+        // Should format minutes appropriately
+        assert!(footer.contains("m") || footer.contains("125"), "Should handle minute times: {}", footer);
+    }
+
+    /// A context window that is nearly full must show its percentage in the footer.
+    #[test]
+    fn test_format_timing_footer_high_context() {
+        let elapsed = Duration::from_secs(30);
+        let ttft = Duration::from_secs(2);
+        let turn_tokens = Some(180000);
+        let context_pct = 95.0;
+
+        let footer = format_timing_footer(elapsed, ttft, turn_tokens, context_pct);
+
+        // Require the full "95": an extra `|| contains("9")` fallback would subsume it and match any lone 9
+        assert!(footer.contains("95"), "Should show high context: {}", footer);
+    }
+}
+
+// =============================================================================
+// Test: Tool Call Duplicate Detection
+// =============================================================================
+
+mod duplicate_detection {
+    use super::*;
+
+    /// Builds a `ToolCall` from a tool name and a JSON argument payload.
+    fn call(name: &str, payload: serde_json::Value) -> ToolCall {
+        let tool = String::from(name);
+        let args = payload;
+        ToolCall { tool, args }
+    }
+
+    /// Same tool name plus same arguments must be reported as a duplicate.
+    #[test]
+    fn test_identical_calls_are_duplicates() {
+        let first = call("read_file", serde_json::json!({"file_path": "test.txt"}));
+        let repeat = call("read_file", serde_json::json!({"file_path": "test.txt"}));
+
+        assert!(are_tool_calls_duplicate(&first, &repeat));
+    }
+
+    /// Matching arguments are not enough: a different tool name means no duplicate.
+    #[test]
+    fn test_different_tools_not_duplicates() {
+        let reader = call("read_file", serde_json::json!({"file_path": "test.txt"}));
+        let writer = call("write_file", serde_json::json!({"file_path": "test.txt"}));
+
+        assert!(!are_tool_calls_duplicate(&reader, &writer));
+    }
+
+    /// Matching tool names are not enough: differing arguments mean no duplicate.
+    #[test]
+    fn test_same_tool_different_args_not_duplicates() {
+        let reads_a = call("read_file", serde_json::json!({"file_path": "a.txt"}));
+        let reads_b = call("read_file", serde_json::json!({"file_path": "b.txt"}));
+
+        assert!(!are_tool_calls_duplicate(&reads_a, &reads_b));
+    }
+
+    /// Two calls to the same tool with empty argument objects are duplicates.
+    #[test]
+    fn test_empty_args_duplicates() {
+        let first = call("todo_read", serde_json::json!({}));
+        let repeat = call("todo_read", serde_json::json!({}));
+
+        assert!(are_tool_calls_duplicate(&first, &repeat));
+    }
+
+    /// Nested argument payloads that are equal still yield a duplicate.
+    #[test]
+    fn test_complex_args_duplicates() {
+        let payload = serde_json::json!({
+            "searches": [
+                {"name": "test", "query": "(function_item)", "language": "rust"}
+            ]
+        });
+        let repeat = call("code_search", payload.clone());
+        let first = call("code_search", payload);
+
+        assert!(are_tool_calls_duplicate(&first, &repeat));
+    }
+
+    /// A difference buried inside a nested payload is enough to break duplication.
+    #[test]
+    fn test_complex_args_different_not_duplicates() {
+        let searches_test1 = serde_json::json!({
+            "searches": [{"name": "test1", "query": "(function_item)"}]
+        });
+        let searches_test2 = serde_json::json!({
+            "searches": [{"name": "test2", "query": "(function_item)"}]
+        });
+        let first = call("code_search", searches_test1);
+        let second = call("code_search", searches_test2);
+
+        assert!(!are_tool_calls_duplicate(&first, &second));
+    }
+}
+
+// =============================================================================
+// Test: Empty Response Detection
+// =============================================================================
+
+mod empty_response {
+    use super::*;
+
+    /// A zero-length response is empty.
+    #[test]
+    fn test_empty_string() {
+        assert!(is_empty_response(""));
+    }
+
+    /// Any mix of spaces, tabs and newlines with nothing else is empty.
+    #[test]
+    fn test_whitespace_only() {
+        for blank in ["   ", "\n\n\n", "  \n  \t  \n  "] {
+            assert!(is_empty_response(blank), "expected empty: {:?}", blank);
+        }
+    }
+
+    /// A response consisting solely of a timing line is empty, padded or not.
+    #[test]
+    fn test_timing_only() {
+        for footer in ["⏱️ 43.0s | 💭 3.6s", "  ⏱️ 43.0s | 💭 3.6s  ", "\n⏱️ 43.0s | 💭 3.6s\n"] {
+            assert!(is_empty_response(footer), "expected empty: {:?}", footer);
+        }
+    }
+
+    /// Timing lines surrounded or separated by blank lines are still empty.
+    #[test]
+    fn test_timing_with_whitespace() {
+        let samples = ["\n\n⏱️ 10.0s | 💭 1.0s\n\n", "⏱️ 1s\n\n⏱️ 2s"];
+        assert!(samples.iter().all(|text| is_empty_response(text)), "expected empty: {:?}", samples);
+    }
+
+    /// Any visible text makes the response non-empty.
+    #[test]
+    fn test_substantive_content_not_empty() {
+        // Even a lone "." counts as content
+        for reply in ["Hello", "I will help you.", "Done!", "."] {
+            assert!(!is_empty_response(reply), "expected non-empty: {:?}", reply);
+        }
+    }
+
+    /// A timing line next to real text (before or after it) does not make the response empty.
+    #[test]
+    fn test_timing_with_content_not_empty() {
+        let samples = ["⏱️ 43.0s\nHere is the result.", "Done!\n⏱️ 43.0s"];
+        assert!(!samples.iter().any(|text| is_empty_response(text)), "expected non-empty: {:?}", samples);
+    }
+
+    /// JSON tool-call payloads count as content.
+    #[test]
+    fn test_json_not_empty() {
+        let payloads = [r#"{"tool": "read_file"}"#, r#"{"tool": "test", "args": {}}"#];
+        assert!(!payloads.iter().any(|text| is_empty_response(text)), "expected non-empty: {:?}", payloads);
+    }
+
+    /// A fenced code block counts as content.
+    #[test]
+    fn test_code_blocks_not_empty() {
+        assert!(!is_empty_response("```rust\nfn main() {}\n```"));
+    }
+
+    /// Markdown headings and list items count as content.
+    #[test]
+    fn test_markdown_not_empty() {
+        let snippets = ["# Summary", "- Item 1"];
+        assert!(!snippets.iter().any(|text| is_empty_response(text)), "expected non-empty: {:?}", snippets);
+    }
+}
+
+// =============================================================================
+// Test: Connection Error Detection
+// =============================================================================
+
+mod connection_errors {
+    use super::*;
+
+    /// Messages mentioning an unexpected EOF are classified as connection errors.
+    #[test]
+    fn test_eof_errors() {
+        let eof_errors = ["unexpected EOF during read", "unexpected EOF"];
+        assert!(eof_errors.iter().all(|msg| is_connection_error(msg)), "not detected: {:?}", eof_errors);
+    }
+
+    /// Reset, refused and timed-out connections are classified as connection errors.
+    #[test]
+    fn test_connection_errors() {
+        for msg in ["connection reset", "connection refused", "connection timed out"] {
+            assert!(is_connection_error(msg), "not detected: {:?}", msg);
+        }
+    }
+
+    /// Chunk-size parsing failures are classified as connection errors.
+    #[test]
+    fn test_chunk_errors() {
+        let chunk_errors = ["chunk size line", "invalid chunk size line"];
+        assert!(chunk_errors.iter().all(|msg| is_connection_error(msg)), "not detected: {:?}", chunk_errors);
+    }
+
+    /// Body errors are classified as connection errors.
+    #[test]
+    fn test_body_errors() {
+        let body_errors = ["body error", "body error occurred"];
+        assert!(body_errors.iter().all(|msg| is_connection_error(msg)), "not detected: {:?}", body_errors);
+    }
+
+    /// Failures unrelated to the transport must not be classified as connection errors.
+    #[test]
+    fn test_non_connection_errors() {
+        // Parsing, quota, auth and model-lookup failures respectively
+        for msg in ["invalid JSON", "rate limit exceeded", "authentication failed", "model not found"] {
+            assert!(!is_connection_error(msg), "wrongly detected: {:?}", msg);
+        }
+    }
+}
+
+// =============================================================================
+// Test: Tool Output Summary Formatting
+// =============================================================================
+
+mod tool_output_formatting {
+    use super::*;
+
+    /// Pins the exact read_file summary text, including the "k" abbreviation and the unpluralised "1 lines".
+    #[test]
+    fn test_read_file_summary() {
+        assert_eq!(format_read_file_summary(10, 500), "10 lines (500 chars)");
+        assert_eq!(format_read_file_summary(100, 1500), "100 lines (1.5k chars)");
+        assert_eq!(format_read_file_summary(1, 50), "1 lines (50 chars)");
+        assert_eq!(format_read_file_summary(0, 0), "0 lines (0 chars)");
+    }
+
+    /// Large files keep the raw line count and abbreviate the char count in thousands.
+    #[test]
+    fn test_read_file_summary_large() {
+        let summary = format_read_file_summary(5000, 250000);
+        assert!(summary.contains("5000"));
+        assert!(summary.contains("250.0k") || summary.contains("250k"));
+    }
+
+    /// A well-formed write_file result keeps both the line count and the char count.
+    #[test]
+    fn test_write_file_result() {
+        let result = format_write_file_result("✅ wrote 42 lines | 1500 chars");
+        assert!(result.contains("42"), "Should contain line count: {}", result);
+        assert!(result.contains("1500"), "Should contain char count: {}", result);
+    }
+
+    /// A char count already written in k notation is carried through.
+    #[test]
+    fn test_write_file_result_k_notation() {
+        let result = format_write_file_result("✅ wrote 100 lines | 2.5k chars");
+        assert!(result.contains("100"));
+        assert!(result.contains("2.5k"));
+    }
+
+    /// Input that does not match the expected write_file shape is returned unchanged.
+    #[test]
+    fn test_write_file_result_fallback() {
+        let result = format_write_file_result("unexpected format");
+        assert_eq!(result, "unexpected format");
+    }
+
+    /// Both counts must appear in the summary; the rendering of the +/- sign is deliberately not pinned.
+    #[test]
+    fn test_str_replace_summary_both() {
+        let summary = format_str_replace_summary(5, 3);
+        assert!(summary.contains("5"), "Should show insertions: {}", summary);
+        assert!(summary.contains("3"), "Should show deletions: {}", summary);
+    }
+
+    /// With zero deletions the insertion count must still appear.
+    #[test]
+    fn test_str_replace_summary_insertions_only() {
+        let summary = format_str_replace_summary(10, 0);
+        assert!(summary.contains("10"));
+    }
+
+    /// With zero insertions the deletion count must still appear.
+    #[test]
+    fn test_str_replace_summary_deletions_only() {
+        let summary = format_str_replace_summary(0, 7);
+        assert!(summary.contains("7"));
+    }
+
+    /// A remember result carrying a size yields a summary with that size or the word "memory".
+    #[test]
+    fn test_remember_summary() {
+        let summary = format_remember_summary("Memory updated. Size: 1.2k");
+        assert!(summary.contains("1.2k") || summary.contains("memory"));
+    }
+
+    /// Without a size in the input the summary still mentions "memory".
+    #[test]
+    fn test_remember_summary_fallback() {
+        let summary = format_remember_summary("Memory updated");
+        assert!(summary.contains("memory"));
+    }
+
+    /// A successful screenshot result yields the file name or a camera marker.
+    #[test]
+    fn test_screenshot_summary() {
+        let summary = format_screenshot_summary("✅ Screenshot of Safari saved to: /tmp/screenshot.png");
+        assert!(summary.contains("screenshot.png") || summary.contains("📸"));
+    }
+
+    /// A failed screenshot result keeps an error marker in the summary.
+    #[test]
+    fn test_screenshot_summary_error() {
+        let summary = format_screenshot_summary("❌ Failed to capture screenshot");
+        assert!(summary.contains("❌") || summary.contains("failed"));
+    }
+
+    /// A successful coverage result yields a chart marker or the word "report".
+    #[test]
+    fn test_coverage_summary() {
+        let summary = format_coverage_summary("Coverage report generated");
+        assert!(summary.contains("📊") || summary.contains("report"));
+    }
+
+    /// A failed coverage result keeps an error marker in the summary.
+    #[test]
+    fn test_coverage_summary_error() {
+        let summary = format_coverage_summary("❌ Coverage failed");
+        assert!(summary.contains("❌") || summary.contains("failed"));
+    }
+
+    /// A successful rehydrate result yields the fragment id or a refresh marker.
+    #[test]
+    fn test_rehydrate_summary() {
+        let summary = format_rehydrate_summary("✅ Rehydrated fragment 'abc123' (47 messages, ~18500 tokens)");
+        assert!(summary.contains("abc123") || summary.contains("🔄"));
+    }
+
+    /// A failed rehydrate result keeps an error marker in the summary.
+    #[test]
+    fn test_rehydrate_summary_error() {
+        let summary = format_rehydrate_summary("❌ Fragment not found");
+        assert!(summary.contains("❌") || summary.contains("failed"));
+    }
+}
+
+// =============================================================================
+// Test: Duration Formatting
+// =============================================================================
+
+mod duration_formatting {
+    use super::*;
+
+    /// Durations below one second render as whole milliseconds.
+    #[test]
+    fn test_milliseconds() {
+        for (millis, expected) in [(500, "500ms"), (50, "50ms"), (999, "999ms")] {
+            assert_eq!(format_duration(Duration::from_millis(millis)), expected);
+        }
+    }
+
+    /// Durations from one second upwards render as seconds with one decimal place.
+    #[test]
+    fn test_seconds() {
+        for (millis, expected) in [(1000, "1.0s"), (1500, "1.5s"), (30_000, "30.0s")] {
+            assert_eq!(format_duration(Duration::from_millis(millis)), expected);
+        }
+    }
+
+    /// Ninety seconds renders with a minute marker; the exact layout is not pinned.
+    #[test]
+    fn test_minutes() {
+        let rendered = format_duration(Duration::from_secs(90));
+        assert!(rendered.contains("m"), "Should format as minutes: {}", rendered);
+        assert!(["1m", "30"].iter().any(|part| rendered.contains(*part)), "Should show 1m 30s: {}", rendered);
+    }
+
+    /// A zero-length duration renders with a "0" in it.
+    #[test]
+    fn test_zero_duration() {
+        let rendered = format_duration(Duration::ZERO);
+        assert!(rendered.contains("0"), "Should handle zero: {}", rendered);
+    }
+}
+
+// =============================================================================
+// Test: Text Truncation
+// =============================================================================
+
+mod truncation {
+    use super::*;
+
+    /// Text no longer than the limit comes back unchanged, including at exactly the limit.
+    #[test]
+    fn test_truncate_short_string() {
+        let fits = [("short", 10, "short"), ("exact", 5, "exact")];
+        fits.iter().for_each(|&(text, limit, want)| assert_eq!(truncate_for_display(text, limit), want));
+    }
+
+    /// Text longer than the limit is cut down and marked with a trailing ellipsis.
+    #[test]
+    fn test_truncate_long_string() {
+        let shortened = truncate_for_display("this is a very long string", 10);
+        assert!(shortened.len() <= 15, "Should be truncated: {}", shortened);
+        assert!(shortened.ends_with("..."), "Should end with ellipsis: {}", shortened);
+    }
+
+    /// Only the first line of multiline input is displayed.
+    #[test]
+    fn test_truncate_multiline() {
+        let cases = [("first line\nsecond line", 20, "first line"), ("❌ Error\nDetails here", 10, "❌ Error")];
+        cases.iter().for_each(|&(text, limit, want)| assert_eq!(truncate_for_display(text, limit), want));
+    }
+
+    /// `truncate_line` shortens only when its `should_truncate` flag is set.
+    #[test]
+    fn test_truncate_line_flag() {
+        let hundred_chars = "a".repeat(100);
+
+        // Flag off: the line comes back at its full length
+        let untouched = truncate_line(&hundred_chars, 50, false);
+        assert_eq!(untouched.len(), 100, "Should not be truncated");
+
+        // Flag on: the result stays within the limit plus a small allowance
+        let shortened = truncate_line(&hundred_chars, 50, true);
+        assert!(shortened.len() <= 55, "Should be truncated: len={}", shortened.len());
+    }
+}
+
+// =============================================================================
+// Test: LLM Token Cleaning
+// =============================================================================
+
+mod token_cleaning {
+    use super::*;
+
+    /// Stop tokens are stripped whether they sit at the end, in the middle or at the start.
+    #[test]
+    fn test_clean_stop_tokens() {
+        for (raw, want) in [("hello<|im_end|>", "hello"), ("test</s>more", "testmore"), ("[/INST]response", "response")] {
+            assert_eq!(clean_llm_tokens(raw), want);
+        }
+    }
+
+    /// Text without any stop token, including the empty string, passes through unchanged.
+    #[test]
+    fn test_clean_no_tokens() {
+        let untouched = ["normal text", ""];
+        untouched.iter().for_each(|text| assert_eq!(clean_llm_tokens(text), *text));
+    }
+
+    /// Several different stop tokens in one string are all removed.
+    #[test]
+    fn test_clean_multiple_tokens() {
+        let cleaned = clean_llm_tokens("start<|im_end|>middle</s>end");
+        assert!(!cleaned.contains("<|im_end|>"));
+        assert!(!cleaned.contains("</s>"));
+    }
+}
+
+// =============================================================================
+// Test: Edge Cases and Boundary Conditions
+// =============================================================================
+
+mod edge_cases {
+    use super::*;
+
+    /// Empty input must not panic and must give the neutral result for every helper.
+    #[test]
+    fn test_empty_inputs() {
+        assert_eq!(clean_llm_tokens(""), "");
+        assert_eq!(truncate_for_display("", 10), "");
+        assert_eq!(truncate_line("", 10, true), "");
+        assert!(is_empty_response(""));
+        assert!(!is_connection_error(""));
+    }
+
+    /// Truncation must cope with multi-byte characters.
+    #[test]
+    fn test_unicode_truncation() {
+        // Each emoji is several bytes long, so a byte-offset cut could land inside a character
+        let emoji_str = "🎉🎊🎈🎁🎀";
+        let result = truncate_for_display(emoji_str, 3);
+        // Reaching this point means the cut did not panic; some output must remain
+        assert!(!result.is_empty());
+
+        // Bullet points
+        let bullet_str = "• Item 1\n• Item 2";
+        let result = truncate_for_display(bullet_str, 10);
+        assert!(result.starts_with("• Item"));
+    }
+
+    /// Very large counts must still be reported, either verbatim or with an "M" suffix.
+    #[test]
+    fn test_large_numbers() {
+        let summary = format_read_file_summary(1000000, 50000000);
+        assert!(summary.contains("1000000") || summary.contains("M"));
+    }
+
+    /// The timing footer must be produced for all-zero input and for a completely full context.
+    #[test]
+    fn test_timing_footer_edge_values() {
+        // Zero duration
+        let footer = format_timing_footer(
+            Duration::from_millis(0),
+            Duration::from_millis(0),
+            Some(0),
+            0.0,
+        );
+        assert!(!footer.is_empty());
+
+        // Very high context percentage
+        let footer = format_timing_footer(
+            Duration::from_secs(1),
+            Duration::from_millis(100),
+            Some(200000),
+            100.0,
+        );
+        assert!(!footer.is_empty());
+    }
+}