From dc45987e8d712149802398d6ed966c7249cce3d4 Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Tue, 13 Jan 2026 11:22:46 +0530 Subject: [PATCH] Add characterization tests for UTF-8 truncation and parser sanitization Agent: hopper Adds 32 new integration tests covering recent commits: ## UTF-8 Safe Truncation Tests (14 tests) Covers commit f30f145 (Fix UTF-8 panics): - Topic extraction with emoji, CJK, and multi-byte characters - Truncation at character boundaries (not byte boundaries) - Edge cases: exactly 50 chars, 51 chars, 2-byte/3-byte/4-byte UTF-8 - Stub generation with multi-byte topics - Combining characters and diacritics ## Parser Sanitization Tests (18 tests) Covers commit 4c36cc0 (Prevent parser poisoning): - Code block contexts (inline code, after fences, prose) - Line boundary edge cases (empty lines, whitespace, indentation) - Unicode handling (emoji, bullets, CJK before patterns) - Multiple patterns on same line - Negative cases (similar but different patterns, partial patterns) - Real-world scenarios from the original bug report All tests are blackbox/characterization style - they test observable outputs through stable public interfaces without encoding internal implementation details. --- .../g3-core/tests/parser_sanitization_test.rs | 270 ++++++++++++++++ crates/g3-core/tests/utf8_truncation_test.rs | 297 ++++++++++++++++++ 2 files changed, 567 insertions(+) create mode 100644 crates/g3-core/tests/parser_sanitization_test.rs create mode 100644 crates/g3-core/tests/utf8_truncation_test.rs diff --git a/crates/g3-core/tests/parser_sanitization_test.rs b/crates/g3-core/tests/parser_sanitization_test.rs new file mode 100644 index 0000000..5584422 --- /dev/null +++ b/crates/g3-core/tests/parser_sanitization_test.rs @@ -0,0 +1,270 @@ +//! Parser Sanitization Edge Case Tests +//! +//! CHARACTERIZATION: These tests verify edge cases for the inline tool pattern +//! sanitization that prevents parser poisoning. +//! +//! 
What these tests protect: +//! - Tool call patterns in various contexts (code blocks, quotes, etc.) +//! - Edge cases at line boundaries +//! - Unicode handling in sanitization +//! +//! What these tests intentionally do NOT assert: +//! - Internal parser state +//! - Exact sanitization implementation +//! +//! Related commits: +//! - 4c36cc0: fix: prevent parser poisoning from inline tool-call JSON patterns + +use g3_core::streaming_parser::sanitize_inline_tool_patterns; + +// ============================================================================= +// Test: Code block contexts +// ============================================================================= + +mod code_block_contexts { + use super::*; + + /// Test tool pattern in markdown inline code + #[test] + fn test_inline_code_backticks() { + let input = "Use `{\"tool\": \"shell\"}` to run commands"; + let result = sanitize_inline_tool_patterns(input); + + // Should be sanitized since it's inline + assert!(!result.contains("{\"tool\":"), "Inline code should be sanitized"); + } + + /// Test tool pattern after code fence (should NOT be sanitized) + #[test] + fn test_after_code_fence_standalone() { + // Tool call on its own line after a code fence marker + let input = "```\n{\"tool\": \"shell\", \"args\": {}}"; + let result = sanitize_inline_tool_patterns(input); + + // The tool call is on its own line, should NOT be sanitized + let lines: Vec<&str> = result.lines().collect(); + assert!(lines[1].starts_with("{\"tool\":"), "Standalone after fence should not be sanitized"); + } + + /// Test tool pattern in prose explanation + #[test] + fn test_prose_explanation() { + let input = "The format is {\"tool\": \"name\", \"args\": {...}} where name is the tool"; + let result = sanitize_inline_tool_patterns(input); + + assert!(!result.contains("{\"tool\":"), "Prose should be sanitized"); + } +} + +// ============================================================================= +// Test: Line boundary edge cases +// 
============================================================================= + +mod line_boundary_cases { + use super::*; + + /// Test empty lines don't affect detection + #[test] + fn test_empty_lines_before_tool_call() { + let input = "\n\n{\"tool\": \"shell\", \"args\": {}}"; + let result = sanitize_inline_tool_patterns(input); + + // Tool call is on its own line (after empty lines), should NOT be sanitized + assert!(result.contains("{\"tool\":"), "Standalone after empty lines should not be sanitized"); + } + + /// Test whitespace-only lines + #[test] + fn test_whitespace_only_lines() { + let input = " \n \n{\"tool\": \"shell\", \"args\": {}}"; + let result = sanitize_inline_tool_patterns(input); + + // Tool call is on its own line, should NOT be sanitized + assert!(result.contains("{\"tool\":"), "Standalone after whitespace lines should not be sanitized"); + } + + /// Test tool call with leading whitespace (indented) + #[test] + fn test_indented_tool_call() { + let input = " {\"tool\": \"shell\", \"args\": {}}"; + let result = sanitize_inline_tool_patterns(input); + + // Indented but on its own line, should NOT be sanitized + assert!(result.contains("{\"tool\":"), "Indented standalone should not be sanitized"); + } + + /// Test tool call with tabs + #[test] + fn test_tab_indented_tool_call() { + let input = "\t{\"tool\": \"shell\", \"args\": {}}"; + let result = sanitize_inline_tool_patterns(input); + + // Tab-indented but on its own line, should NOT be sanitized + assert!(result.contains("{\"tool\":"), "Tab-indented standalone should not be sanitized"); + } +} + +// ============================================================================= +// Test: Special characters and Unicode +// ============================================================================= + +mod unicode_handling { + use super::*; + + /// Test tool pattern after emoji + #[test] + fn test_after_emoji() { + let input = "🔧 {\"tool\": \"shell\"}"; + let result = 
sanitize_inline_tool_patterns(input); + + // Emoji before means it's inline, should be sanitized + assert!(!result.contains("{\"tool\":"), "After emoji should be sanitized"); + } + + /// Test tool pattern after bullet point + #[test] + fn test_after_bullet() { + let input = "• {\"tool\": \"shell\"}"; + let result = sanitize_inline_tool_patterns(input); + + // Bullet before means it's inline, should be sanitized + assert!(!result.contains("{\"tool\":"), "After bullet should be sanitized"); + } + + /// Test tool pattern after CJK text + #[test] + fn test_after_cjk() { + let input = "使用 {\"tool\": \"shell\"} 命令"; + let result = sanitize_inline_tool_patterns(input); + + // CJK text before means it's inline, should be sanitized + assert!(!result.contains("{\"tool\":"), "After CJK should be sanitized"); + } + + /// Test tool pattern with Unicode in args (should still detect pattern) + #[test] + fn test_unicode_in_args() { + let input = "Example: {\"tool\": \"shell\", \"args\": {\"command\": \"echo 你好\"}}"; + let result = sanitize_inline_tool_patterns(input); + + // Should be sanitized (inline) + assert!(!result.contains("{\"tool\":"), "Unicode in args should still be detected"); + } +} + +// ============================================================================= +// Test: Multiple patterns on same line +// ============================================================================= + +mod multiple_patterns { + use super::*; + + /// Test three tool patterns on one line + #[test] + fn test_three_patterns() { + let input = "Compare {\"tool\": \"a\"} vs {\"tool\": \"b\"} vs {\"tool\": \"c\"}"; + let result = sanitize_inline_tool_patterns(input); + + // All should be sanitized + assert!(!result.contains("{\"tool\":"), "All three should be sanitized"); + } + + /// Test mixed: one standalone, one inline + #[test] + fn test_mixed_standalone_and_inline() { + let input = "Text with {\"tool\": \"inline\"} here\n{\"tool\": \"standalone\", \"args\": {}}"; + let result = 
sanitize_inline_tool_patterns(input); + + let lines: Vec<&str> = result.lines().collect(); + + // First line should have sanitized pattern + assert!(!lines[0].contains("{\"tool\":"), "Inline should be sanitized"); + + // Second line should NOT be sanitized (standalone) + assert!(lines[1].starts_with("{\"tool\":"), "Standalone should not be sanitized"); + } +} + +// ============================================================================= +// Test: Edge cases that should NOT trigger sanitization +// ============================================================================= + +mod no_sanitization_cases { + use super::*; + + /// Test similar but not matching patterns + #[test] + fn test_similar_but_different() { + let inputs = [ + "{\"tools\": \"value\"}", // "tools" not "tool" + "{\"Tool\": \"value\"}", // Capital T + "{\"TOOL\": \"value\"}", // All caps + "{'tool': 'value'}", // Single quotes + ]; + + for input in inputs { + let result = sanitize_inline_tool_patterns(input); + assert_eq!(result, input, "'{}' should not be modified", input); + } + } + + /// Test partial patterns + #[test] + fn test_partial_patterns() { + let inputs = [ + "{\"tool", // No colon + "\"tool\":", // No opening brace + "tool", // Just the word + ]; + + for input in inputs { + let result = sanitize_inline_tool_patterns(input); + assert_eq!(result, input, "'{}' should not be modified", input); + } + } + + /// Test JSON that happens to have "tool" as a value + #[test] + fn test_tool_as_value() { + let input = "{\"name\": \"tool\"}"; + let result = sanitize_inline_tool_patterns(input); + assert_eq!(result, input, "'tool' as value should not trigger sanitization"); + } +} + +// ============================================================================= +// Test: Real-world scenarios from the bug report +// ============================================================================= + +mod real_world_scenarios { + use super::*; + + /// Test documentation example that caused the 
original bug + #[test] + fn test_documentation_example() { + let input = r#"To call a tool, use this format: {"tool": "name", "args": {...}} + +For example: +{"tool": "shell", "args": {"command": "ls"}} + +This will execute the command."#; + + let result = sanitize_inline_tool_patterns(input); + let lines: Vec<&str> = result.lines().collect(); + + // First line has inline pattern - should be sanitized + assert!(!lines[0].contains("{\"tool\":"), "Inline in docs should be sanitized"); + + // The standalone example should NOT be sanitized + assert!(lines[3].starts_with("{\"tool\":"), "Standalone example should not be sanitized"); + } + + /// Test code example in prose + #[test] + fn test_code_in_prose() { + let input = "The agent responds with {\"tool\": \"read_file\"} when it needs to read files."; + let result = sanitize_inline_tool_patterns(input); + + assert!(!result.contains("{\"tool\":"), "Code in prose should be sanitized"); + } +} diff --git a/crates/g3-core/tests/utf8_truncation_test.rs b/crates/g3-core/tests/utf8_truncation_test.rs new file mode 100644 index 0000000..267149e --- /dev/null +++ b/crates/g3-core/tests/utf8_truncation_test.rs @@ -0,0 +1,297 @@ +//! UTF-8 Safe Truncation Tests +//! +//! CHARACTERIZATION: These tests verify that string truncation operations +//! handle multi-byte UTF-8 characters correctly without panicking. +//! +//! What these tests protect: +//! - Truncation of strings containing emoji, CJK characters, and other multi-byte chars +//! - Word-boundary truncation with multi-byte characters +//! - Edge cases at exact character boundaries +//! +//! What these tests intentionally do NOT assert: +//! - Internal implementation details of truncation +//! - Exact output format (only that it doesn't panic and is valid UTF-8) +//! +//! Related commits: +//! 
- f30f145: Fix UTF-8 panics and inconsistent retry logic

use g3_core::acd::Fragment;
use g3_providers::{Message, MessageRole};

// =============================================================================
// Test: Fragment topic extraction with multi-byte characters
// =============================================================================

mod topic_extraction_utf8 {
    use super::*;

    /// Helper to create a fragment and extract its topics via stub generation
    fn extract_topics_from_messages(messages: Vec<Message>) -> Vec<String> {
        let fragment = Fragment::new(messages, None);
        // Topics are embedded in the stub, so we verify the fragment was created
        // without panicking and has valid data
        fragment.topics.clone()
    }

    /// Test that emoji in user messages don't cause panics
    #[test]
    fn test_emoji_in_topic() {
        let messages = vec![
            Message::new(MessageRole::User, "🚀 Deploy the application to production".to_string()),
            Message::new(MessageRole::Assistant, "I'll help you deploy.".to_string()),
        ];

        let topics = extract_topics_from_messages(messages);

        // Should not panic and should contain the topic
        assert!(!topics.is_empty(), "Should extract at least one topic");
        assert!(topics[0].contains("🚀") || topics[0].contains("Deploy"),
            "Topic should contain emoji or text: {:?}", topics);
    }

    /// Test that CJK characters don't cause panics
    #[test]
    fn test_cjk_characters_in_topic() {
        let messages = vec![
            Message::new(MessageRole::User, "请帮我实现一个用户认证模块".to_string()),
            Message::new(MessageRole::Assistant, "好的,我来帮你实现。".to_string()),
        ];

        let topics = extract_topics_from_messages(messages);

        // Should not panic
        assert!(!topics.is_empty(), "Should extract topic from CJK text");
    }

    /// Test that mixed ASCII and multi-byte characters work
    #[test]
    fn test_mixed_ascii_and_multibyte() {
        let messages = vec![
            Message::new(MessageRole::User, "Fix the bug in auth.rs • important ⚡ urgent".to_string()),
            
Message::new(MessageRole::Assistant, "I'll fix it.".to_string()), + ]; + + let topics = extract_topics_from_messages(messages); + + // Should not panic + assert!(!topics.is_empty(), "Should extract topic from mixed text"); + } + + /// Test long message with emoji that would be truncated + #[test] + fn test_long_message_with_emoji_truncation() { + // Create a message longer than 50 characters with emoji scattered throughout + let long_msg = "🔧 Fix the authentication bug in the login module that causes users to be logged out unexpectedly 🐛"; + assert!(long_msg.chars().count() > 50, "Test message should be > 50 chars"); + + let messages = vec![ + Message::new(MessageRole::User, long_msg.to_string()), + Message::new(MessageRole::Assistant, "I'll investigate.".to_string()), + ]; + + let topics = extract_topics_from_messages(messages); + + // Should not panic and topic should be truncated + assert!(!topics.is_empty(), "Should extract truncated topic"); + // The topic should be valid UTF-8 (this would fail if truncated mid-character) + let topic = &topics[0]; + assert!(topic.is_ascii() || topic.chars().count() > 0, "Topic should be valid UTF-8"); + } + + /// Test message with emoji at exactly the truncation boundary + #[test] + fn test_emoji_at_truncation_boundary() { + // Create a message where an emoji would be at position 49-50 + // "a]" repeated to fill 48 chars, then emoji + let prefix = "a".repeat(48); + let msg = format!("{}🚀🔥 more text here", prefix); + + let messages = vec![ + Message::new(MessageRole::User, msg), + Message::new(MessageRole::Assistant, "OK".to_string()), + ]; + + let topics = extract_topics_from_messages(messages); + + // Should not panic - the key test is that this doesn't crash + assert!(!topics.is_empty()); + } + + /// Test that bullet points (•) don't cause issues + #[test] + fn test_bullet_points() { + let messages = vec![ + Message::new(MessageRole::User, "Tasks: • item one • item two • item three • item four • item five".to_string()), + 
Message::new(MessageRole::Assistant, "I see the tasks.".to_string()), + ]; + + let topics = extract_topics_from_messages(messages); + + // Should not panic + assert!(!topics.is_empty()); + } + + /// Test combining characters (diacritics) + #[test] + fn test_combining_characters() { + // é can be represented as e + combining acute accent + let messages = vec![ + Message::new(MessageRole::User, "Café résumé naïve coöperate fiancée".to_string()), + Message::new(MessageRole::Assistant, "Understood.".to_string()), + ]; + + let topics = extract_topics_from_messages(messages); + + // Should not panic + assert!(!topics.is_empty()); + } +} + +// ============================================================================= +// Test: Fragment stub generation with multi-byte characters +// ============================================================================= + +mod stub_generation_utf8 { + use super::*; + + /// Test that stub generation works with emoji in topics + #[test] + fn test_stub_with_emoji_topics() { + let messages = vec![ + Message::new(MessageRole::User, "🎯 Implement feature X".to_string()), + Message::new(MessageRole::Assistant, "Starting implementation.".to_string()), + Message::new(MessageRole::User, "🔧 Now fix the tests".to_string()), + Message::new(MessageRole::Assistant, "Fixing tests.".to_string()), + ]; + + let fragment = Fragment::new(messages, None); + let stub = fragment.generate_stub(); + + // Stub should be valid UTF-8 and contain expected elements + assert!(stub.contains("DEHYDRATED CONTEXT"), "Stub should have header"); + assert!(stub.contains(&fragment.fragment_id), "Stub should have fragment ID"); + assert!(stub.contains("rehydrate"), "Stub should mention rehydrate"); + } + + /// Test stub with very long multi-byte topic that gets truncated + #[test] + fn test_stub_with_truncated_multibyte_topic() { + // Create a long message with multi-byte chars that will be truncated + let long_msg = "🔧 ".to_string() + &"修复".repeat(30); // Chinese chars, 
each 3 bytes + + let messages = vec![ + Message::new(MessageRole::User, long_msg), + Message::new(MessageRole::Assistant, "好的".to_string()), + ]; + + let fragment = Fragment::new(messages, None); + let stub = fragment.generate_stub(); + + // Should not panic and stub should be valid + assert!(stub.contains("DEHYDRATED CONTEXT")); + } +} + +// ============================================================================= +// Test: Edge cases for character counting vs byte counting +// ============================================================================= + +mod char_vs_byte_edge_cases { + use super::*; + + /// Test that we count characters, not bytes + /// A string of 50 emoji is 50 characters but 200 bytes + #[test] + fn test_emoji_string_character_count() { + let emoji_50 = "🔥".repeat(50); + assert_eq!(emoji_50.chars().count(), 50, "Should be 50 characters"); + assert_eq!(emoji_50.len(), 200, "Should be 200 bytes (4 bytes per emoji)"); + + let messages = vec![ + Message::new(MessageRole::User, emoji_50), + Message::new(MessageRole::Assistant, "OK".to_string()), + ]; + + let fragment = Fragment::new(messages, None); + + // Should not panic - if we used byte slicing, this would crash + let _stub = fragment.generate_stub(); + } + + /// Test exactly 50 characters (no truncation needed) + #[test] + fn test_exactly_50_chars() { + let msg = "a".repeat(50); + assert_eq!(msg.chars().count(), 50); + + let messages = vec![ + Message::new(MessageRole::User, msg), + Message::new(MessageRole::Assistant, "OK".to_string()), + ]; + + let topics = Fragment::new(messages, None).topics; + + // Should not have "..." 
suffix since it's exactly 50 + assert!(!topics.is_empty()); + // Topic should be the full message or close to it + } + + /// Test 51 characters (truncation needed) + #[test] + fn test_51_chars_triggers_truncation() { + let msg = "a".repeat(51); + assert_eq!(msg.chars().count(), 51); + + let messages = vec![ + Message::new(MessageRole::User, msg), + Message::new(MessageRole::Assistant, "OK".to_string()), + ]; + + let topics = Fragment::new(messages, None).topics; + + // Should have truncation + assert!(!topics.is_empty()); + let topic = &topics[0]; + assert!(topic.ends_with("..."), "Should be truncated: {}", topic); + } + + /// Test string with 3-byte UTF-8 characters (CJK) + #[test] + fn test_3byte_utf8_chars() { + // Each Chinese character is 3 bytes + let cjk_60 = "中".repeat(60); // 60 chars, 180 bytes + assert_eq!(cjk_60.chars().count(), 60); + assert_eq!(cjk_60.len(), 180); + + let messages = vec![ + Message::new(MessageRole::User, cjk_60), + Message::new(MessageRole::Assistant, "好".to_string()), + ]; + + let fragment = Fragment::new(messages, None); + + // Should not panic + let _stub = fragment.generate_stub(); + assert!(!fragment.topics.is_empty()); + } + + /// Test string with 2-byte UTF-8 characters (Latin extended) + #[test] + fn test_2byte_utf8_chars() { + // Each accented character is 2 bytes + let accented_60 = "é".repeat(60); // 60 chars, 120 bytes + assert_eq!(accented_60.chars().count(), 60); + assert_eq!(accented_60.len(), 120); + + let messages = vec![ + Message::new(MessageRole::User, accented_60), + Message::new(MessageRole::Assistant, "OK".to_string()), + ]; + + let fragment = Fragment::new(messages, None); + + // Should not panic + let _stub = fragment.generate_stub(); + assert!(!fragment.topics.is_empty()); + } +}