Add characterization tests for UTF-8 truncation and parser sanitization
Agent: hopper Adds 32 new integration tests covering recent commits: ## UTF-8 Safe Truncation Tests (14 tests) Covers commit f30f145 (Fix UTF-8 panics): - Topic extraction with emoji, CJK, and multi-byte characters - Truncation at character boundaries (not byte boundaries) - Edge cases: exactly 50 chars, 51 chars, 2-byte/3-byte/4-byte UTF-8 - Stub generation with multi-byte topics - Combining characters and diacritics ## Parser Sanitization Tests (18 tests) Covers commit 4c36cc0 (Prevent parser poisoning): - Code block contexts (inline code, after fences, prose) - Line boundary edge cases (empty lines, whitespace, indentation) - Unicode handling (emoji, bullets, CJK before patterns) - Multiple patterns on same line - Negative cases (similar but different patterns, partial patterns) - Real-world scenarios from the original bug report All tests are blackbox/characterization style - they test observable outputs through stable public interfaces without encoding internal implementation details.
This commit is contained in:
270
crates/g3-core/tests/parser_sanitization_test.rs
Normal file
270
crates/g3-core/tests/parser_sanitization_test.rs
Normal file
@@ -0,0 +1,270 @@
|
||||
//! Parser Sanitization Edge Case Tests
|
||||
//!
|
||||
//! CHARACTERIZATION: These tests verify edge cases for the inline tool pattern
|
||||
//! sanitization that prevents parser poisoning.
|
||||
//!
|
||||
//! What these tests protect:
|
||||
//! - Tool call patterns in various contexts (code blocks, quotes, etc.)
|
||||
//! - Edge cases at line boundaries
|
||||
//! - Unicode handling in sanitization
|
||||
//!
|
||||
//! What these tests intentionally do NOT assert:
|
||||
//! - Internal parser state
|
||||
//! - Exact sanitization implementation
|
||||
//!
|
||||
//! Related commits:
|
||||
//! - 4c36cc0: fix: prevent parser poisoning from inline tool-call JSON patterns
|
||||
|
||||
use g3_core::streaming_parser::sanitize_inline_tool_patterns;
|
||||
|
||||
// =============================================================================
|
||||
// Test: Code block contexts
|
||||
// =============================================================================
|
||||
|
||||
mod code_block_contexts {
    use super::*;

    /// A tool pattern wrapped in markdown inline code is still inline text
    /// and must be scrubbed.
    #[test]
    fn test_inline_code_backticks() {
        let text = "Use `{\"tool\": \"shell\"}` to run commands";
        let sanitized = sanitize_inline_tool_patterns(text);

        // The inline occurrence must not survive sanitization.
        assert!(!sanitized.contains("{\"tool\":"), "Inline code should be sanitized");
    }

    /// A tool call sitting alone on the line after a code-fence marker is a
    /// legitimate standalone call and must be left untouched.
    #[test]
    fn test_after_code_fence_standalone() {
        let text = "```\n{\"tool\": \"shell\", \"args\": {}}";
        let sanitized = sanitize_inline_tool_patterns(text);

        // Check the second line survived intact.
        let lines: Vec<&str> = sanitized.lines().collect();
        assert!(lines[1].starts_with("{\"tool\":"), "Standalone after fence should not be sanitized");
    }

    /// A tool pattern embedded in an explanatory sentence is inline and must
    /// be scrubbed.
    #[test]
    fn test_prose_explanation() {
        let text = "The format is {\"tool\": \"name\", \"args\": {...}} where name is the tool";
        let sanitized = sanitize_inline_tool_patterns(text);

        assert!(!sanitized.contains("{\"tool\":"), "Prose should be sanitized");
    }
}
|
||||
|
||||
// =============================================================================
|
||||
// Test: Line boundary edge cases
|
||||
// =============================================================================
|
||||
|
||||
mod line_boundary_cases {
    use super::*;

    /// Blank lines preceding a standalone tool call must not make it look inline.
    #[test]
    fn test_empty_lines_before_tool_call() {
        let text = "\n\n{\"tool\": \"shell\", \"args\": {}}";
        let sanitized = sanitize_inline_tool_patterns(text);

        // The call occupies its own line (after the empty ones), so it survives.
        assert!(sanitized.contains("{\"tool\":"), "Standalone after empty lines should not be sanitized");
    }

    /// Lines containing only whitespace behave like empty lines.
    #[test]
    fn test_whitespace_only_lines() {
        let text = " \n \n{\"tool\": \"shell\", \"args\": {}}";
        let sanitized = sanitize_inline_tool_patterns(text);

        // Still standalone on its own line — must survive.
        assert!(sanitized.contains("{\"tool\":"), "Standalone after whitespace lines should not be sanitized");
    }

    /// Leading spaces before a standalone tool call do not make it inline.
    #[test]
    fn test_indented_tool_call() {
        let text = " {\"tool\": \"shell\", \"args\": {}}";
        let sanitized = sanitize_inline_tool_patterns(text);

        // Indented but alone on the line — must survive.
        assert!(sanitized.contains("{\"tool\":"), "Indented standalone should not be sanitized");
    }

    /// A leading tab before a standalone tool call does not make it inline.
    #[test]
    fn test_tab_indented_tool_call() {
        let text = "\t{\"tool\": \"shell\", \"args\": {}}";
        let sanitized = sanitize_inline_tool_patterns(text);

        // Tab-indented but alone on the line — must survive.
        assert!(sanitized.contains("{\"tool\":"), "Tab-indented standalone should not be sanitized");
    }
}
|
||||
|
||||
// =============================================================================
|
||||
// Test: Special characters and Unicode
|
||||
// =============================================================================
|
||||
|
||||
mod unicode_handling {
    use super::*;

    /// An emoji before the pattern makes it inline; it must be scrubbed.
    #[test]
    fn test_after_emoji() {
        let text = "🔧 {\"tool\": \"shell\"}";
        let sanitized = sanitize_inline_tool_patterns(text);

        assert!(!sanitized.contains("{\"tool\":"), "After emoji should be sanitized");
    }

    /// A bullet glyph before the pattern makes it inline; it must be scrubbed.
    #[test]
    fn test_after_bullet() {
        let text = "• {\"tool\": \"shell\"}";
        let sanitized = sanitize_inline_tool_patterns(text);

        assert!(!sanitized.contains("{\"tool\":"), "After bullet should be sanitized");
    }

    /// CJK text surrounding the pattern makes it inline; it must be scrubbed.
    #[test]
    fn test_after_cjk() {
        let text = "使用 {\"tool\": \"shell\"} 命令";
        let sanitized = sanitize_inline_tool_patterns(text);

        assert!(!sanitized.contains("{\"tool\":"), "After CJK should be sanitized");
    }

    /// Multi-byte characters inside the args must not break pattern detection.
    #[test]
    fn test_unicode_in_args() {
        let text = "Example: {\"tool\": \"shell\", \"args\": {\"command\": \"echo 你好\"}}";
        let sanitized = sanitize_inline_tool_patterns(text);

        // Inline, so it must be scrubbed despite the Unicode payload.
        assert!(!sanitized.contains("{\"tool\":"), "Unicode in args should still be detected");
    }
}
|
||||
|
||||
// =============================================================================
|
||||
// Test: Multiple patterns on same line
|
||||
// =============================================================================
|
||||
|
||||
mod multiple_patterns {
    use super::*;

    /// Every inline pattern on a single line is scrubbed, not just the first.
    #[test]
    fn test_three_patterns() {
        let text = "Compare {\"tool\": \"a\"} vs {\"tool\": \"b\"} vs {\"tool\": \"c\"}";
        let sanitized = sanitize_inline_tool_patterns(text);

        assert!(!sanitized.contains("{\"tool\":"), "All three should be sanitized");
    }

    /// An inline pattern and a standalone call in the same input are treated
    /// independently: the inline one is scrubbed, the standalone one kept.
    #[test]
    fn test_mixed_standalone_and_inline() {
        let text = "Text with {\"tool\": \"inline\"} here\n{\"tool\": \"standalone\", \"args\": {}}";
        let sanitized = sanitize_inline_tool_patterns(text);

        let lines: Vec<&str> = sanitized.lines().collect();

        // Line 0 carries the inline pattern — it must be gone.
        assert!(!lines[0].contains("{\"tool\":"), "Inline should be sanitized");

        // Line 1 is a standalone call — it must be preserved.
        assert!(lines[1].starts_with("{\"tool\":"), "Standalone should not be sanitized");
    }
}
|
||||
|
||||
// =============================================================================
|
||||
// Test: Edge cases that should NOT trigger sanitization
|
||||
// =============================================================================
|
||||
|
||||
mod no_sanitization_cases {
    use super::*;

    /// Near-miss patterns (wrong key, wrong case, wrong quoting) pass through
    /// completely unmodified.
    #[test]
    fn test_similar_but_different() {
        let cases = [
            "{\"tools\": \"value\"}", // "tools" not "tool"
            "{\"Tool\": \"value\"}",  // Capital T
            "{\"TOOL\": \"value\"}",  // All caps
            "{'tool': 'value'}",      // Single quotes
        ];

        for case in cases {
            let sanitized = sanitize_inline_tool_patterns(case);
            assert_eq!(sanitized, case, "'{}' should not be modified", case);
        }
    }

    /// Incomplete fragments of the pattern must never trigger sanitization.
    #[test]
    fn test_partial_patterns() {
        let cases = [
            "{\"tool",   // No colon
            "\"tool\":", // No opening brace
            "tool",      // Just the word
        ];

        for case in cases {
            let sanitized = sanitize_inline_tool_patterns(case);
            assert_eq!(sanitized, case, "'{}' should not be modified", case);
        }
    }

    /// The word "tool" appearing as a JSON *value* is not a tool-call pattern.
    #[test]
    fn test_tool_as_value() {
        let text = "{\"name\": \"tool\"}";
        let sanitized = sanitize_inline_tool_patterns(text);
        assert_eq!(sanitized, text, "'tool' as value should not trigger sanitization");
    }
}
|
||||
|
||||
// =============================================================================
|
||||
// Test: Real-world scenarios from the bug report
|
||||
// =============================================================================
|
||||
|
||||
mod real_world_scenarios {
    use super::*;

    /// The documentation snippet that triggered the original parser-poisoning
    /// bug: an inline pattern in prose followed by a legitimate standalone
    /// example a few lines later.
    #[test]
    fn test_documentation_example() {
        let text = r#"To call a tool, use this format: {"tool": "name", "args": {...}}

For example:
{"tool": "shell", "args": {"command": "ls"}}

This will execute the command."#;

        let sanitized = sanitize_inline_tool_patterns(text);
        let lines: Vec<&str> = sanitized.lines().collect();

        // The inline pattern on the first line must be scrubbed…
        assert!(!lines[0].contains("{\"tool\":"), "Inline in docs should be sanitized");

        // …while the standalone example on line 3 must be preserved.
        assert!(lines[3].starts_with("{\"tool\":"), "Standalone example should not be sanitized");
    }

    /// A pattern quoted mid-sentence is inline and must be scrubbed.
    #[test]
    fn test_code_in_prose() {
        let text = "The agent responds with {\"tool\": \"read_file\"} when it needs to read files.";
        let sanitized = sanitize_inline_tool_patterns(text);

        assert!(!sanitized.contains("{\"tool\":"), "Code in prose should be sanitized");
    }
}
|
||||
297
crates/g3-core/tests/utf8_truncation_test.rs
Normal file
297
crates/g3-core/tests/utf8_truncation_test.rs
Normal file
@@ -0,0 +1,297 @@
|
||||
//! UTF-8 Safe Truncation Tests
|
||||
//!
|
||||
//! CHARACTERIZATION: These tests verify that string truncation operations
|
||||
//! handle multi-byte UTF-8 characters correctly without panicking.
|
||||
//!
|
||||
//! What these tests protect:
|
||||
//! - Truncation of strings containing emoji, CJK characters, and other multi-byte chars
|
||||
//! - Word-boundary truncation with multi-byte characters
|
||||
//! - Edge cases at exact character boundaries
|
||||
//!
|
||||
//! What these tests intentionally do NOT assert:
|
||||
//! - Internal implementation details of truncation
|
||||
//! - Exact output format (only that it doesn't panic and is valid UTF-8)
|
||||
//!
|
||||
//! Related commits:
|
||||
//! - f30f145: Fix UTF-8 panics and inconsistent retry logic
|
||||
|
||||
use g3_core::acd::Fragment;
|
||||
use g3_providers::{Message, MessageRole};
|
||||
|
||||
// =============================================================================
|
||||
// Test: Fragment topic extraction with multi-byte characters
|
||||
// =============================================================================
|
||||
|
||||
mod topic_extraction_utf8 {
|
||||
use super::*;
|
||||
|
||||
/// Helper to create a fragment and extract its topics via stub generation
|
||||
fn extract_topics_from_messages(messages: Vec<Message>) -> Vec<String> {
|
||||
let fragment = Fragment::new(messages, None);
|
||||
// Topics are embedded in the stub, so we verify the fragment was created
|
||||
// without panicking and has valid data
|
||||
fragment.topics.clone()
|
||||
}
|
||||
|
||||
/// Test that emoji in user messages don't cause panics
|
||||
#[test]
|
||||
fn test_emoji_in_topic() {
|
||||
let messages = vec![
|
||||
Message::new(MessageRole::User, "🚀 Deploy the application to production".to_string()),
|
||||
Message::new(MessageRole::Assistant, "I'll help you deploy.".to_string()),
|
||||
];
|
||||
|
||||
let topics = extract_topics_from_messages(messages);
|
||||
|
||||
// Should not panic and should contain the topic
|
||||
assert!(!topics.is_empty(), "Should extract at least one topic");
|
||||
assert!(topics[0].contains("🚀") || topics[0].contains("Deploy"),
|
||||
"Topic should contain emoji or text: {:?}", topics);
|
||||
}
|
||||
|
||||
/// Test that CJK characters don't cause panics
|
||||
#[test]
|
||||
fn test_cjk_characters_in_topic() {
|
||||
let messages = vec![
|
||||
Message::new(MessageRole::User, "请帮我实现一个用户认证模块".to_string()),
|
||||
Message::new(MessageRole::Assistant, "好的,我来帮你实现。".to_string()),
|
||||
];
|
||||
|
||||
let topics = extract_topics_from_messages(messages);
|
||||
|
||||
// Should not panic
|
||||
assert!(!topics.is_empty(), "Should extract topic from CJK text");
|
||||
}
|
||||
|
||||
/// Test that mixed ASCII and multi-byte characters work
|
||||
#[test]
|
||||
fn test_mixed_ascii_and_multibyte() {
|
||||
let messages = vec![
|
||||
Message::new(MessageRole::User, "Fix the bug in auth.rs • important ⚡ urgent".to_string()),
|
||||
Message::new(MessageRole::Assistant, "I'll fix it.".to_string()),
|
||||
];
|
||||
|
||||
let topics = extract_topics_from_messages(messages);
|
||||
|
||||
// Should not panic
|
||||
assert!(!topics.is_empty(), "Should extract topic from mixed text");
|
||||
}
|
||||
|
||||
/// Test long message with emoji that would be truncated
|
||||
#[test]
|
||||
fn test_long_message_with_emoji_truncation() {
|
||||
// Create a message longer than 50 characters with emoji scattered throughout
|
||||
let long_msg = "🔧 Fix the authentication bug in the login module that causes users to be logged out unexpectedly 🐛";
|
||||
assert!(long_msg.chars().count() > 50, "Test message should be > 50 chars");
|
||||
|
||||
let messages = vec![
|
||||
Message::new(MessageRole::User, long_msg.to_string()),
|
||||
Message::new(MessageRole::Assistant, "I'll investigate.".to_string()),
|
||||
];
|
||||
|
||||
let topics = extract_topics_from_messages(messages);
|
||||
|
||||
// Should not panic and topic should be truncated
|
||||
assert!(!topics.is_empty(), "Should extract truncated topic");
|
||||
// The topic should be valid UTF-8 (this would fail if truncated mid-character)
|
||||
let topic = &topics[0];
|
||||
assert!(topic.is_ascii() || topic.chars().count() > 0, "Topic should be valid UTF-8");
|
||||
}
|
||||
|
||||
/// Test message with emoji at exactly the truncation boundary
|
||||
#[test]
|
||||
fn test_emoji_at_truncation_boundary() {
|
||||
// Create a message where an emoji would be at position 49-50
|
||||
// "a]" repeated to fill 48 chars, then emoji
|
||||
let prefix = "a".repeat(48);
|
||||
let msg = format!("{}🚀🔥 more text here", prefix);
|
||||
|
||||
let messages = vec![
|
||||
Message::new(MessageRole::User, msg),
|
||||
Message::new(MessageRole::Assistant, "OK".to_string()),
|
||||
];
|
||||
|
||||
let topics = extract_topics_from_messages(messages);
|
||||
|
||||
// Should not panic - the key test is that this doesn't crash
|
||||
assert!(!topics.is_empty());
|
||||
}
|
||||
|
||||
/// Test that bullet points (•) don't cause issues
|
||||
#[test]
|
||||
fn test_bullet_points() {
|
||||
let messages = vec![
|
||||
Message::new(MessageRole::User, "Tasks: • item one • item two • item three • item four • item five".to_string()),
|
||||
Message::new(MessageRole::Assistant, "I see the tasks.".to_string()),
|
||||
];
|
||||
|
||||
let topics = extract_topics_from_messages(messages);
|
||||
|
||||
// Should not panic
|
||||
assert!(!topics.is_empty());
|
||||
}
|
||||
|
||||
/// Test combining characters (diacritics)
|
||||
#[test]
|
||||
fn test_combining_characters() {
|
||||
// é can be represented as e + combining acute accent
|
||||
let messages = vec![
|
||||
Message::new(MessageRole::User, "Café résumé naïve coöperate fiancée".to_string()),
|
||||
Message::new(MessageRole::Assistant, "Understood.".to_string()),
|
||||
];
|
||||
|
||||
let topics = extract_topics_from_messages(messages);
|
||||
|
||||
// Should not panic
|
||||
assert!(!topics.is_empty());
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Test: Fragment stub generation with multi-byte characters
|
||||
// =============================================================================
|
||||
|
||||
mod stub_generation_utf8 {
    use super::*;

    /// Stub generation must succeed when the conversation topics carry emoji,
    /// and the stub must retain its standard structural markers.
    #[test]
    fn test_stub_with_emoji_topics() {
        let conversation = vec![
            Message::new(MessageRole::User, "🎯 Implement feature X".to_string()),
            Message::new(MessageRole::Assistant, "Starting implementation.".to_string()),
            Message::new(MessageRole::User, "🔧 Now fix the tests".to_string()),
            Message::new(MessageRole::Assistant, "Fixing tests.".to_string()),
        ];

        let fragment = Fragment::new(conversation, None);
        let stub = fragment.generate_stub();

        // Structural markers every stub must carry.
        assert!(stub.contains("DEHYDRATED CONTEXT"), "Stub should have header");
        assert!(stub.contains(&fragment.fragment_id), "Stub should have fragment ID");
        assert!(stub.contains("rehydrate"), "Stub should mention rehydrate");
    }

    /// A topic built from a long multi-byte message is truncated without
    /// panicking, and stub generation still produces a valid stub.
    #[test]
    fn test_stub_with_truncated_multibyte_topic() {
        // A 4-byte emoji followed by 30 repetitions of a 3-byte-per-char CJK word.
        let long_msg = "🔧 ".to_string() + &"修复".repeat(30);

        let conversation = vec![
            Message::new(MessageRole::User, long_msg),
            Message::new(MessageRole::Assistant, "好的".to_string()),
        ];

        let stub = Fragment::new(conversation, None).generate_stub();

        // Surviving to this point means truncation didn't panic; the stub
        // must still carry its header.
        assert!(stub.contains("DEHYDRATED CONTEXT"));
    }
}
|
||||
|
||||
// =============================================================================
|
||||
// Test: Edge cases for character counting vs byte counting
|
||||
// =============================================================================
|
||||
|
||||
mod char_vs_byte_edge_cases {
    use super::*;

    /// Truncation must count characters, not bytes: 50 emoji are 50 chars
    /// but 200 bytes, so byte-indexed slicing would panic mid-character.
    #[test]
    fn test_emoji_string_character_count() {
        let fire = "🔥".repeat(50);
        assert_eq!(fire.chars().count(), 50, "Should be 50 characters");
        assert_eq!(fire.len(), 200, "Should be 200 bytes (4 bytes per emoji)");

        let conversation = vec![
            Message::new(MessageRole::User, fire),
            Message::new(MessageRole::Assistant, "OK".to_string()),
        ];

        // Byte-based slicing would crash here; surviving is the test.
        let _stub = Fragment::new(conversation, None).generate_stub();
    }

    /// A message of exactly 50 characters needs no truncation.
    #[test]
    fn test_exactly_50_chars() {
        let body = "a".repeat(50);
        assert_eq!(body.chars().count(), 50);

        let conversation = vec![
            Message::new(MessageRole::User, body),
            Message::new(MessageRole::Assistant, "OK".to_string()),
        ];

        let topics = Fragment::new(conversation, None).topics;

        // Exactly at the limit: no "..." suffix is expected.
        assert!(!topics.is_empty());
        // Topic should be the full message or close to it.
    }

    /// A 51-character message crosses the limit and gets truncated.
    #[test]
    fn test_51_chars_triggers_truncation() {
        let body = "a".repeat(51);
        assert_eq!(body.chars().count(), 51);

        let conversation = vec![
            Message::new(MessageRole::User, body),
            Message::new(MessageRole::Assistant, "OK".to_string()),
        ];

        let topics = Fragment::new(conversation, None).topics;

        assert!(!topics.is_empty());
        let topic = &topics[0];
        assert!(topic.ends_with("..."), "Should be truncated: {}", topic);
    }

    /// 3-byte UTF-8 characters (CJK) must not break truncation.
    #[test]
    fn test_3byte_utf8_chars() {
        // Each of these CJK characters encodes to 3 bytes.
        let cjk = "中".repeat(60); // 60 chars, 180 bytes
        assert_eq!(cjk.chars().count(), 60);
        assert_eq!(cjk.len(), 180);

        let conversation = vec![
            Message::new(MessageRole::User, cjk),
            Message::new(MessageRole::Assistant, "好".to_string()),
        ];

        let fragment = Fragment::new(conversation, None);

        // Surviving stub generation is the test; topics must also exist.
        let _stub = fragment.generate_stub();
        assert!(!fragment.topics.is_empty());
    }

    /// 2-byte UTF-8 characters (Latin extended) must not break truncation.
    #[test]
    fn test_2byte_utf8_chars() {
        // Each precomposed accented character encodes to 2 bytes.
        let accented = "é".repeat(60); // 60 chars, 120 bytes
        assert_eq!(accented.chars().count(), 60);
        assert_eq!(accented.len(), 120);

        let conversation = vec![
            Message::new(MessageRole::User, accented),
            Message::new(MessageRole::Assistant, "OK".to_string()),
        ];

        let fragment = Fragment::new(conversation, None);

        // Surviving stub generation is the test; topics must also exist.
        let _stub = fragment.generate_stub();
        assert!(!fragment.topics.is_empty());
    }
}
|
||||
Reference in New Issue
Block a user