Add 10 stress tests covering: - Nested formatting (bold in italic, italic in bold) - Empty/minimal content edge cases - Escape sequences and special characters - Lists with complex inline formatting - Links with various content types - Tables with formatting in cells - Code blocks (should not format contents) - Mixed block elements (headers, quotes, rules) - Nested lists (3+ levels, mixed types) - Pathological/adversarial inputs (unbalanced delimiters, unicode, long lines) All 45 tests pass.
245 lines
8.3 KiB
Rust
245 lines
8.3 KiB
Rust
//! Syntax highlighting for code blocks using syntect.
|
|
//!
|
|
//! This module provides functionality to extract code blocks from markdown,
|
|
//! apply syntax highlighting using syntect, and return the highlighted output
|
|
//! while leaving the rest of the markdown intact.
|
|
|
|
use once_cell::sync::Lazy;
|
|
use syntect::easy::HighlightLines;
|
|
use syntect::highlighting::ThemeSet;
|
|
use syntect::parsing::SyntaxSet;
|
|
use syntect::util::{as_24_bit_terminal_escaped, LinesWithEndings};
|
|
|
|
/// Lazily loaded syntax set with default syntaxes.
|
|
// Wrapped in `Lazy` so the default syntax definitions are loaded on first
// access rather than at program start; `highlight_code` reads this both to
// look up a syntax by language token and to drive the line highlighter.
static SYNTAX_SET: Lazy<SyntaxSet> = Lazy::new(SyntaxSet::load_defaults_newlines);
|
|
|
|
/// Lazily loaded theme set with default themes.
|
|
// Wrapped in `Lazy` so the default themes are loaded on first access rather
// than at program start; `highlight_code` indexes into `themes` by name.
static THEME_SET: Lazy<ThemeSet> = Lazy::new(ThemeSet::load_defaults);
|
|
|
|
/// A contiguous piece of a markdown document, borrowed from the input string.
///
/// A document is split into an ordered sequence of these segments so that
/// fenced code blocks can be handled separately from the surrounding markdown.
///
/// The type only holds string slices, so it is cheap to copy and can be
/// compared directly (e.g. with `assert_eq!`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MarkdownSegment<'a> {
    /// Plain markdown text (anything that is not a fenced code block).
    Text(&'a str),
    /// A fenced code block.
    CodeBlock {
        /// Language tag taken from the opening fence line, if one was given.
        lang: Option<&'a str>,
        /// The text between the opening and closing fence lines.
        code: &'a str,
    },
}
|
|
|
|
/// Parse markdown into segments of text and code blocks.
|
|
fn parse_markdown_segments(markdown: &str) -> Vec<MarkdownSegment<'_>> {
|
|
let mut segments = Vec::new();
|
|
let mut remaining = markdown;
|
|
|
|
while !remaining.is_empty() {
|
|
// Look for the start of a code block (``` at start of line or after newline)
|
|
if let Some(fence_start) = find_code_fence_start(remaining) {
|
|
// Add any text before the fence
|
|
if fence_start > 0 {
|
|
segments.push(MarkdownSegment::Text(&remaining[..fence_start]));
|
|
}
|
|
|
|
// Parse the code block
|
|
let after_fence = &remaining[fence_start..];
|
|
if let Some((lang, code, end_pos)) = parse_code_block(after_fence) {
|
|
segments.push(MarkdownSegment::CodeBlock { lang, code });
|
|
remaining = &after_fence[end_pos..];
|
|
} else {
|
|
// Malformed fence - treat as text and continue
|
|
segments.push(MarkdownSegment::Text(&remaining[..fence_start + 3]));
|
|
remaining = &remaining[fence_start + 3..];
|
|
}
|
|
} else {
|
|
// No more code blocks - rest is plain text
|
|
segments.push(MarkdownSegment::Text(remaining));
|
|
break;
|
|
}
|
|
}
|
|
|
|
segments
|
|
}
|
|
|
|
/// Find the byte offset of the first code fence (```) that begins a line.
///
/// Leading whitespace on the fence line is skipped: the returned offset points
/// at the first backtick, not at the start of the line. Returns `None` when no
/// line starts with a fence.
///
/// Lines are walked together with their terminators, so the offset stays
/// correct for `\r\n` line endings as well as `\n`.
fn find_code_fence_start(text: &str) -> Option<usize> {
    let mut line_start = 0;
    for line in text.split_inclusive('\n') {
        let trimmed = line.trim_start();
        if trimmed.starts_with("```") {
            // Skip the indentation so the offset lands on the fence itself.
            let indent = line.len() - trimmed.len();
            return Some(line_start + indent);
        }
        // `line` still carries its terminator, so this is the exact byte
        // length of the line regardless of whether it ended in "\n" or "\r\n".
        line_start += line.len();
    }
    None
}
|
|
|
|
/// Parse a code block starting at the opening fence.
///
/// `text` is expected to begin with the three-backtick opening fence.
///
/// Returns `(language, code_content, end_position)` where:
/// - `language` is the first whitespace-separated word following the opening
///   fence on the same line, or `None` if that line has nothing else on it;
/// - `code_content` is everything between the opening fence line and the
///   closing fence line, line terminators included;
/// - `end_position` is the byte offset in `text` just past the closing fence
///   line, including its line terminator when present.
///
/// If no closing fence is found, the rest of `text` is treated as code and
/// `end_position` is `text.len()`. Returns `None` when the opening fence line
/// is not terminated by a newline.
///
/// Both `\n` and `\r\n` line endings are handled.
fn parse_code_block(text: &str) -> Option<(Option<&str>, &str, usize)> {
    let first_line_end = text.find('\n')?;
    // Skip the three fence bytes; a checked slice avoids a panic if the
    // caller ever hands in text that does not actually start with a fence.
    let info = text.get(3..first_line_end)?.trim();

    // The language is the first word of the info string (None when empty).
    let lang = info.split_whitespace().next();

    let code_start = first_line_end + 1;
    let body = &text[code_start..];

    // Walk the body line by line, keeping each terminator attached so that
    // `offset` is always an exact byte position within `body`.
    let mut offset = 0;
    for line in body.split_inclusive('\n') {
        if line.trim_start().starts_with("```") {
            let code = &body[..offset];
            // `line.len()` already counts the terminator after the closing
            // fence, if there is one.
            let end = code_start + offset + line.len();
            return Some((lang, code, end));
        }
        offset += line.len();
    }

    // No closing fence found - treat the entire rest as code.
    Some((lang, body, text.len()))
}
|
|
|
|
/// Highlight a code block with the given language.
|
|
fn highlight_code(code: &str, lang: Option<&str>) -> String {
|
|
let syntax = lang
|
|
.and_then(|l| SYNTAX_SET.find_syntax_by_token(l))
|
|
.unwrap_or_else(|| SYNTAX_SET.find_syntax_plain_text());
|
|
|
|
// Use a dark theme suitable for terminals
|
|
let theme = &THEME_SET.themes["base16-ocean.dark"];
|
|
let mut highlighter = HighlightLines::new(syntax, theme);
|
|
|
|
let mut output = String::new();
|
|
|
|
for line in LinesWithEndings::from(code) {
|
|
match highlighter.highlight_line(line, &SYNTAX_SET) {
|
|
Ok(ranges) => {
|
|
let escaped = as_24_bit_terminal_escaped(&ranges[..], false);
|
|
output.push_str(&escaped);
|
|
}
|
|
Err(_) => {
|
|
// Fallback: just append the line without highlighting
|
|
output.push_str(line);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Reset terminal colors at the end
|
|
output.push_str("\x1b[0m");
|
|
output
|
|
}
|
|
|
|
/// Render markdown with syntax-highlighted code blocks.
|
|
///
|
|
/// This function:
|
|
/// 1. Parses the markdown to find code blocks
|
|
/// 2. Applies syntect highlighting to code blocks
|
|
/// 3. Renders non-code portions with termimad
|
|
/// 4. Combines everything into the final output
|
|
pub fn render_markdown_with_highlighting(markdown: &str, skin: &termimad::MadSkin) -> String {
|
|
let segments = parse_markdown_segments(markdown);
|
|
let mut output = String::new();
|
|
|
|
for segment in segments {
|
|
match segment {
|
|
MarkdownSegment::Text(text) => {
|
|
if !text.is_empty() {
|
|
// Render with termimad
|
|
let rendered = skin.term_text(text);
|
|
output.push_str(&format!("{}", rendered));
|
|
}
|
|
}
|
|
MarkdownSegment::CodeBlock { lang, code } => {
|
|
// Add a subtle header showing the language
|
|
if let Some(l) = lang {
|
|
output.push_str(&format!("\x1b[2;3m{}\x1b[0m\n", l));
|
|
}
|
|
// Highlight and append the code
|
|
let highlighted = highlight_code(code, lang);
|
|
output.push_str(&highlighted);
|
|
// Ensure we end with a newline
|
|
if !output.ends_with('\n') {
|
|
output.push('\n');
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
output
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// A fenced block in the middle of a document yields text / code / text.
    #[test]
    fn test_parse_simple_code_block() {
        let segments =
            parse_markdown_segments("Some text\n```rust\nfn main() {}\n```\nMore text");

        assert_eq!(segments.len(), 3);
        assert!(matches!(
            segments.as_slice(),
            [
                MarkdownSegment::Text("Some text\n"),
                MarkdownSegment::CodeBlock {
                    lang: Some("rust"),
                    code: "fn main() {}\n"
                },
                MarkdownSegment::Text("More text"),
            ]
        ));
    }

    /// A fence without a language tag parses with `lang: None`.
    #[test]
    fn test_parse_no_language() {
        let segments = parse_markdown_segments("```\nplain code\n```");

        assert_eq!(segments.len(), 1);
        assert!(matches!(
            segments.as_slice(),
            [MarkdownSegment::CodeBlock {
                lang: None,
                code: "plain code\n"
            }]
        ));
    }

    /// Highlighted output carries ANSI escapes and finishes with a reset.
    #[test]
    fn test_highlight_rust_code() {
        let highlighted = highlight_code("fn main() {\n println!(\"Hello\");\n}\n", Some("rust"));

        // Should contain ANSI escape codes
        assert!(highlighted.contains("\x1b["));
        // Should end with reset
        assert!(highlighted.ends_with("\x1b[0m"));
    }

    /// Markdown without any fence comes back as a single text segment.
    #[test]
    fn test_no_code_blocks() {
        let segments = parse_markdown_segments("Just plain markdown with **bold** and *italic*.");

        assert_eq!(segments.len(), 1);
        assert!(matches!(segments.as_slice(), [MarkdownSegment::Text(_)]));
    }
}
|