diff --git a/Cargo.lock b/Cargo.lock index ff4c5c3..820751e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1808,15 +1808,6 @@ dependencies = [ "either", ] -[[package]] -name = "itertools" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" -dependencies = [ - "either", -] - [[package]] name = "itoa" version = "1.0.18" @@ -2156,10 +2147,10 @@ dependencies = [ [[package]] name = "rust-norg" version = "0.1.0" -source = "git+https://github.com/nvim-neorg/rust-norg?rev=79673015447b62d021d57b92f80be1454fe5cf83#79673015447b62d021d57b92f80be1454fe5cf83" +source = "git+https://github.com/nvim-neorg/rust-norg?rev=8e40d2443c39b4719e1c6637b93007fa64353e92#8e40d2443c39b4719e1c6637b93007fa64353e92" dependencies = [ "chumsky", - "itertools 0.13.0", + "itertools", "serde", "textwrap", "unicode_categories", @@ -2423,7 +2414,6 @@ dependencies = [ "arborium", "htmlescape", "insta", - "itertools 0.14.0", "napi", "napi-derive", "proptest", diff --git a/Cargo.toml b/Cargo.toml index c681629..22514b9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,10 +20,9 @@ napi-derive = { version = "=3.5.6", features = ["type-def"] } serde = { version = "=1.0.228", features = ["derive"] } serde_json = "=1.0.150" htmlescape = "=0.3.1" -rust-norg = { git = "https://github.com/nvim-neorg/rust-norg", rev = "79673015447b62d021d57b92f80be1454fe5cf83" } +rust-norg = { git = "https://github.com/nvim-neorg/rust-norg", rev = "8e40d2443c39b4719e1c6637b93007fa64353e92" } arborium = { version = "=2.17.0", features = ["all-languages"] } textwrap = "=0.16.2" -itertools = "=0.14.0" [dev-dependencies] insta = { version = "=1.47.2", features = ["yaml"] } diff --git a/src/parser/ast_handlers/nestable.rs b/src/parser/ast_handlers/nestable.rs index 100c022..990f065 100644 --- a/src/parser/ast_handlers/nestable.rs +++ b/src/parser/ast_handlers/nestable.rs @@ -2,45 +2,55 @@ use crate::segments::convert_segments; use crate::utils::into_slug; use htmlescape::encode_minimal; use rust_norg::{DetachedModifierExtension, NorgASTFlat, TodoStatus}; +use std::fmt::Write; pub fn nestable_modifier( text: &NorgASTFlat, extensions: &[DetachedModifierExtension], + children_html: &str, ) -> Option { - match text { - NorgASTFlat::Paragraph(segments) => { - let content = convert_segments(segments); - (!content.trim().is_empty()).then(|| format_nestable(&content, extensions)) + let content = match text { + NorgASTFlat::Paragraph(segments) => convert_segments(segments), + _ => { + debug_assert!(false, "non-Paragraph text in nestable modifier"); + String::new() } - _ => None, + }; + if content.trim().is_empty() && children_html.trim().is_empty() && extensions.is_empty() { + return None; } + Some(list_item(&content, extensions, children_html)) } -fn format_nestable(content: &str, extensions: &[DetachedModifierExtension]) -> String { - let mut classes: Vec = Vec::new(); - let mut attrs: Vec = Vec::new(); - let mut prefix: Vec<&str> = Vec::new(); +fn list_item( + content: &str, + extensions: &[DetachedModifierExtension], + children_html: &str, +) -> String { + let mut classes = String::new(); + let mut attrs = String::new(); + let mut prefix = String::new(); for extension in extensions { match extension { DetachedModifierExtension::Todo(status) => { if matches!(status, TodoStatus::Recurring(_)) { - classes.push("todo-recurring".into()); + push_space_separated(&mut classes, "todo-recurring"); } - prefix.push(todo_html(status)); + push_space_separated(&mut prefix, todo_html(status)); } DetachedModifierExtension::Priority(priority) => { - classes.push(format!("priority-{}", into_slug(priority))); - attrs.push(format!(r#"data-priority="{}""#, encode_minimal(priority))); + push_space_separated(&mut classes, &format!("priority-{}", into_slug(priority))); + push_attr(&mut attrs, "data-priority", priority); } DetachedModifierExtension::Timestamp(timestamp) => { - attrs.push(format!(r#"data-timestamp="{}""#, encode_minimal(timestamp))); + push_attr(&mut attrs, "data-timestamp", timestamp); } DetachedModifierExtension::DueDate(date) => { - attrs.push(format!(r#"data-due="{}""#, encode_minimal(date))); + push_attr(&mut attrs, "data-due", date); } DetachedModifierExtension::StartDate(date) => { - attrs.push(format!(r#"data-start="{}""#, encode_minimal(date))); + push_attr(&mut attrs, "data-start", date); } } } @@ -48,20 +58,26 @@ fn format_nestable(content: &str, extensions: &[DetachedModifierExtension]) -> S let class_attr = if classes.is_empty() { String::new() } else { - format!(r#" class="{}""#, classes.join(" ")) - }; - let data_attrs = if attrs.is_empty() { - String::new() - } else { - format!(" {}", attrs.join(" ")) + format!(r#" class="{classes}""#) }; - let prefix_html = if prefix.is_empty() { - String::new() + let separator = if prefix.is_empty() || content.trim().is_empty() { + "" } else { - format!("{} ", prefix.join(" ")) + " " }; - format!("{prefix_html}{content}") + format!("{prefix}{separator}{content}{children_html}") +} + +fn push_space_separated(buf: &mut String, value: &str) { + if !buf.is_empty() { + buf.push(' '); + } + buf.push_str(value); +} + +fn push_attr(buf: &mut String, name: &str, value: &str) { + let _ = write!(buf, r#" {name}="{}""#, encode_minimal(value)); } fn todo_html(status: &TodoStatus) -> &'static str { diff --git a/src/parser/ast_handlers/verbatim.rs b/src/parser/ast_handlers/verbatim.rs index bdf2dba..9c92a19 100644 --- a/src/parser/ast_handlers/verbatim.rs +++ b/src/parser/ast_handlers/verbatim.rs @@ -1,14 +1,13 @@ use super::error::EmbedParseError; -use crate::types::{EmbedComponent, OutputMode}; +use crate::types::OutputMode; use arborium::Highlighter; use htmlescape::encode_minimal; -use itertools::Itertools; use textwrap::dedent; pub enum VerbatimTagResult { Html(String), Css(String), - Embed(EmbedComponent), + Embed { mode: String, code: String }, } pub enum VerbatimTag { @@ -40,31 +39,33 @@ impl VerbatimTag { highlighter: &mut Highlighter, embed_index: usize, ) -> Result, EmbedParseError> { + let first_param = || { + parameters + .first() + .filter(|s| !s.is_empty()) + .map(String::as_str) + }; + match self { Self::Code => { let code = dedent(content); - let lang = parameters - .first() - .filter(|s| !s.is_empty()) - .map(String::as_str) - .unwrap_or("text"); - - let highlighted = highlighter.highlight(lang, &code); - let html = match highlighted { - Ok(h) => format!( + let lang = first_param().unwrap_or("text"); + let body = match highlighter.highlight(lang, &code) { + Ok(highlighted) => format!( r#"
{}
"#, - wrap_lines(&h) + wrap_lines(&highlighted) ), Err(_) => format!( r#"
{}
"#, wrap_lines(&encode_minimal(&code)) ), }; - Ok(Some(VerbatimTagResult::Html(html))) + Ok(Some(VerbatimTagResult::Html(body))) } - Self::Image => Ok(parameters.first().filter(|s| !s.is_empty()).map(|path| { + + Self::Image => Ok(first_param().map(|path| { let src = if path.starts_with('/') || path.starts_with("http") { - path.clone() + path.to_string() } else { format!("./{path}") }; @@ -74,40 +75,11 @@ impl VerbatimTag { encode_minimal(content.trim()) )) })), - Self::Embed => { - let embed_lang = parameters - .first() - .filter(|s| !s.is_empty()) - .map(String::as_str); - match embed_lang { - Some("css") => Ok(Some(VerbatimTagResult::Css(content.to_string()))), - None => Err(EmbedParseError::MissingLanguage { index: embed_index }), - Some(lang) => { - let embed_mode = lang.parse::().map_err(|_| { - EmbedParseError::InvalidLanguage { - index: embed_index, - language: lang.to_string(), - } - })?; + Self::Embed => render_embed(first_param(), content, mode, embed_index), - match mode { - None => Ok(None), - Some(m) if m != embed_mode => Err(EmbedParseError::LanguageMismatch { - index: embed_index, - language: lang.to_string(), - mode: m, - }), - Some(_) => Ok(Some(VerbatimTagResult::Embed(EmbedComponent { - index: 0, - mode: embed_mode.to_string(), - code: content.to_string(), - }))), - } - } - } - } Self::DocumentMeta => Ok(None), + Self::Unknown => Ok(Some(VerbatimTagResult::Html(format!( r#"
{}
"#, encode_minimal(content) @@ -116,10 +88,52 @@ impl VerbatimTag { } } -/// Wraps each of highlighted HTML in `` -/// This enables per-line styling such as line numbers or highlighting specific lines +fn render_embed( + lang: Option<&str>, + content: &str, + mode: Option, + index: usize, +) -> Result, EmbedParseError> { + let Some(lang) = lang else { + return Err(EmbedParseError::MissingLanguage { index }); + }; + + if lang == "css" { + return Ok(Some(VerbatimTagResult::Css(content.to_string()))); + } + + let embed_mode = lang + .parse::() + .map_err(|_| EmbedParseError::InvalidLanguage { + index, + language: lang.to_string(), + })?; + + match mode { + None => Ok(None), + Some(m) if m != embed_mode => Err(EmbedParseError::LanguageMismatch { + index, + language: lang.to_string(), + mode: m, + }), + Some(_) => Ok(Some(VerbatimTagResult::Embed { + mode: embed_mode.to_string(), + code: content.to_string(), + })), + } +} + +/// Wraps each line of highlighted HTML in `` so consumers +/// can attach per-line styling (line numbers, highlights, etc.). fn wrap_lines(html: &str) -> String { - html.lines() - .map(|line| format!(r#"{line}"#)) - .join("\n") + let mut out = String::with_capacity(html.len() + 64); + for (i, line) in html.lines().enumerate() { + if i > 0 { + out.push('\n'); + } + out.push_str(r#""#); + out.push_str(line); + out.push_str(""); + } + out } diff --git a/src/parser/html.rs b/src/parser/html.rs index 31d0765..4cb5b3d 100644 --- a/src/parser/html.rs +++ b/src/parser/html.rs @@ -1,8 +1,12 @@ use crate::ast_handlers::*; +use crate::segments::convert_segments; use crate::types::{EmbedComponent, OutputMode}; +use crate::utils::into_slug; use arborium::Highlighter; -use itertools::Itertools; -use rust_norg::{NestableDetachedModifier, NorgAST}; +use htmlescape::encode_minimal; +use rust_norg::{ + NestableDetachedModifier, NorgAST, NorgASTFlat, ParagraphSegment, RangeableDetachedModifier, +}; struct TransformState { parts: Vec, @@ -11,13 +15,23 @@ struct TransformState { css_blocks: Vec, mode: Option, highlighter: Highlighter, + /// Ordinal of every `@embed` declaration (incl. CSS, `None`-mode, error), + /// used in error messages to match `find_embed_line`. Unlike + /// `embed_components.len()`, it counts embeds that emit no component. + embed_decls: usize, } impl TransformState { - fn push_embed(&mut self, mut embed: EmbedComponent) { - embed.index = self.embed_components.len() as u32; - self.parts.push(std::mem::take(&mut self.current_html)); - self.embed_components.push(embed); + fn new(mode: Option) -> Self { + Self { + parts: Vec::new(), + current_html: String::new(), + embed_components: Vec::new(), + css_blocks: Vec::new(), + mode, + highlighter: Highlighter::new(), + embed_decls: 0, + } } fn push_html(&mut self, html: &str) { @@ -29,14 +43,22 @@ impl TransformState { match result { VerbatimTagResult::Html(html) => self.push_html(&html), VerbatimTagResult::Css(css) => self.css_blocks.push(css), - VerbatimTagResult::Embed(embed) => self.push_embed(embed), + VerbatimTagResult::Embed { mode, code } => { + let index = self.embed_components.len() as u32; + self.parts.push(std::mem::take(&mut self.current_html)); + self.embed_components + .push(EmbedComponent { index, mode, code }); + } } } fn finalize(mut self) -> (Vec, Vec, String) { self.parts.push(self.current_html); - let embed_css = self.css_blocks.join("\n"); - (self.parts, self.embed_components, embed_css) + ( + self.parts, + self.embed_components, + self.css_blocks.join("\n"), + ) } } @@ -44,72 +66,107 @@ pub fn transform( ast: &[NorgAST], mode: Option, ) -> Result<(Vec, Vec, String), EmbedParseError> { - let mut state = TransformState { - parts: Vec::new(), - current_html: String::new(), - embed_components: Vec::new(), - css_blocks: Vec::new(), - mode, - highlighter: Highlighter::new(), - }; + let mut state = TransformState::new(mode); transform_nodes(ast, &mut state)?; Ok(state.finalize()) } fn transform_nodes(nodes: &[NorgAST], state: &mut TransformState) -> Result<(), EmbedParseError> { - for (list_type, group) in nodes - .iter() - .chunk_by(|node| match node { - NorgAST::NestableDetachedModifier { modifier_type, .. } => Some(modifier_type.clone()), - _ => None, - }) - .into_iter() - { - match list_type { - Some(modifier_type) => { - let list_items: String = group - .filter_map(|node| match node { - NorgAST::NestableDetachedModifier { - text, extensions, .. - } => nestable_modifier(text.as_ref(), extensions), - _ => None, - }) - .collect(); + for node in nodes { + transform_node(node, state)?; + } + Ok(()) +} - if !list_items.is_empty() { - let tag = match modifier_type { - NestableDetachedModifier::UnorderedList => "ul", - NestableDetachedModifier::OrderedList => "ol", - NestableDetachedModifier::Quote => "blockquote", - }; - state.push_html(&format!("<{tag}>{list_items}")); - } - } - None => { - for node in group { - transform_node(node, state)?; +fn render_children( + nodes: &[NorgAST], + state: &mut TransformState, +) -> Result { + let saved = std::mem::take(&mut state.current_html); + let parts_len = state.parts.len(); + let embeds_len = state.embed_components.len(); + let css_len = state.css_blocks.len(); + let outcome = transform_nodes(nodes, state); + let mut captured = std::mem::take(&mut state.current_html); + state.current_html = saved; + outcome?; + // rust-norg's stage_4 only allows List nodes (or CarryoverTag wrapping a + // Nestable) inside a list item's `content`, so verbatim tags cannot + // appear here. If that ever changes, `apply_verbatim` would push to + // state.parts/embed_components/css_blocks against the swapped-empty + // current_html and misalign the embed-component stream — assert it stays + // inert. + debug_assert_eq!(state.parts.len(), parts_len); + debug_assert_eq!(state.embed_components.len(), embeds_len); + debug_assert_eq!(state.css_blocks.len(), css_len); + let new_len = captured.trim_end_matches('\n').len(); + captured.truncate(new_len); + Ok(captured) +} + +fn render_list( + modifier_type: &NestableDetachedModifier, + items: &[NorgAST], + state: &mut TransformState, +) -> Result<(), EmbedParseError> { + let mut rendered = String::new(); + for node in items { + match node { + NorgAST::NestableDetachedModifier { + text, + extensions, + content, + .. + } => { + let children_html = render_children(content, state)?; + if let Some(item) = nestable_modifier(text, extensions, &children_html) { + rendered.push_str(&item); } } + _ => debug_assert!(false, "non-Nestable item inside List"), } } + + if rendered.is_empty() { + return Ok(()); + } + + let tag = match modifier_type { + NestableDetachedModifier::UnorderedList => "ul", + NestableDetachedModifier::OrderedList => "ol", + NestableDetachedModifier::Quote => "blockquote", + }; + state.push_html(&format!("<{tag}>{rendered}")); Ok(()) } fn transform_node(node: &NorgAST, state: &mut TransformState) -> Result<(), EmbedParseError> { match node { - NorgAST::NestableDetachedModifier { .. } => {} + NorgAST::List { + modifier_type, + items, + } => render_list(modifier_type, items, state)?, + NorgAST::NestableDetachedModifier { .. } => { + debug_assert!(false, "bare NestableDetachedModifier outside List"); + } NorgAST::VerbatimRangedTag { name, parameters, content, .. } => { - if let Some(result) = VerbatimTag::from(name.as_slice()).render( + let tag = VerbatimTag::from(name.as_slice()); + // Capture the ordinal before incrementing; see `embed_decls` doc. + let embed_index = state.embed_decls; + if matches!(tag, VerbatimTag::Embed) { + state.embed_decls += 1; + } + if let Some(result) = tag.render( parameters, content, state.mode, &mut state.highlighter, - state.embed_components.len(), + embed_index, )? { state.apply_verbatim(result); } @@ -120,8 +177,8 @@ fn transform_node(node: &NorgAST, state: &mut TransformState) -> Result<(), Embe content, .. } => { - let title_html = crate::segments::convert_segments(title); - let id = crate::utils::into_slug(&title_html); + let title_html = convert_segments(title); + let id = into_slug(&title_html); state.push_html(&format!("{title_html}")); transform_nodes(content, state)?; } @@ -135,12 +192,8 @@ fn transform_node(node: &NorgAST, state: &mut TransformState) -> Result<(), Embe title, content, .. - } => { - state.push_html(&rangeable_modifier(modifier_type, title, content)); - } - NorgAST::DelimitingModifier(delim) => { - state.push_html(delimiter(delim)); - } + } => state.push_html(&rangeable_modifier(modifier_type, title, content)), + NorgAST::DelimitingModifier(delim) => state.push_html(delimiter(delim)), NorgAST::CarryoverTag { .. } | NorgAST::RangedTag { .. } | NorgAST::InfirmTag { .. } => { eprintln!("Warning: unimplemented tag"); } @@ -149,41 +202,32 @@ fn transform_node(node: &NorgAST, state: &mut TransformState) -> Result<(), Embe } fn rangeable_modifier( - modifier_type: &rust_norg::RangeableDetachedModifier, - title: &[rust_norg::ParagraphSegment], - content: &[rust_norg::NorgASTFlat], + modifier_type: &RangeableDetachedModifier, + title: &[ParagraphSegment], + content: &[NorgASTFlat], ) -> String { - let title_html = crate::segments::convert_segments(title); + let title_html = convert_segments(title); let body: String = content .iter() .filter_map(|node| match node { - rust_norg::NorgASTFlat::Paragraph(segments) => { - let html = crate::segments::convert_segments(segments); - (!html.trim().is_empty()).then(|| format!("

{html}

")) - } + NorgASTFlat::Paragraph(segments) => paragraph(segments), _ => None, }) .collect(); + let title_escaped = encode_minimal(&title_html); match modifier_type { - rust_norg::RangeableDetachedModifier::Definition => format!( - "
{}
{}
", - htmlescape::encode_minimal(&title_html), - body - ), - rust_norg::RangeableDetachedModifier::Footnote => { - let id = crate::utils::into_slug(&title_html); + RangeableDetachedModifier::Definition => { + format!("
{title_escaped}
{body}
") + } + RangeableDetachedModifier::Footnote => { + let id = into_slug(&title_html); format!( - "", - htmlescape::encode_minimal(&id), - htmlescape::encode_minimal(&title_html), - body + "" ) } - rust_norg::RangeableDetachedModifier::Table => format!( - "{}
{}
", - htmlescape::encode_minimal(&title_html), - body - ), + RangeableDetachedModifier::Table => { + format!("{body}
{title_escaped}
") + } } } diff --git a/src/parser/lib.rs b/src/parser/lib.rs index 12ca67e..da6d8be 100644 --- a/src/parser/lib.rs +++ b/src/parser/lib.rs @@ -15,11 +15,11 @@ pub use utils::into_slug; use arborium::theme::builtin; use napi::bindgen_prelude::*; use napi_derive::napi; -use serde_json::Map; +use serde_json::{Map, Value}; #[napi(object)] pub struct NorgParseResult { - pub metadata: Map, + pub metadata: Map, pub html_parts: Vec, pub toc: Vec, pub embed_components: Vec, @@ -34,41 +34,55 @@ pub fn parse_norg(content: String, mode: Option) -> Result String { - let base = err.to_string(); - if let Some(line) = find_embed_line(content, err.index()) { - format!("{base}. Offending line: {line}") - } else { - base + match find_embed_line(content, err.index()) { + Some(line) => format!("{err}. Offending line: {line}"), + None => err.to_string(), } } fn find_embed_line(content: &str, index: usize) -> Option { - let mut count = 0; - for line in content.lines() { - let trimmed = line.trim_start(); - if let Some(rest) = trimmed.strip_prefix("@embed") - && (rest.is_empty() || rest.chars().next().is_none_or(|c| c.is_whitespace())) - { - if count == index { - return Some(line.to_string()); - } - count += 1; - } + content + .lines() + .filter(|line| { + line.trim_start() + .strip_prefix("@embed") + .is_some_and(|rest| rest.is_empty() || rest.starts_with(char::is_whitespace)) + }) + .nth(index) + .map(str::to_string) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn embed_error_after_css_reports_correct_number_and_line() { + // A CSS embed precedes the failing one. The error ordinal and offending + // line must point at the failing `@embed bogus` (the 2nd declaration), + // not get shifted by the CSS embed that emits no component. + let content = "@embed css\n.foo { color: red; }\n@end\n\n@embed bogus\ncontent\n@end\n"; + let ast = rust_norg::parse_tree(content).unwrap(); + let err = transform(&ast, Some(OutputMode::html)).unwrap_err(); + let msg = format_embed_error(content, &err); + + assert!(msg.contains("embed #2"), "wrong ordinal in: {msg}"); + assert!( + msg.contains("Offending line: @embed bogus"), + "wrong offending line in: {msg}" + ); } - None } #[napi] diff --git a/src/parser/metadata.rs b/src/parser/metadata.rs index 30d112b..6394ee8 100644 --- a/src/parser/metadata.rs +++ b/src/parser/metadata.rs @@ -1,3 +1,4 @@ +use crate::ast_handlers::VerbatimTag; use rust_norg::NorgAST::{self, VerbatimRangedTag}; use rust_norg::metadata::{NorgMeta, parse_metadata}; use serde_json::{Map, Value, json}; @@ -6,7 +7,11 @@ pub fn extract_metadata(ast: &[NorgAST]) -> Map { ast.iter() .find_map(|node| match node { VerbatimRangedTag { name, content, .. } - if matches!(name.as_slice(), [doc, meta] if doc == "document" && meta == "meta") => { + if matches!( + VerbatimTag::from(name.as_slice()), + VerbatimTag::DocumentMeta + ) => + { Some(content.as_str()) } _ => None, diff --git a/src/parser/segments.rs b/src/parser/segments.rs index 7b49293..9143a91 100644 --- a/src/parser/segments.rs +++ b/src/parser/segments.rs @@ -3,18 +3,18 @@ use htmlescape::encode_minimal; use rust_norg::{LinkTarget, ParagraphSegment, ParagraphSegmentToken}; pub fn convert_segments(segments: &[ParagraphSegment]) -> String { - let mut result = String::with_capacity(segments.len() * 32); + let mut out = String::with_capacity(segments.len() * 32); for segment in segments { - result.push_str(&convert_segment(segment)); + out.push_str(&convert_segment(segment)); } - result + out } pub fn convert_code_segments(segments: &[ParagraphSegment]) -> String { segments .iter() .filter_map(|segment| match segment { - ParagraphSegment::Token(token) => Some(handle_segment_token(token, encode_minimal)), + ParagraphSegment::Token(token) => Some(render_token(token)), _ => None, }) .collect() @@ -22,7 +22,7 @@ pub fn convert_code_segments(segments: &[ParagraphSegment]) -> String { fn convert_segment(segment: &ParagraphSegment) -> String { match segment { - ParagraphSegment::Token(token) => handle_segment_token(token, encode_minimal), + ParagraphSegment::Token(token) => render_token(token), ParagraphSegment::AttachedModifier { modifier_type, @@ -34,15 +34,13 @@ fn convert_segment(segment: &ParagraphSegment) -> String { description, filepath, .. - } => convert_link(targets, description.as_ref(), filepath.as_ref()), + } => convert_link(targets, description.as_deref(), filepath.as_deref()), ParagraphSegment::Anchor { content, .. } => convert_segments(content), ParagraphSegment::InlineVerbatim(tokens) => { - format!( - "{}", - encode_minimal(&tokens.iter().map(ToString::to_string).collect::()) - ) + let text: String = tokens.iter().map(ToString::to_string).collect(); + format!("{}", encode_minimal(&text)) } _ => { @@ -52,93 +50,81 @@ fn convert_segment(segment: &ParagraphSegment) -> String { } } -fn handle_segment_token(token: &ParagraphSegmentToken, encode: impl Fn(&str) -> String) -> String { +fn render_token(token: &ParagraphSegmentToken) -> String { match token { ParagraphSegmentToken::Whitespace => " ".into(), - ParagraphSegmentToken::Text(text) => encode(text), - ParagraphSegmentToken::Special(ch) => encode(&ch.to_string()), - ParagraphSegmentToken::Escape(ch) => ch.to_string(), + ParagraphSegmentToken::Text(text) => encode_minimal(text), + ParagraphSegmentToken::Special(ch) | ParagraphSegmentToken::Escape(ch) => { + let mut buf = [0u8; 4]; + encode_minimal(ch.encode_utf8(&mut buf)) + } } } fn convert_attached_modifier(modifier_type: char, content: &[ParagraphSegment]) -> String { - match modifier_type { - '`' => format!("{}", convert_code_segments(content)), - '*' => format!("{}", convert_segments(content)), - '_' => format!("{}", convert_segments(content)), - '^' => format!("{}", convert_segments(content)), - ',' => format!("{}", convert_segments(content)), - '-' => format!("{}", convert_segments(content)), - '!' => format!( - "{}", - convert_segments(content) - ), - '$' => format!("{}", convert_segments(content)), - '&' => format!("{}", convert_segments(content)), - '/' => format!("{}", convert_segments(content)), - '=' => format!("{}", convert_segments(content)), - _ => convert_segments(content), + if modifier_type == '`' { + return format!("{}", convert_code_segments(content)); } + let (open, close) = match modifier_type { + '*' => ("", ""), + '_' => ("", ""), + '^' => ("", ""), + ',' => ("", ""), + '-' => ("", ""), + '!' => (r#""#, ""), + '$' => (r#""#, ""), + '&' => ("", ""), + '/' => ("", ""), + '=' => ("", ""), + _ => return convert_segments(content), + }; + let inner = convert_segments(content); + format!("{open}{inner}{close}") +} + +/// `.norg` paths are rewritten to `.html` so links resolve in the build output. +fn norg_to_html(path: &str) -> String { + path.strip_suffix(".norg") + .map(|base| format!("{base}.html")) + .unwrap_or_else(|| path.to_string()) +} + +fn anchor(href: &str, display: &str, external: bool) -> String { + let target = if external { r#" target="_blank""# } else { "" }; + format!( + r#"{}"#, + encode_minimal(href), + encode_minimal(display) + ) } fn convert_link( targets: &[LinkTarget], - description: Option<&Vec>, - filepath: Option<&String>, + description: Option<&[ParagraphSegment]>, + filepath: Option<&str>, ) -> String { - let text = description.map(|d| convert_segments(d)); + let display = description.map(convert_segments); match targets.first() { Some(LinkTarget::Url(url)) => { - let display_text = text.as_deref().unwrap_or(url); - let href = if let Some(fp) = filepath { - fp.as_str() - } else if url.starts_with("http") { - url.as_str() - } else if let Some(base) = url.strip_suffix(".norg") { - return format!( - r#"{}"#, - encode_minimal(display_text) - ); - } else { - url.as_str() - }; - - if url.starts_with("http") && filepath.is_none() { - format!( - r#"{}"#, - encode_minimal(href), - encode_minimal(display_text) - ) - } else { - format!( - r#"{}"#, - encode_minimal(href), - encode_minimal(display_text) - ) + let display_text = display.as_deref().unwrap_or(url); + match filepath { + Some(fp) => anchor(fp, display_text, false), + None if url.starts_with("http") => anchor(url, display_text, true), + None => anchor(&norg_to_html(url), display_text, false), } } Some(LinkTarget::Heading { title, .. }) => { let title_html = convert_segments(title); let slug = into_slug(&title_html); - let display = match &text { - Some(t) => t.as_str(), - None => &title_html, - }; - format!("{display}", encode_minimal(&slug),) - } - Some(LinkTarget::Path(path)) => { - let href = path - .strip_suffix(".norg") - .map(|base| format!("{base}.html")) - .unwrap_or_else(|| path.clone()); - let display_text = text.as_deref().unwrap_or(path); - format!( - r#"{}"#, - encode_minimal(&href), - encode_minimal(display_text) - ) + let display_text = display.as_deref().unwrap_or(&title_html); + format!("{display_text}") } + Some(LinkTarget::Path(path)) => anchor( + &norg_to_html(path), + display.as_deref().unwrap_or(path), + false, + ), Some( LinkTarget::Footnote(_) | LinkTarget::Definition(_) @@ -147,20 +133,25 @@ fn convert_link( | LinkTarget::Extendable(_) | LinkTarget::Wiki(_), ) => String::new(), - None => match filepath { - Some(fp) => { - let href = fp - .strip_suffix(".norg") - .map(|base| format!("{base}.html")) - .unwrap_or_else(|| fp.clone()); - let display_text = text.as_deref().unwrap_or(fp.as_str()); - format!( - r#"{}"#, - encode_minimal(&href), - encode_minimal(display_text) - ) - } - None => String::new(), - }, + None => filepath + .map(|fp| anchor(&norg_to_html(fp), display.as_deref().unwrap_or(fp), false)) + .unwrap_or_default(), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn escaped_metacharacters_are_html_escaped() { + // `\<`, `\>`, `\&` escape the modifier meaning of the char but must + // still be encoded so they render as literal text, not raw markup. + let segments = [ + ParagraphSegment::Token(ParagraphSegmentToken::Escape('<')), + ParagraphSegment::Token(ParagraphSegmentToken::Escape('&')), + ParagraphSegment::Token(ParagraphSegmentToken::Escape('>')), + ]; + assert_eq!(convert_segments(&segments), "<&>"); } } diff --git a/src/parser/toc.rs b/src/parser/toc.rs index d0cf864..b7ccc7c 100644 --- a/src/parser/toc.rs +++ b/src/parser/toc.rs @@ -5,11 +5,11 @@ use rust_norg::NorgAST; pub fn extract_toc(ast: &[NorgAST]) -> Vec { let mut toc = Vec::new(); - extract_toc_recursive(ast, &mut toc); + collect_headings(ast, &mut toc); toc } -fn extract_toc_recursive(ast: &[NorgAST], toc: &mut Vec) { +fn collect_headings(ast: &[NorgAST], toc: &mut Vec) { for node in ast { if let NorgAST::Heading { level, @@ -27,7 +27,7 @@ fn extract_toc_recursive(ast: &[NorgAST], toc: &mut Vec) { id, }); - extract_toc_recursive(content, toc); + collect_headings(content, toc); } } } diff --git a/src/parser/types.rs b/src/parser/types.rs index b3ded38..6af10b6 100644 --- a/src/parser/types.rs +++ b/src/parser/types.rs @@ -1,5 +1,6 @@ use napi_derive::napi; use serde::{Deserialize, Serialize}; +use std::{fmt, str::FromStr}; #[napi(string_enum)] #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -24,22 +25,16 @@ impl OutputMode { } } -impl std::str::FromStr for OutputMode { +impl FromStr for OutputMode { type Err = (); fn from_str(s: &str) -> Result { - match s { - "html" => Ok(Self::html), - "svelte" => Ok(Self::svelte), - "vue" => Ok(Self::vue), - "react" => Ok(Self::react), - _ => Err(()), - } + Self::ALL.into_iter().find(|m| m.as_str() == s).ok_or(()) } } -impl std::fmt::Display for OutputMode { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +impl fmt::Display for OutputMode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str(self.as_str()) } } diff --git a/src/parser/utils.rs b/src/parser/utils.rs index fa862bd..0f7ac51 100644 --- a/src/parser/utils.rs +++ b/src/parser/utils.rs @@ -1,12 +1,22 @@ +/// Slugifies arbitrary text: lowercase alphanumerics joined by single dashes, +/// with no leading or trailing dash. Lowercases via `str::to_lowercase` so +/// context-sensitive mappings (e.g. Greek word-final Σ → ς) match the rendered +/// text; `char::to_lowercase` would emit a different codepoint here and break +/// inbound anchor links. pub fn into_slug(text: &str) -> String { - text.to_lowercase() - .chars() - .map(|c| if c.is_alphanumeric() { c } else { '-' }) - .collect::() - .split('-') - .filter(|segment| !segment.is_empty()) - .collect::>() - .join("-") + let lowered = text.to_lowercase(); + let mut slug = String::with_capacity(lowered.len()); + for c in lowered.chars() { + if c.is_alphanumeric() { + slug.push(c); + } else if !slug.is_empty() && !slug.ends_with('-') { + slug.push('-'); + } + } + if slug.ends_with('-') { + slug.pop(); + } + slug } #[cfg(test)] @@ -21,5 +31,8 @@ mod tests { assert_eq!(into_slug(""), ""); assert_eq!(into_slug("!!!"), ""); assert_eq!(into_slug("123"), "123"); + // Greek word-final sigma must lowercase to ς (U+03C2), not σ (U+03C3), + // matching `str::to_lowercase`'s context-aware mapping. + assert_eq!(into_slug("ΛΟΓΟΣ"), "λογος"); } } diff --git a/tests/fixtures/nested-lists.norg b/tests/fixtures/nested-lists.norg new file mode 100644 index 0000000..422c9aa --- /dev/null +++ b/tests/fixtures/nested-lists.norg @@ -0,0 +1,35 @@ +@document.meta +title: Nested Lists +@end + +* Nested Lists + +** Two Levels + +- Top level item +-- Sub item A +-- Sub item B +- Another top level + +** Three Levels + +- Outer +-- Middle +--- Inner +--- Inner two +-- Middle two +- Outer two + +** Mixed Markers + +- Bullet parent +~~ Numbered child A +~~ Numbered child B +- Bullet parent two + +** Ordered With Nested + +~ First +~~ Nested first +~~ Nested second +~ Second diff --git a/tests/parser/html_tests.rs b/tests/parser/html_tests.rs index 26eb420..6c1cf4e 100644 --- a/tests/parser/html_tests.rs +++ b/tests/parser/html_tests.rs @@ -10,6 +10,7 @@ use vite_plugin_norg_parser::{extract_metadata, extract_toc, transform}; #[case::images("tests/fixtures/images.norg")] #[case::links("tests/fixtures/links.norg")] #[case::embed_css("tests/fixtures/embed-css.norg")] +#[case::nested_lists("tests/fixtures/nested-lists.norg")] fn test_norg_fixture_files(#[case] fixture_path: &str) { let content = fs::read_to_string(fixture_path) .unwrap_or_else(|_| panic!("Failed to read {fixture_path}")); diff --git a/tests/parser/snapshots/parser_tests__parser__html_tests__tests__fixtures__nested-lists.norg.snap b/tests/parser/snapshots/parser_tests__parser__html_tests__tests__fixtures__nested-lists.norg.snap new file mode 100644 index 0000000..c7134ba --- /dev/null +++ b/tests/parser/snapshots/parser_tests__parser__html_tests__tests__fixtures__nested-lists.norg.snap @@ -0,0 +1,22 @@ +--- +source: tests/parser/html_tests.rs +expression: "(html, toc, metadata, embed_css)" +--- +- "

Nested Lists

\n

Two Levels

\n
  • Top level item
    • Sub item A
    • Sub item B
  • Another top level
\n

Three Levels

\n
  • Outer
    • Middle
      • Inner
      • Inner two
    • Middle two
  • Outer two
\n

Mixed Markers

\n
  • Bullet parent
\n
  1. Numbered child A
  2. Numbered child B
\n
  • Bullet parent two
\n

Ordered With Nested

\n
  1. First
    1. Nested first
    2. Nested second
  2. Second
\n" +- - level: 1 + title: Nested Lists + id: nested-lists + - level: 2 + title: Two Levels + id: two-levels + - level: 2 + title: Three Levels + id: three-levels + - level: 2 + title: Mixed Markers + id: mixed-markers + - level: 2 + title: Ordered With Nested + id: ordered-with-nested +- title: Nested Lists +- "" diff --git a/tests/plugin/__snapshots__/html.test.ts.snap b/tests/plugin/__snapshots__/html.test.ts.snap index 380c4c9..764377f 100644 --- a/tests/plugin/__snapshots__/html.test.ts.snap +++ b/tests/plugin/__snapshots__/html.test.ts.snap @@ -53,3 +53,12 @@ export const toc = [{"level":1,"title":"Link Types","id":"link-types"},{"level": export default { metadata, html, toc };" `; + +exports[`HTML Generator > generates correct output for nested-lists.norg 1`] = ` +" +export const metadata = {"title":"Nested Lists"}; +export const html = "

Nested Lists

\\n

Two Levels

\\n
  • Top level item
    • Sub item A
    • Sub item B
  • Another top level
\\n

Three Levels

\\n
  • Outer
    • Middle
      • Inner
      • Inner two
    • Middle two
  • Outer two
\\n

Mixed Markers

\\n
  • Bullet parent
\\n
  1. Numbered child A
  2. Numbered child B
\\n
  • Bullet parent two
\\n

Ordered With Nested

\\n
  1. First
    1. Nested first
    2. Nested second
  2. Second
\\n"; +export const toc = [{"level":1,"title":"Nested Lists","id":"nested-lists"},{"level":2,"title":"Two Levels","id":"two-levels"},{"level":2,"title":"Three Levels","id":"three-levels"},{"level":2,"title":"Mixed Markers","id":"mixed-markers"},{"level":2,"title":"Ordered With Nested","id":"ordered-with-nested"}]; + +export default { metadata, html, toc };" +`; diff --git a/tests/plugin/__snapshots__/react.test.ts.snap b/tests/plugin/__snapshots__/react.test.ts.snap index c0db07f..5c09ab6 100644 --- a/tests/plugin/__snapshots__/react.test.ts.snap +++ b/tests/plugin/__snapshots__/react.test.ts.snap @@ -66,3 +66,14 @@ export function Component() { } export default Component;" `; + +exports[`React Generator > generates correct output for nested-lists.norg 1`] = ` +" +export const metadata = {"title":"Nested Lists"}; +export const toc = [{"level":1,"title":"Nested Lists","id":"nested-lists"},{"level":2,"title":"Two Levels","id":"two-levels"},{"level":2,"title":"Three Levels","id":"three-levels"},{"level":2,"title":"Mixed Markers","id":"mixed-markers"},{"level":2,"title":"Ordered With Nested","id":"ordered-with-nested"}]; + +export function Component() { + return <>
Nested Lists\\n

Two Levels

\\n
  • Top level item
    • Sub item A
    • Sub item B
  • Another top level
\\n

Three Levels

\\n
  • Outer
    • Middle
      • Inner
      • Inner two
    • Middle two
  • Outer two
\\n

Mixed Markers

\\n
  • Bullet parent
\\n
  1. Numbered child A
  2. Numbered child B
\\n
  • Bullet parent two
\\n

Ordered With Nested

\\n
  1. First
    1. Nested first
    2. Nested second
  2. Second
\\n" }} />; +} +export default Component;" +`; diff --git a/tests/plugin/__snapshots__/svelte.test.ts.snap b/tests/plugin/__snapshots__/svelte.test.ts.snap index 03fb51a..6c22351 100644 --- a/tests/plugin/__snapshots__/svelte.test.ts.snap +++ b/tests/plugin/__snapshots__/svelte.test.ts.snap @@ -48,3 +48,11 @@ exports[`Svelte Generator > generates correct output for links.norg 1`] = ` {@html "

Link Types

\\n

URL Links

\\n

https://example.com Neorg GitHub

\\n

File Links

\\n

docs/readme.norg File Description

\\n

Heading Links

\\n

Main Heading Sub Heading Custom link text

\\n

Mixed Content

\\n

This paragraph has a Rust website and tutorial file.

\\n"}" `; + +exports[`Svelte Generator > generates correct output for nested-lists.norg 1`] = ` +" +{@html "

Nested Lists

\\n

Two Levels

\\n
  • Top level item
    • Sub item A
    • Sub item B
  • Another top level
\\n

Three Levels

\\n
  • Outer
    • Middle
      • Inner
      • Inner two
    • Middle two
  • Outer two
\\n

Mixed Markers

\\n
  • Bullet parent
\\n
  1. Numbered child A
  2. Numbered child B
\\n
  • Bullet parent two
\\n

Ordered With Nested

\\n
  1. First
    1. Nested first
    2. Nested second
  2. Second
\\n"}" +`; diff --git a/tests/plugin/__snapshots__/vue.test.ts.snap b/tests/plugin/__snapshots__/vue.test.ts.snap index e7d0805..18f673a 100644 --- a/tests/plugin/__snapshots__/vue.test.ts.snap +++ b/tests/plugin/__snapshots__/vue.test.ts.snap @@ -105,3 +105,19 @@ defineExpose({ metadata, toc });
" `; + +exports[`Vue Generator > generates correct output for nested-lists.norg 1`] = ` +" + + +" +`; diff --git a/tests/plugin/fixtures.ts b/tests/plugin/fixtures.ts index 1faf5e3..0a09619 100644 --- a/tests/plugin/fixtures.ts +++ b/tests/plugin/fixtures.ts @@ -12,6 +12,7 @@ export const fixtures = [ 'images.norg', 'links.norg', 'embed-css.norg', + 'nested-lists.norg', ]; export async function loadCode(