diff --git a/docs/content/5.api/1.parse.md b/docs/content/5.api/1.parse.md
index 54578f3..4a6f79e 100644
--- a/docs/content/5.api/1.parse.md
+++ b/docs/content/5.api/1.parse.md
@@ -391,6 +391,46 @@ console.log(result.meta.summary)
// ComarkNode[] with only the content before
```
+## HTML Parsing
+
+HTML tags embedded in Comark content are parsed into AST nodes by default and can be mixed freely with Comark components and markdown syntax.
+
+::code-group
+
+```typescript [parse.ts]
+const content = `
+<div class="note">
+  ::alert{type="info"}
+  Hello <strong class="text-red-500">world</strong>
+  ::
+</div>
+`
+
+const result = await parse(content)
+console.log(result.nodes)
+```
+
+```json [Output]
+[
+ ["div", { "class": "note" },
+ ["alert", { "type": "info" },
+ "Hello ",
+ ["strong", { "class": "text-red-500" }, "world"]
+ ]
+ ]
+]
+```
+
+::
+
+To disable HTML parsing and treat tags as plain text, set `html: false`:
+
+```typescript [parse.ts]
+const result = await parse(content, { html: false })
+```
+
+---
+
## Error Handling
```typescript [parse.ts]
diff --git a/docs/content/5.api/3.reference.md b/docs/content/5.api/3.reference.md
index 5237f03..84b77e5 100644
--- a/docs/content/5.api/3.reference.md
+++ b/docs/content/5.api/3.reference.md
@@ -27,7 +27,8 @@ import { parse } from 'comark'
const result = await parse(markdownContent, {
autoUnwrap: true, // Remove wrappers from single-paragraph containers
- autoClose: true // Auto-close incomplete syntax
+ autoClose: true, // Auto-close incomplete syntax
+ html: true // Parse embedded HTML tags into AST nodes (default: true)
})
// Returns: ComarkTree
@@ -168,6 +169,7 @@ interface ComarkTree {
interface ParseOptions {
autoUnwrap?: boolean // Remove unnecessary <p> wrappers (default: true)
autoClose?: boolean // Auto-close incomplete syntax (default: true)
+ html?: boolean // Parse embedded HTML tags into AST nodes (default: true)
plugins?: ComarkPlugin[] // Array of plugins to apply
}
```
diff --git a/packages/comark-react/src/components/ComarkRenderer.tsx b/packages/comark-react/src/components/ComarkRenderer.tsx
index ba07c47..ee87006 100644
--- a/packages/comark-react/src/components/ComarkRenderer.tsx
+++ b/packages/comark-react/src/components/ComarkRenderer.tsx
@@ -159,7 +159,7 @@ function renderNode(
// Parse special prop values (props starting with :)
for (const [propKey, value] of Object.entries(nodeProps)) {
- if (propKey === '$comark') {
+ if (propKey === '$') {
Reflect.deleteProperty(props, propKey)
}
if (propKey === 'style') {
diff --git a/packages/comark-vue/src/components/ComarkRenderer.ts b/packages/comark-vue/src/components/ComarkRenderer.ts
index 4be8124..a5630d4 100644
--- a/packages/comark-vue/src/components/ComarkRenderer.ts
+++ b/packages/comark-vue/src/components/ComarkRenderer.ts
@@ -123,7 +123,7 @@ function renderNode(
// Prepare props — use for...in instead of Object.entries() to avoid intermediate array allocation
const props: Record = {}
for (const k in nodeProps) {
- if (k === '$comark') {
+ if (k === '$') {
continue
}
if (k === 'className') {
diff --git a/packages/comark/SPEC/HTML/block+component.md b/packages/comark/SPEC/HTML/block+component.md
new file mode 100644
index 0000000..a83920c
--- /dev/null
+++ b/packages/comark/SPEC/HTML/block+component.md
@@ -0,0 +1,58 @@
+---
+timeout:
+ parse: 5ms
+ html: 5ms
+ markdown: 5ms
+---
+
+## Input
+
+```md
+<hello>
+::component
+Default Slot
+::
+</hello>
+```
+
+## AST
+
+```json
+{
+ "frontmatter": {},
+ "meta": {},
+ "nodes": [
+ [
+ "hello",
+ {
+ "$": { "html": 1, "block": 1 }
+ },
+ [
+ "component",
+ {},
+ "Default Slot"
+ ]
+ ]
+ ]
+}
+```
+
+## HTML
+
+```html
+
+
+ Default Slot
+
+
+```
+
+## Markdown
+
+```md
+
+ ::component
+ Default Slot
+ ::
+
+```
diff --git a/packages/comark/SPEC/HTML/block.md b/packages/comark/SPEC/HTML/block.md
new file mode 100644
index 0000000..161875e
--- /dev/null
+++ b/packages/comark/SPEC/HTML/block.md
@@ -0,0 +1,53 @@
+---
+timeout:
+ parse: 5ms
+ html: 5ms
+ markdown: 5ms
+---
+
+## Input
+
+```md
+<hello>
+Hello **World**
+</hello>
+```
+
+## AST
+
+```json
+{
+ "frontmatter": {},
+ "meta": {},
+ "nodes": [
+ [
+ "hello",
+ {
+ "$": { "html": 1, "block": 1 }
+ },
+ "Hello ",
+ [
+ "strong",
+ {},
+ "World"
+ ]
+ ]
+ ]
+}
+```
+
+## HTML
+
+```html
+
+ Hello World
+
+```
+
+## Markdown
+
+```md
+
+Hello **World**
+
+```
diff --git a/packages/comark/SPEC/HTML/inline.md b/packages/comark/SPEC/HTML/inline.md
new file mode 100644
index 0000000..374ccf3
--- /dev/null
+++ b/packages/comark/SPEC/HTML/inline.md
@@ -0,0 +1,53 @@
+---
+timeout:
+ parse: 5ms
+ html: 5ms
+ markdown: 5ms
+---
+
+## Input
+
+```md
+<hello>Hello **World**</hello>
+```
+
+## AST
+
+```json
+{
+ "frontmatter": {},
+ "meta": {},
+ "nodes": [
+ [
+ "p",
+ {},
+ [
+ "hello",
+ {
+ "$": { "html": 1, "block": 0 }
+ },
+ "Hello ",
+ [
+ "strong",
+ {},
+ "World"
+ ]
+ ]
+ ]
+ ]
+}
+```
+
+## HTML
+
+```html
+
+ Hello World
+
+```
+
+## Markdown
+
+```md
+Hello **World**
+```
diff --git a/packages/comark/SPEC/HTML/mix+paragraph.md b/packages/comark/SPEC/HTML/mix+paragraph.md
new file mode 100644
index 0000000..d5bb521
--- /dev/null
+++ b/packages/comark/SPEC/HTML/mix+paragraph.md
@@ -0,0 +1,62 @@
+---
+timeout:
+ parse: 5ms
+ html: 5ms
+ markdown: 5ms
+---
+
+## Input
+
+```md
+<hello>
+Hello **World**
+</hello>
+Another Pragraph
+```
+
+## AST
+
+```json
+{
+ "frontmatter": {},
+ "meta": {},
+ "nodes": [
+ [
+ "hello",
+ {
+ "$": { "html": 1, "block": 1 }
+ },
+ "Hello ",
+ [
+ "strong",
+ {},
+ "World"
+ ]
+ ],
+ [
+ "p",
+ {},
+ "Another Pragraph"
+ ]
+ ]
+}
+```
+
+## HTML
+
+```html
+
+ Hello World
+
+Another Pragraph
+```
+
+## Markdown
+
+```md
+
+Hello **World**
+
+
+Another Pragraph
+```
diff --git a/packages/comark/SPEC/HTML/mix.md b/packages/comark/SPEC/HTML/mix.md
new file mode 100644
index 0000000..47690f0
--- /dev/null
+++ b/packages/comark/SPEC/HTML/mix.md
@@ -0,0 +1,132 @@
+---
+timeout:
+ parse: 5ms
+ html: 5ms
+ markdown: 5ms
+---
+
+## Input
+
+```md
+<comp1>
+ ::comp2
+ #title
+ This is the title of `comp2` component
+
+ #default
+ In this <strong class="text-red-500">paragraph</strong>, we [mix <sub>html</sub> and _<sub>markdonw</sub>_]
+ ::
+</comp1>
+```
+
+## AST
+
+```json
+{
+ "frontmatter": {},
+ "meta": {},
+ "nodes": [
+ [
+ "comp1",
+ { "$": { "html": 1, "block": 1 } },
+ [
+ "comp2",
+ {},
+ [
+ "template",
+ { "name": "title" },
+ "This is the title of ",
+ [
+ "code",
+ {},
+ "comp2"
+ ],
+ " component"
+ ],
+ [
+ "template",
+ {
+ "name": "default"
+ },
+ "In this ",
+ [
+ "strong",
+ {
+ "$": {
+ "block": 0,
+ "html": 1
+ },
+ "class": "text-red-500"
+ },
+ "paragraph"
+ ],
+ ", we ",
+ [
+ "span",
+ {},
+ "mix ",
+ [
+ "sub",
+ {
+ "$": {
+ "block": 0,
+ "html": 1
+ }
+ },
+ "html"
+ ],
+ " and ",
+ [
+ "em",
+ {},
+ [
+ "sub",
+ {
+ "$": {
+ "block": 0,
+ "html": 1
+ }
+ },
+ "markdonw"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+}
+```
+
+## HTML
+
+```html
+
+
+
+ This is the title of comp2 component
+
+
+ In this paragraph , we
+ mix html and
+ markdonw
+
+
+
+
+
+```
+
+## Markdown
+
+```md
+
+ ::comp2
+ #title
+ This is the title of `comp2` component
+
+ #default
+ In this paragraph , we [mix html and *markdonw *]
+ ::
+
+```
diff --git a/packages/comark/package.json b/packages/comark/package.json
index 7b99774..976a096 100644
--- a/packages/comark/package.json
+++ b/packages/comark/package.json
@@ -71,6 +71,7 @@
"@comark/markdown-it": "^0.3.2",
"entities": "^4.5.0",
"js-yaml": "^4.1.1",
+ "htmlparser2": "^9.0.0",
"markdown-exit": "1.0.0-beta.9"
}
}
diff --git a/packages/comark/src/ast/types.ts b/packages/comark/src/ast/types.ts
index 406bcac..fc32a86 100644
--- a/packages/comark/src/ast/types.ts
+++ b/packages/comark/src/ast/types.ts
@@ -5,7 +5,11 @@ export type ComarkComment = [null, {}, string]
export type ComarkElementAttributes = {
[key: string]: unknown
- $comark?: { line?: number }
+ $?: {
+ line?: number
+ html?: 0 | 1
+ block?: 0 | 1
+ }
}
export type ComarkElement = [string, ComarkElementAttributes, ...ComarkNode[]]
diff --git a/packages/comark/src/index.ts b/packages/comark/src/index.ts
index ab0e457..8bdcac0 100644
--- a/packages/comark/src/index.ts
+++ b/packages/comark/src/index.ts
@@ -9,6 +9,8 @@ import { marmdownItTokensToComarkTree } from './internal/parse/token-processor'
import { autoCloseMarkdown } from './internal/parse/auto-close/index'
import { parseFrontmatter } from './internal/front-matter'
import { extractReusableNodes } from './internal/parse/incremental'
+import html_block from './internal/parse/html/html_block_rule'
+import html_inline from './internal/parse/html/html_inline_rule'
// Re-export ComarkTree and ComarkNode for convenience
export type { ComarkTree, ComarkNode } from 'comark/ast'
@@ -40,6 +42,15 @@ export type * from './types'
* const tree = await parse('# Hello **World**\n::alert\nhi\n::')
* console.log(tree.nodes)
* // → [ ['h1', { id: 'hello-world' }, 'Hello ', ['strong', {}, 'World'] ], ['alert', {}, 'hi'] ]
+ *
+ * // Enable HTML parsing (on by default) — HTML tags are included in the AST
+ * const parseWithHtml = createParse({ html: true })
+ * const tree2 = await parseWithHtml('<strong class="bold">Hello</strong> _world_')
+ * console.log(tree2.nodes)
+ * // → [ ['strong', { class: 'bold' }, 'Hello'], ' ', ['em', {}, 'world'] ]
+ *
+ * // Disable HTML parsing — HTML tags are treated as plain text
+ * const parseNoHtml = createParse({ html: false })
* ```
*/
export function createParse(options: ParseOptions = {}): ComarkParseFn {
@@ -49,12 +60,19 @@ export function createParse(options: ParseOptions = {}): ComarkParseFn {
plugins.unshift(alert())
const parser = new MarkdownExit({
- html: true,
+ html: false,
linkify: true,
})
.enable(['table', 'strikethrough'])
.use(pluginMdc)
+ if (options.html !== false) {
+ parser.inline.ruler.before('text', 'comark_html_inline', html_inline)
+ parser.block.ruler.before('html_block', 'comark_html_block', html_block, {
+ alt: ['paragraph', 'reference', 'blockquote'],
+ })
+ }
+
for (const plugin of plugins) {
for (const markdownItPlugin of (plugin.markdownItPlugins || [])) {
parser.use(markdownItPlugin as unknown as MarkdownExitPlugin)
diff --git a/packages/comark/src/internal/parse/html/html_block_rule.ts b/packages/comark/src/internal/parse/html/html_block_rule.ts
new file mode 100644
index 0000000..f63b036
--- /dev/null
+++ b/packages/comark/src/internal/parse/html/html_block_rule.ts
@@ -0,0 +1,76 @@
+// BASED ON https://github.com/serkodev/markdown-exit/blob/fe1351070a5841426223ab4a0a5c7874ba2b1257/packages/markdown-exit/src/parser/block/rules/html_block.ts
+
+import type { StateBlock } from 'markdown-exit'
+import block_names from './html_blocks'
+import { HTML_OPEN_CLOSE_TAG_RE } from './html_re'
+
+// Opening/closing sequence pairs for HTML blocks, tested in order (the first
+// matching start pattern wins).
+// Tuple layout: [start-line pattern, end-line pattern, can terminate a paragraph].
+//
+// Entry 0 is a Comark addition on top of the upstream table: a line made of a
+// single opening or closing tag. The rule never scans further lines for entry 0,
+// so such a line always forms a one-line block.
+//
+// NOTE(review): the last entry uses the same start pattern as entry 0, so it can
+// never be selected — confirm whether it is kept only for parity with upstream.
+const HTML_SEQUENCES: [RegExp, RegExp, boolean][] = [
+  [new RegExp(`${HTML_OPEN_CLOSE_TAG_RE.source}\\s*$`), /^<\/[^>]+>$/, true],
+  [/^<(script|pre|style|textarea)(?=(\s|>|$))/i, /<\/(script|pre|style|textarea)>/i, true],
+  [/^<!--/, /-->/, true],
+  [/^<\?/, /\?>/, true],
+  [/^<![A-Z]/, />/, true],
+  [/^<!\[CDATA\[/, /\]\]>/, true],
+  [new RegExp(`^</?(${block_names.join('|')})(?=(\\s|/?>|$))`, 'i'), /^$/, true],
+  [new RegExp(`${HTML_OPEN_CLOSE_TAG_RE.source}\\s*$`), /^$/, false],
+]
+
+/**
+ * Block rule that detects an HTML block beginning at `startLine`.
+ *
+ * The text of the start line (from `bMarks + tShift` up to `eMarks`) is tested
+ * against each `HTML_SEQUENCES` start pattern in order; the first hit selects
+ * the sequence. Unless that is sequence 0, or the start line already satisfies
+ * the end pattern, following lines are consumed until one matches the end
+ * pattern (a non-empty terminating line is included in the block) or a line is
+ * indented less than the current block indent.
+ *
+ * On success one token is pushed: `html_block_close` (nesting -1) when the last
+ * inspected line starts with `</`, otherwise `html_block` (nesting 1). The
+ * token carries the consumed source lines as `content` and their range as `map`.
+ *
+ * @param state - Block parser state; `state.line` is advanced past the block.
+ * @param startLine - Index of the line to test.
+ * @param endLine - Index one past the last line that may be consumed.
+ * @param silent - Validation mode: nothing is pushed and the result is the
+ *   matched sequence's "can terminate a paragraph" flag.
+ * @returns `true` when an HTML block was recognised, otherwise `false`.
+ */
+export default function html_block(state: StateBlock, startLine: number, endLine: number, silent: boolean) {
+  let pos = state.bMarks[startLine] + state.tShift[startLine]
+  let max = state.eMarks[startLine]
+
+  // if it's indented more than 3 spaces, it should be a code block
+  if (state.sCount[startLine] - state.blkIndent >= 4)
+    return false
+
+  if (state.src.charCodeAt(pos) !== 0x3C/* < */)
+    return false
+
+  let lineText = state.src.slice(pos, max)
+
+  // First sequence whose start pattern accepts the line
+  const i = HTML_SEQUENCES.findIndex(([open]) => open.test(lineText))
+  if (i === -1)
+    return false
+
+  if (silent) {
+    // true if this sequence can be a terminator, false otherwise
+    return HTML_SEQUENCES[i][2]
+  }
+
+  let nextLine = startLine + 1
+
+  // If we are here - we detected HTML block.
+  // Let's roll down till block end. Sequence 0 (a lone tag line) never spans
+  // more than its own line, so the scan is skipped for it.
+  if (i !== 0 && !HTML_SEQUENCES[i][1].test(lineText)) {
+    for (; nextLine < endLine; nextLine++) {
+      if (state.sCount[nextLine] < state.blkIndent) {
+        break
+      }
+
+      pos = state.bMarks[nextLine] + state.tShift[nextLine]
+      max = state.eMarks[nextLine]
+      lineText = state.src.slice(pos, max)
+
+      if (HTML_SEQUENCES[i][1].test(lineText)) {
+        // A non-empty terminator line belongs to the block; a blank one does not
+        if (lineText.length !== 0)
+          nextLine++
+        break
+      }
+    }
+  }
+  state.line = nextLine
+
+  // A closing tag ends a previously opened block; anything else opens one.
+  // (Testing for '' here would always be true and mark every block as a close.)
+  const token = lineText.startsWith('</')
+    ? state.push('html_block_close', '', -1)
+    : state.push('html_block', '', 1)
+  token.map = [startLine, nextLine]
+  token.content = state.getLines(startLine, nextLine, state.blkIndent, true)
+
+  return true
+}
diff --git a/packages/comark/src/internal/parse/html/html_blocks.ts b/packages/comark/src/internal/parse/html/html_blocks.ts
new file mode 100644
index 0000000..78f605d
--- /dev/null
+++ b/packages/comark/src/internal/parse/html/html_blocks.ts
@@ -0,0 +1,67 @@
+// Tag names that start an HTML block according to the CommonMark spec
+// (the block rule matches them case-insensitively).
+// https://spec.commonmark.org/0.30/#html-blocks
+
+export default [
+ 'address',
+ 'article',
+ 'aside',
+ 'base',
+ 'basefont',
+ 'blockquote',
+ 'body',
+ 'caption',
+ 'center',
+ 'col',
+ 'colgroup',
+ 'dd',
+ 'details',
+ 'dialog',
+ 'dir',
+ 'div',
+ 'dl',
+ 'dt',
+ 'fieldset',
+ 'figcaption',
+ 'figure',
+ 'footer',
+ 'form',
+ 'frame',
+ 'frameset',
+ 'h1',
+ 'h2',
+ 'h3',
+ 'h4',
+ 'h5',
+ 'h6',
+ 'head',
+ 'header',
+ 'hr',
+ 'html',
+ 'iframe',
+ 'legend',
+ 'li',
+ 'link',
+ 'main',
+ 'menu',
+ 'menuitem',
+ 'nav',
+ 'noframes',
+ 'ol',
+ 'optgroup',
+ 'option',
+ 'p',
+ 'param',
+ 'search',
+ 'section',
+ 'summary',
+ 'table',
+ 'tbody',
+ 'td',
+ 'tfoot',
+ 'th',
+ 'thead',
+ 'title',
+ 'tr',
+ 'track',
+ 'ul',
+]
diff --git a/packages/comark/src/internal/parse/html/html_inline_rule.ts b/packages/comark/src/internal/parse/html/html_inline_rule.ts
new file mode 100644
index 0000000..6cee757
--- /dev/null
+++ b/packages/comark/src/internal/parse/html/html_inline_rule.ts
@@ -0,0 +1,52 @@
+// BASED ON https://github.com/serkodev/markdown-exit/blob/fe1351070a5841426223ab4a0a5c7874ba2b1257/packages/markdown-exit/src/parser/inline/rules/html_inline.ts
+
+import type { StateInline } from 'markdown-exit'
+import { HTML_TAG_RE } from './html_re'
+
+/**
+ * Tells whether `str` begins with an opening anchor tag: `<a` followed by `>`
+ * or whitespace (case-insensitive), so `<abbr>` and `</a>` do not qualify.
+ */
+function isLinkOpen(str: string) {
+  return /^<a[>\s]/i.test(str)
+}
+/**
+ * Tells whether `str` begins with a closing anchor tag (`</a>`), matched
+ * case-insensitively and allowing whitespace before the final `>`.
+ */
+function isLinkClose(str: string) {
+  const closingAnchor = /^<\/a\s*>/i
+  return closingAnchor.test(str)
+}
+
+/**
+ * Tells whether the character code `ch` is an ASCII letter (A-Z or a-z).
+ */
+function isLetter(ch: number) {
+  /* eslint no-bitwise:0 */
+  // OR-ing with 0x20 maps 'A'..'Z' onto 'a'..'z', so one range check covers both cases
+  const folded = ch | 0x20
+  if (folded < 0x61/* a */)
+    return false
+  return folded <= 0x7A/* z */
+}
+
+/**
+ * Inline rule that consumes one raw HTML construct at the current position.
+ *
+ * The text starting at `state.pos` is matched against `HTML_TAG_RE`. On a match
+ * the position is advanced past it and, unless `silent` is set, an
+ * `html_inline` token holding the matched text is pushed. `state.linkLevel` is
+ * incremented for an opening `<a>` tag and decremented for a closing `</a>`.
+ *
+ * @param state - Inline parser state.
+ * @param silent - When true, only advance the position; push no token.
+ * @returns `true` when a construct was consumed, otherwise `false`.
+ */
+export default function html_inline(state: StateInline, silent: boolean) {
+  const start = state.pos
+  const { src } = state
+
+  // Must begin with '<' and leave room for at least two more characters
+  if (src.charCodeAt(start) !== 0x3C/* < */ || start + 2 >= state.posMax)
+    return false
+
+  // Cheap rejection on the character after '<': only '!', '?', '/' or an ASCII letter may follow
+  const second = src.charCodeAt(start + 1)
+  const plausible = second === 0x21/* ! */
+    || second === 0x3F/* ? */
+    || second === 0x2F/* / */
+    || isLetter(second)
+  if (!plausible)
+    return false
+
+  const found = HTML_TAG_RE.exec(src.slice(start))
+  if (found === null)
+    return false
+
+  const raw = found[0]
+  if (!silent) {
+    const token = state.push('html_inline', '', 0)
+    token.content = raw
+
+    if (isLinkOpen(raw))
+      state.linkLevel++
+    if (isLinkClose(raw))
+      state.linkLevel--
+  }
+  state.pos += raw.length
+  return true
+}
diff --git a/packages/comark/src/internal/parse/html/html_re.ts b/packages/comark/src/internal/parse/html/html_re.ts
new file mode 100644
index 0000000..cb87544
--- /dev/null
+++ b/packages/comark/src/internal/parse/html/html_re.ts
@@ -0,0 +1,27 @@
+// Regexps to match html elements.
+// Each constant below is a regex *source fragment*; they are combined into the
+// two exported expressions at the bottom of the file.
+
+const attr_name = '[a-zA-Z_:][a-zA-Z0-9:._-]*'
+
+const unquoted = '[^"\'=<>`\\x00-\\x20]+'
+const single_quoted = '\'[^\']*\''
+const double_quoted = '"[^"]*"'
+
+const attr_value = `(?:${unquoted}|${single_quoted}|${double_quoted})`
+
+const attribute = `(?:\\s+${attr_name}(?:\\s*=\\s*${attr_value})?)`
+
+const open_tag = `<[A-Za-z][A-Za-z0-9\\-]*${attribute}*\\s*\\/?>`
+
+const close_tag = '<\\/[A-Za-z][A-Za-z0-9\\-]*\\s*>'
+// `<!-->` / `<!--->` or a regular `<!-- ... -->` comment
+const comment = '<!---?>|<!--(?:[^-]|-[^-]|--[^>])*-->'
+const processing = '<\\?[\\s\\S]*?\\?>'
+// e.g. `<!DOCTYPE html>`
+const declaration = '<![A-Za-z][^>]*>'
+const cdata = '<!\\[CDATA\\[[\\s\\S]*?\\]\\]>'
+
+// Any single inline HTML construct, anchored at the start of the input
+// eslint-disable-next-line regexp/no-super-linear-backtracking, regexp/prefer-w
+const HTML_TAG_RE = new RegExp(`^(?:${open_tag}|${close_tag}|${comment}|${processing}|${declaration}|${cdata})`)
+
+// Only an opening or closing tag, anchored at the start of the input
+// eslint-disable-next-line regexp/use-ignore-case, regexp/no-super-linear-backtracking, regexp/prefer-w
+const HTML_OPEN_CLOSE_TAG_RE = new RegExp(`^(?:${open_tag}|${close_tag})`)
+
+export { HTML_OPEN_CLOSE_TAG_RE, HTML_TAG_RE }
diff --git a/packages/comark/src/internal/parse/html/index.ts b/packages/comark/src/internal/parse/html/index.ts
new file mode 100644
index 0000000..44d24ad
--- /dev/null
+++ b/packages/comark/src/internal/parse/html/index.ts
@@ -0,0 +1,141 @@
+import { Parser } from 'htmlparser2'
+import type { ComarkNode } from '../../../ast'
+
+// Tag names treated as void elements: the converters in this file emit them as
+// childless nodes and never wait for (or act on) a closing tag for them.
+const VOID_ELEMENTS = new Set([
+ 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
+ 'link', 'meta', 'param', 'source', 'track', 'wbr',
+])
+
+/**
+ * Convert the attribute map of a parsed HTML tag into Comark node attributes.
+ *
+ * The result always carries the `$` marker `{ html: 1, block: 0 | 1 }`.
+ * Attributes with an empty value (boolean attributes such as `disabled`) are
+ * stored under a `:`-prefixed key with the string `'true'`; all other
+ * attributes are copied unchanged.
+ *
+ * @param attribs - Attribute name/value pairs of the tag.
+ * @param isInline - `true` for an inline tag (`block: 0`); defaults to block (`block: 1`).
+ * @returns The Comark attribute object.
+ */
+function attribsToComarkAttrs(attribs: Record<string, string>, isInline: boolean = false): Record<string, unknown> {
+  const attrs: Record<string, unknown> = {
+    $: {
+      html: 1,
+      block: isInline ? 0 : 1,
+    },
+  }
+  for (const key in attribs) {
+    // `$` is reserved for the marker above; a literal `$` attribute must not replace it
+    if (key === '$') {
+      continue
+    }
+    const value = attribs[key]
+    if (value === '') {
+      attrs[`:${key}`] = 'true'
+    }
+    else {
+      attrs[key] = value
+    }
+  }
+  return attrs
+}
+
+/** Description of a single HTML tag fragment, as returned by `parseInlineHtmlTag`. */
+interface HtmlTagInfo {
+  /** Tag name. */
+  tag: string
+  /** Comark attributes of the tag (an empty object for closing tags). */
+  attrs: Record<string, unknown>
+  /** `true` when the tag name is one of `VOID_ELEMENTS`. */
+  isVoid: boolean
+  /** `true` for a closing tag such as `</em>`. */
+  isClose: boolean
+}
+
+/**
+ * Parse a single inline HTML tag fragment (opening, closing, or void).
+ *
+ * Closing tags are recognised with a regular expression and reported with a
+ * lower-cased name and empty attributes. Anything else is fed to htmlparser2
+ * and described by the last opening tag it reports.
+ *
+ * @param html - Raw text of the fragment; surrounding whitespace is ignored.
+ * @returns The tag description, or `null` if the content is not a recognisable
+ *   HTML tag (for example a comment or plain text).
+ */
+export function parseInlineHtmlTag(html: string): HtmlTagInfo | null {
+  const trimmed = html.trim()
+  if (!trimmed.startsWith('<')) return null
+
+  // Fast path: closing tag. Hyphens are allowed so that names such as
+  // `</my-tag>` — which the inline tokenizer accepts — pair up with their opener.
+  const closeMatch = trimmed.match(/^<\/([a-z][a-z0-9-]*)\s*>/i)
+  if (closeMatch) {
+    return { tag: closeMatch[1].toLowerCase(), attrs: {}, isVoid: false, isClose: true }
+  }
+
+  let info: HtmlTagInfo | null = null
+  const parser = new Parser({
+    onopentag(name, attribs) {
+      info = {
+        tag: name,
+        attrs: attribsToComarkAttrs(attribs, true),
+        isVoid: VOID_ELEMENTS.has(name),
+        isClose: false,
+      }
+    },
+  }, { decodeEntities: false })
+
+  parser.write(trimmed)
+  parser.end()
+  return info
+}
+
+/**
+ * Parse a full HTML string into ComarkNodes using htmlparser2.
+ * Handles nested elements, text, void elements, and comments.
+ *
+ * Text is trimmed and whitespace-only text is dropped. Comments become
+ * `[null, {}, data]` nodes. A closing tag with no matching open element is
+ * ignored; a closing tag that matches an outer element also closes every
+ * element still open inside it.
+ *
+ * @param html - HTML source; surrounding whitespace is ignored.
+ * @returns The top-level nodes in document order.
+ */
+export function htmlToComarkNodes(html: string): ComarkNode[] {
+  const root: ComarkNode[] = []
+  const stack: { tag: string, attrs: Record<string, unknown>, children: ComarkNode[] }[] = []
+
+  // Attach a finished node to the innermost open element, or to the result when none is open
+  const append = (node: ComarkNode) => {
+    const parent = stack[stack.length - 1]
+    if (parent) {
+      parent.children.push(node)
+    }
+    else {
+      root.push(node)
+    }
+  }
+
+  const parser = new Parser({
+    onopentag(name, attribs) {
+      const attrs = attribsToComarkAttrs(attribs)
+      if (VOID_ELEMENTS.has(name)) {
+        // Void elements are complete as soon as they open
+        append([name, attrs] as ComarkNode)
+        return
+      }
+      stack.push({ tag: name, attrs, children: [] })
+    },
+
+    ontext(text) {
+      const trimmed = text.trim()
+      if (!trimmed) return
+      append(trimmed)
+    },
+
+    onclosetag(name) {
+      if (VOID_ELEMENTS.has(name)) {
+        return
+      }
+      // Find matching frame (handles mismatched tags gracefully)
+      let idx = stack.length - 1
+      while (idx >= 0 && stack[idx].tag !== name) {
+        idx--
+      }
+      if (idx < 0) {
+        return
+      }
+      // Close the matching frame and every frame opened after it, innermost first
+      while (stack.length > idx) {
+        const frame = stack.pop()!
+        append([frame.tag, frame.attrs, ...frame.children] as ComarkNode)
+      }
+    },
+
+    oncomment(data) {
+      append([null, {}, data] as unknown as ComarkNode)
+    },
+  }, { decodeEntities: true })
+
+  parser.write(html.trim())
+  parser.end()
+
+  return root
+}
diff --git a/packages/comark/src/internal/parse/incremental.ts b/packages/comark/src/internal/parse/incremental.ts
index 98dfc5b..218027a 100644
--- a/packages/comark/src/internal/parse/incremental.ts
+++ b/packages/comark/src/internal/parse/incremental.ts
@@ -12,7 +12,7 @@ export function extractReusableNodes(markdown: string, lastOutput: ComarkTree) {
let lastNodeIgnored = false
while (i >= 0) {
const node = lastOutput.nodes[i] as ComarkElement
- if (node[1] && node[1].$comark?.line) {
+ if (node[1] && node[1].$?.line) {
if (lastNodeIgnored) {
lastValidNodeIndex = i
break
@@ -25,7 +25,7 @@ export function extractReusableNodes(markdown: string, lastOutput: ComarkTree) {
}
const lastNode = lastValidNodeIndex !== -1 ? lastOutput.nodes[lastValidNodeIndex] : null
if (lastNode) {
- const remainingMarkdownStartLine = (lastNode[1] as ComarkElementAttributes).$comark?.line ?? 0
+ const remainingMarkdownStartLine = (lastNode[1] as ComarkElementAttributes).$?.line ?? 0
return {
remainingMarkdownStartLine,
reusedNodes: lastOutput.nodes.slice(0, lastValidNodeIndex + 1),
diff --git a/packages/comark/src/internal/parse/token-processor.ts b/packages/comark/src/internal/parse/token-processor.ts
index 0b6d7a0..fa6fccc 100644
--- a/packages/comark/src/internal/parse/token-processor.ts
+++ b/packages/comark/src/internal/parse/token-processor.ts
@@ -1,4 +1,5 @@
-import type { ComarkNode } from 'comark/ast'
+import type { ComarkElementAttributes, ComarkNode } from 'comark/ast'
+import { htmlToComarkNodes, parseInlineHtmlTag } from './html'
// Mapping from token types to tag names
const BLOCK_TAG_MAP: Record = {
@@ -22,10 +23,10 @@ const INLINE_TAG_MAP: Record = {
sub_open: 'del',
}
+// ─── main entry point ───────────────────────────────────────────────────────
+
/**
* Convert Markdown-It tokens to a Comark tree
- * @param tokens - The tokens to convert
- * @returns The Comark tree
*/
export function marmdownItTokensToComarkTree(tokens: any[], options: { startLine: number, preservePositions: boolean } = { startLine: 0, preservePositions: false }): ComarkNode[] {
const nodes: ComarkNode[] = []
@@ -36,16 +37,15 @@ export function marmdownItTokensToComarkTree(tokens: any[], options: { startLine
const result = processBlockToken(tokens, i, false)
if (result.node) {
if (options.preservePositions) {
- // find end line of node from token.map
for (let j = i; j < result.nextIndex; j++) {
if (tokens[j].map && tokens[j].map[1]) {
endLine = (tokens[j].map[1] as number) + options.startLine
}
}
- ;(result.node[1] as Record).$comark = {
- ...((result.node[1] as Record).$comark || {}),
- line: endLine,
+ if (!(result.node[1] as Record).$) {
+ (result.node[1] as Record).$ = {}
}
+ ;((result.node[1] as Record).$ as Record).line = endLine
}
nodes.push(result.node)
}
@@ -122,48 +122,6 @@ function processAttributes(
return attrs
}
-/**
- * Parse HTML inline content to extract tag and attributes
- * Example: ' '
- * Returns: { tag: 'input', attrs: { class: 'foo', checked: true, disabled: true, type: 'checkbox' } }
- */
-function parseHtmlInline(html: string): { tag: string, attrs: Record, selfClosing: boolean } | null {
- // Match opening or self-closing tags
- // Use \s[^>]* to ensure attributes start with whitespace, preventing overlap with tag name
- const tagMatch = html.match(/^<(\w+)(\s[^>]*)?(\/?)>/)
- if (!tagMatch) {
- return null
- }
-
- const tag = tagMatch[1]
- const attrsString = tagMatch[2]
- const selfClosing = tagMatch[3] === '/' || tag === 'input' || tag === 'br' || tag === 'img' || tag === 'hr'
-
- const attrs: Record = {}
-
- // Parse attributes from the string
- // Match: attr="value" or attr='' or attr (boolean)
- const attrRegex = /(\w+)(?:="([^"]*)"|='([^']*)'|=(\S+)|(?=\s|$))/g
- let match
-
- while ((match = attrRegex.exec(attrsString)) !== null) {
- const attrName = match[1]
- // Get value from whichever capture group matched (quotes or unquoted)
- const attrValue = match[2] !== undefined ? match[2] : (match[3] !== undefined ? match[3] : (match[4] || ''))
-
- // Handle boolean attributes - if value is empty string, it's a boolean true
- if (attrValue === '') {
- attrs[`:${attrName}`] = 'true'
- }
- else {
- // Regular attribute
- attrs[attrName] = attrValue
- }
- }
-
- return { tag, attrs, selfClosing }
-}
-
/**
* Parse codeblock info string to extract language, highlights, filename, and meta
* Example: "javascript {1-3} [filename.ts] meta=value"
@@ -272,10 +230,6 @@ function parseCodeblockInfo(info: string): {
/**
* Extract Comark attributes from mdc_inline_props token
- * @param tokens - Array of tokens
- * @param startIndex - Index to start searching from (after the element token)
- * @param skipEmptyText - Whether to skip empty text tokens before props token
- * @returns Object with attrs and nextIndex
*/
function extractAttributes(
tokens: any[],
@@ -308,10 +262,24 @@ function processBlockToken(tokens: any[], startIndex: number, insideNestedContex
return { node: ['hr', {}] as ComarkNode, nextIndex: startIndex + 1 }
}
+ // html_block is now handled upstream (in marmdownItTokensToComarkTree /
+ // processBlockChildren / processBlockChildrenWithSlots) before reaching here.
+ // This branch is kept as a safety fallback.
if (token.type === 'html_block') {
- if (token.content.startsWith('') ? content.slice(4, -3) : content.slice(4)
+ return { node: [null, {}, inner] as unknown as ComarkNode, nextIndex: startIndex + 1 }
+ }
+
+ const children = processBlockChildren(tokens, startIndex + 1, 'html_block_close', false, false, false)
+ const [node1] = htmlToComarkNodes(content)
+ if (!node1) {
+ return { node: null, nextIndex: startIndex + 1 }
}
+ const node = [node1[0]!, node1[1]! as ComarkElementAttributes, ...children.nodes] as ComarkNode
+
+ return { node, nextIndex: children.nextIndex + 1 }
}
// Handle Comark block components (e.g., ::component ... ::)
@@ -494,6 +462,19 @@ function processBlockChildrenWithSlots(
while (i < tokens.length && tokens[i].type !== closeType) {
const token = tokens[i]
+ // html_block can produce multiple nodes — handle before processBlockToken
+ if (token.type === 'html_block') {
+ const htmlNodes = htmlToComarkNodes(token.content)
+ if (currentSlotName !== null) {
+ currentSlotChildren.push(...htmlNodes)
+ }
+ else {
+ nodes.push(...htmlNodes)
+ }
+ i++
+ continue
+ }
+
// Check for slot marker: #slotname creates mdc_block_slot tokens
if (token.type === 'mdc_block_slot') {
// Extract slot name from token.attrs
@@ -562,6 +543,13 @@ function processBlockChildren(
while (i < tokens.length && tokens[i].type !== closeType) {
const token = tokens[i]
+ // html_block can produce multiple nodes — handle before processBlockToken
+ if (token.type === 'html_block') {
+ nodes.push(...htmlToComarkNodes(token.content))
+ i++
+ continue
+ }
+
if (token.type === 'inline') {
const inlineNodes = processInlineTokens(token.children || [], inHeading)
nodes.push(...inlineNodes)
@@ -670,7 +658,7 @@ function slugify(text: string): string {
return slug
}
-function processInlineTokens(tokens: any[], inHeading: boolean = false): ComarkNode[] {
+export function processInlineTokens(tokens: any[], inHeading: boolean = false): ComarkNode[] {
const nodes: ComarkNode[] = []
let i = 0
@@ -709,15 +697,50 @@ function processInlineToken(tokens: any[], startIndex: number, inHeading: boolea
return { node: token.content || null, nextIndex: startIndex + 1 }
}
- // Handle html_inline tokens (e.g., task list checkboxes)
+ // Handle html_inline tokens using htmlparser2
if (token.type === 'html_inline') {
- const parsed = parseHtmlInline(token.content || '')
- if (parsed && parsed.selfClosing) {
- // Self-closing tags like , ,
- return { node: [parsed.tag, parsed.attrs] as ComarkNode, nextIndex: startIndex + 1 }
+ const content = token.content || ''
+ const tagInfo = parseInlineHtmlTag(content)
+
+ if (!tagInfo) {
+ // Not a recognisable tag — return as raw text
+ return { node: content || null, nextIndex: startIndex + 1 }
}
- // For non-self-closing HTML or unparseable HTML, return as text
- return { node: token.content || null, nextIndex: startIndex + 1 }
+
+ if (tagInfo.isClose) {
+ // Orphaned closing tag — skip (handled by the opener's lookahead)
+ return { node: null, nextIndex: startIndex + 1 }
+ }
+
+ if (tagInfo.isVoid) {
+ // Self-closing void element: , , , …
+ return { node: [tagInfo.tag, tagInfo.attrs] as ComarkNode, nextIndex: startIndex + 1 }
+ }
+
+ // Non-void opening tag — look ahead for the matching closing tag
+ const children: ComarkNode[] = []
+ let j = startIndex + 1
+
+ while (j < tokens.length) {
+ const nextToken = tokens[j]
+ if (nextToken.type === 'html_inline') {
+ const nextInfo = parseInlineHtmlTag(nextToken.content || '')
+ if (nextInfo?.isClose && nextInfo.tag === tagInfo.tag) {
+ j++ // consume the closing tag
+ break
+ }
+ }
+ const result = processInlineToken(tokens, j, inHeading)
+ j = result.nextIndex
+ if (result.node) {
+ children.push(result.node as ComarkNode)
+ }
+ }
+
+ const node = children.length > 0
+ ? [tagInfo.tag, tagInfo.attrs, ...children] as ComarkNode
+ : [tagInfo.tag, tagInfo.attrs] as ComarkNode
+ return { node, nextIndex: j }
}
// Handle Comark inline span (e.g., [text]{attr})
diff --git a/packages/comark/src/internal/stringify/attributes.ts b/packages/comark/src/internal/stringify/attributes.ts
index e068355..342e8a3 100644
--- a/packages/comark/src/internal/stringify/attributes.ts
+++ b/packages/comark/src/internal/stringify/attributes.ts
@@ -25,7 +25,6 @@ export function comarkAttributes(attributes: Record) {
return `${key}="${value}"`
})
-
.join(' ')
return attrs.length > 0 ? `{${attrs}}` : ''
diff --git a/packages/comark/src/internal/stringify/handlers/html.ts b/packages/comark/src/internal/stringify/handlers/html.ts
index 8150e8f..f29f490 100644
--- a/packages/comark/src/internal/stringify/handlers/html.ts
+++ b/packages/comark/src/internal/stringify/handlers/html.ts
@@ -9,12 +9,13 @@ const inlineTags = new Set(['strong', 'em', 'code', 'a', 'br', 'span', 'img'])
const blockTags = new Set(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'ul', 'ol', 'blockquote', 'hr', 'table', 'td', 'th'])
export function html(node: ComarkElement, state: State, parent?: ComarkElement) {
- const [tag, attributes, ...children] = node
+ const [tag, attr, ...children] = node
+ const { $ = {}, ...attributes } = attr
const hasOnlyTextChildren = children.every(child => typeof child === 'string' || inlineTags.has(String(child?.[0])))
const hasTextSibling = children.some(child => typeof child === 'string')
const isBlock = textBlocks.has(String(tag))
- const isInline = inlineTags.has(String(tag))
+ const isInline = inlineTags.has(String(tag)) && $.block === 0
let oneLiner = isBlock && hasOnlyTextChildren
@@ -30,10 +31,14 @@ export function html(node: ComarkElement, state: State, parent?: ComarkElement)
oneLiner = true
}
+ if ($.block === 0) {
+ oneLiner = true
+ }
+
const isSelfClose = selfCloseTags.has(String(tag))
// Do not modify context if we are already in html mode
- const revert = state.applyContext({ html: true, inline: oneLiner })
+ const revert = state.applyContext({ inline: oneLiner })
const childrenContent = children.map(child => state.one(child, state, node))
@@ -69,7 +74,7 @@ export function html(node: ComarkElement, state: State, parent?: ComarkElement)
}
if (!oneLiner && content) {
- content = '\n' + paddNoneHtmlContent(content, state) + '\n'
+ content = '\n' + paddNoneHtmlContent(content, state).trimEnd() + '\n'
}
return `<${tag}${attrs}>${content}${tag}>`
@@ -82,8 +87,8 @@ function paddNoneHtmlContent(content: string, state: State) {
}
return (
- (content.trim().startsWith('<') ? '' : '\n')
+ (content.trim().startsWith('<') ? '' : '')
+ content
- + (content.trim().endsWith('>') ? '' : '\n')
+ + (content.trim().endsWith('>') ? '' : '')
)
}
diff --git a/packages/comark/src/internal/stringify/handlers/mdc.ts b/packages/comark/src/internal/stringify/handlers/mdc.ts
index c6d21f7..5e4d1cc 100644
--- a/packages/comark/src/internal/stringify/handlers/mdc.ts
+++ b/packages/comark/src/internal/stringify/handlers/mdc.ts
@@ -8,7 +8,8 @@ import { html } from './html'
const INLINE_HTML_ELEMENTS = new Set(['a', 'strong', 'em', 'span'])
export function mdc(node: ComarkElement, state: State, parent?: ComarkElement) {
- const [tag, attributes, ...children] = node
+ const [tag, attr, ...children] = node
+ const { $, ...attributes } = attr
if (tag === 'table') {
return html(node, state)
diff --git a/packages/comark/src/internal/stringify/state.ts b/packages/comark/src/internal/stringify/state.ts
index ec1cd50..6e0fd3b 100644
--- a/packages/comark/src/internal/stringify/state.ts
+++ b/packages/comark/src/internal/stringify/state.ts
@@ -26,7 +26,7 @@ export function one(node: ComarkNode, state: State, parent?: ComarkElement) {
return userHandler(node, state, parent)
}
- if (state.context.html) {
+ if (state.context.html || node[1].$?.html === 1) {
return state.handlers.html(node, state, parent)
}
diff --git a/packages/comark/src/types.ts b/packages/comark/src/types.ts
index 05ac6c7..d2ade1b 100644
--- a/packages/comark/src/types.ts
+++ b/packages/comark/src/types.ts
@@ -56,6 +56,29 @@ export interface ParseOptions {
*/
autoClose?: boolean
+ /**
+ * Whether to parse HTML tags embedded in Comark/markdown content.
+ * When enabled, HTML block and inline elements are parsed into AST nodes and can be
+ * mixed freely with Comark components and markdown syntax.
+ *
+ * @default true
+ * @example
+ * // With html: true (default) — HTML is parsed into AST nodes
+ * // Input: `<strong class="bold">text</strong>`
+ * // AST: ['strong', { class: 'bold' }, 'text']
+ *
+ * // HTML can be mixed with Comark components:
+ * // Input:
+ * // <div>
+ * // ::alert
+ * // Hello world
+ * // ::
+ * // </div>
+ *
+ * // With html: false — HTML tags are left as raw text / ignored
+ */
+ html?: boolean
+
/**
* Additional plugins to use
* @default []
diff --git a/packages/comark/test/streaming.test.ts b/packages/comark/test/streaming.test.ts
index a21bbdd..6268000 100644
--- a/packages/comark/test/streaming.test.ts
+++ b/packages/comark/test/streaming.test.ts
@@ -3,23 +3,23 @@ import { createParse } from 'comark'
import type { ComarkElement } from 'comark/ast'
describe('streaming mode', () => {
- describe('$comark.line metadata', () => {
+ describe('$.line metadata', () => {
it('preserves position metadata on nodes in streaming mode', async () => {
const parse = createParse()
const result = await parse('# Hello\n\nParagraph one.\n\nParagraph two.\n', { streaming: true })
const nodes = result.nodes as ComarkElement[]
- expect(nodes[0][1].$comark?.line).toBeDefined()
- expect(nodes[1][1].$comark?.line).toBeDefined()
- expect(nodes[2][1].$comark?.line).toBeDefined()
+ expect(nodes[0][1].$?.line).toBeDefined()
+ expect(nodes[1][1].$?.line).toBeDefined()
+ expect(nodes[2][1].$?.line).toBeDefined()
})
- it('does NOT add $comark.line metadata without streaming', async () => {
+ it('does NOT add $.line metadata without streaming', async () => {
const parse = createParse()
const result = await parse('# Hello\n\nParagraph one.\n')
const nodes = result.nodes as ComarkElement[]
- expect(nodes[0][1].$comark).toBeUndefined()
+ expect(nodes[0][1].$).toBeUndefined()
})
it('line numbers are monotonically increasing', async () => {
@@ -27,7 +27,7 @@ describe('streaming mode', () => {
const result = await parse('# Heading\n\nPara 1\n\nPara 2\n\nPara 3\n', { streaming: true })
const nodes = result.nodes as ComarkElement[]
- const lines = nodes.map(n => n[1].$comark?.line ?? 0)
+ const lines = nodes.map(n => n[1].$?.line ?? 0)
for (let i = 1; i < lines.length; i++) {
expect(lines[i]).toBeGreaterThan(lines[i - 1])
}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index b3d1d82..0d27333 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -417,6 +417,9 @@ importers:
entities:
specifier: ^4.5.0
version: 4.5.0
+ htmlparser2:
+ specifier: ^9.0.0
+ version: 9.1.0
js-yaml:
specifier: ^4.1.1
version: 4.1.1
@@ -6627,6 +6630,9 @@ packages:
html-whitespace-sensitive-tag-names@3.0.1:
resolution: {integrity: sha512-q+310vW8zmymYHALr1da4HyXUQ0zgiIwIicEfotYPWGN0OJVEN/58IJ3A4GBYcEq3LGAZqKb+ugvP0GNB9CEAA==}
+ htmlparser2@9.1.0:
+ resolution: {integrity: sha512-5zfg6mHUoaer/97TxnGpxmbR7zJtPwIYFMZ/H5ucTlPZhKvtum05yiPK3Mgai3a0DyVxv7qYqoweaEd2nrYQzQ==}
+
http-cache-semantics@4.2.0:
resolution: {integrity: sha512-dTxcvPXqPvXBQpq5dUr6mEMJX4oIEFv6bwom3FDwKRDsuIjjJGANqhBuoAn9c1RQJIdAKav33ED65E2ys+87QQ==}
@@ -17125,6 +17131,13 @@ snapshots:
html-whitespace-sensitive-tag-names@3.0.1: {}
+ htmlparser2@9.1.0:
+ dependencies:
+ domelementtype: 2.3.0
+ domhandler: 5.0.3
+ domutils: 3.2.2
+ entities: 4.5.0
+
http-cache-semantics@4.2.0: {}
http-errors@2.0.1: