Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions packages/core/src/api/parsers/html/parseHTML.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@ import {
import { Block } from "../../../blocks/defaultBlocks.js";
import { nodeToBlock } from "../../nodeConversions/nodeToBlock.js";
import { nestedListsToBlockNoteStructure } from "./util/nestedLists.js";
import { preprocessHTMLWhitespace } from "./util/normalizeWhitespace.js";

export function HTMLToBlocks<
BSchema extends BlockSchema,
I extends InlineContentSchema,
S extends StyleSchema,
>(html: string, pmSchema: Schema): Block<BSchema, I, S>[] {
const htmlNode = nestedListsToBlockNoteStructure(html);
preprocessHTMLWhitespace(htmlNode);
const parser = DOMParser.fromSchema(pmSchema);

// Other approach might be to use
Expand Down
87 changes: 87 additions & 0 deletions packages/core/src/api/parsers/html/util/normalizeWhitespace.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/**
 * Reports whether `element` carries the clipboard marker that Notion adds
 * to copied HTML: a comment of the form `<!-- notionvc: UUID -->`.
 *
 * Notion writes `\n` inside text nodes to mean a hard break. That is
 * non-standard but deliberate, so callers use this check to recognise
 * such content.
 *
 * @param element Root of the subtree whose comment nodes are inspected.
 * @returns `true` as soon as a comment whose text starts with `notionvc:`
 * (optionally preceded by whitespace) is found, otherwise `false`.
 */
function isNotionHTML(element: HTMLElement): boolean {
  // Numeric value of NodeFilter.SHOW_COMMENT.
  const SHOW_COMMENT = 128;
  const notionMarker = /^\s*notionvc:/;

  const comments = element.ownerDocument.createTreeWalker(
    element,
    SHOW_COMMENT,
  );

  for (
    let comment = comments.nextNode();
    comment;
    comment = comments.nextNode()
  ) {
    if (notionMarker.test(comment.nodeValue || "")) {
      return true;
    }
  }

  return false;
}

/**
 * Collapses whitespace runs in the text nodes below `element`, mimicking
 * what CSS `white-space: normal` would render.
 *
 * Why this exists: when the schema sets `linebreakReplacement` (BlockNote
 * does so for hard breaks), ProseMirror's DOMParser turns `\n` characters
 * in text nodes into hard break nodes rather than collapsing them. HTML
 * that is merely line-wrapped in its source (e.g. from MS Word) would
 * then show up with visible line breaks in the editor.
 *
 * Rules applied:
 * - Only text nodes containing `\r` or `\n` are rewritten; in those, every
 *   run of space, tab, CR, LF or form feed becomes a single space.
 * - Text nodes with a `<pre>` or `<code>` ancestor strictly below
 *   `element` are left alone, since whitespace is significant there.
 *
 * This function does not detect sources such as Notion that use `\n` on
 * purpose for hard breaks; the caller has to skip it for those.
 *
 * @param element Root of the subtree to normalize; mutated in place.
 */
function normalizeTextNodeWhitespace(element: HTMLElement) {
  // Numeric values of NodeFilter.SHOW_TEXT / FILTER_ACCEPT / FILTER_REJECT.
  const SHOW_TEXT = 4;
  const FILTER_ACCEPT = 1;
  const FILTER_REJECT = 2;

  const whitespaceSensitiveTags = new Set(["PRE", "CODE"]);

  // Climbs from the node's parent up to (but not including) the root and
  // reports whether any element on the way is whitespace-sensitive.
  const hasPreservingAncestor = (textNode: Node): boolean => {
    for (
      let ancestor = textNode.parentElement;
      ancestor && ancestor !== element;
      ancestor = ancestor.parentElement
    ) {
      if (whitespaceSensitiveTags.has(ancestor.tagName)) {
        return true;
      }
    }
    return false;
  };

  const walker = element.ownerDocument.createTreeWalker(element, SHOW_TEXT, {
    acceptNode: (candidate) =>
      hasPreservingAncestor(candidate) ? FILTER_REJECT : FILTER_ACCEPT,
  });

  // Gather every eligible text node first, then rewrite them afterwards.
  const collected: Text[] = [];
  for (
    let current = walker.nextNode();
    current;
    current = walker.nextNode()
  ) {
    collected.push(current as Text);
  }

  collected.forEach((text) => {
    const original = text.nodeValue;
    if (!original || !/[\r\n]/.test(original)) {
      // Nothing to do: empty, or no line break that could become a hard break.
      return;
    }
    text.nodeValue = original.replace(/[ \t\r\n\f]+/g, " ");
  });
}

/**
 * Entry point for whitespace preprocessing of parsed HTML.
 *
 * Text-node whitespace below `element` is normalized to match standard CSS
 * `white-space: normal` behavior, unless the content is recognised as
 * Notion HTML. Notion deliberately uses `\n` for hard breaks, so that
 * content is left exactly as it is.
 *
 * @param element Root of the HTML subtree; may be mutated in place.
 */
export function preprocessHTMLWhitespace(element: HTMLElement) {
  if (isNotionHTML(element)) {
    // Newlines are meaningful here; leave them untouched.
    return;
  }

  normalizeTextNodeWhitespace(element);
}
10 changes: 5 additions & 5 deletions tests/src/unit/core/clipboard/paste/pasteTestInstances.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
import { TextSelection } from "@tiptap/pm/state";

import {
TestBlockSchema,
TestInlineContentSchema,
TestStyleSchema,
} from "../../testSchema.js";
import { PasteTestCase } from "../../../shared/clipboard/paste/pasteTestCase.js";
import {
testPasteHTML,
testPasteMarkdown,
} from "../../../shared/clipboard/paste/pasteTestExecutors.js";
import { getPosOfTextNode } from "../../../shared/testUtil.js";
import { TestInstance } from "../../../types.js";
import {
TestBlockSchema,
TestInlineContentSchema,
TestStyleSchema,
} from "../../testSchema.js";

export const pasteTestInstancesHTML: TestInstance<
PasteTestCase<TestBlockSchema, TestInlineContentSchema, TestStyleSchema>,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,7 @@
{
"styles": {},
"text": "Table Cell
Table Cell

Table Cell
",
Table Cell Table Cell",
"type": "text",
},
],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
[
{
"children": [],
"content": [
{
"styles": {
"bold": true,
"underline": true,
},
"text": "Que se passe-t-il si je réponds tard à un message chat et que l'utilisateur n'est plus en ligne :",
"type": "text",
},
],
"id": "1",
"props": {
"backgroundColor": "default",
"textAlignment": "left",
"textColor": "default",
},
"type": "paragraph",
},
{
"children": [],
"content": [
{
"styles": {},
"text": "Lorsque vous envoyez un message à un utilisateur dans une conversation chat, et qu'il est encore en ligne, il recevra le message sur sa bulle chatbot.",
"type": "text",
},
],
"id": "2",
"props": {
"backgroundColor": "default",
"textAlignment": "left",
"textColor": "default",
},
"type": "paragraph",
},
{
"children": [],
"content": [
{
"styles": {},
"text": "Cependant S'il n'est plus en ligne, votre message sera envoyé par email si :",
"type": "text",
},
],
"id": "3",
"props": {
"backgroundColor": "default",
"textAlignment": "left",
"textColor": "default",
},
"type": "paragraph",
},
{
"children": [],
"content": [
{
"styles": {},
"text": ". l'utilisateur n'a pas lu votre réponse après 2 minutes",
"type": "text",
},
],
"id": "4",
"props": {
"backgroundColor": "default",
"textAlignment": "left",
"textColor": "default",
},
"type": "paragraph",
},
{
"children": [],
"content": [
{
"styles": {},
"text": ". l'utilisateur n'est plus présent sur votre site web",
"type": "text",
},
],
"id": "5",
"props": {
"backgroundColor": "default",
"textAlignment": "left",
"textColor": "default",
},
"type": "paragraph",
},
{
"children": [],
"content": [
{
"styles": {},
"text": " ",
"type": "text",
},
],
"id": "6",
"props": {
"backgroundColor": "default",
"textAlignment": "left",
"textColor": "default",
},
"type": "paragraph",
},
{
"children": [],
"content": [
{
"styles": {},
"text": "Cela se fait automatiquement donc, lorsque nous répondons par chat, si l'utilisateur n'est plus là, Crisp renvoie le message alors par email et le canal de discussion se transforme en canal de discussion email.

Il est possible aussi de créer une conversation email directement le profil de l'utilisateur (bouton bleu en haut à droite de la conversation)",
"type": "text",
},
],
"id": "7",
"props": {
"backgroundColor": "default",
"textAlignment": "left",
"textColor": "default",
},
"type": "paragraph",
},
]
64 changes: 64 additions & 0 deletions tests/src/unit/core/formatConversion/parse/parseTestInstances.ts
Original file line number Diff line number Diff line change
Expand Up @@ -949,6 +949,70 @@ console.log("Third Line")</code></pre>`,
},
executeTest: testParseHTML,
},
{
testCase: {
name: "msWordPaste",
content: `<html xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:w="urn:schemas-microsoft-com:office:word"
xmlns:m="http://schemas.microsoft.com/office/2004/12/omml"
xmlns="http://www.w3.org/TR/REC-html40">

<head>
<meta http-equiv=Content-Type content="text/html; charset=utf-8">
<meta name=ProgId content=Word.Document>
<meta name=Generator content="Microsoft Word 15">
<meta name=Originator content="Microsoft Word 15">
<style>
<!--
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
\t{margin-top:0cm;
\tmargin-right:0cm;
\tmargin-bottom:8.0pt;
\tmargin-left:0cm;
\tline-height:107%;
\tfont-size:11.0pt;
\tfont-family:"Calibri",sans-serif;}
-->
</style>
</head>

<body lang=en-NL style='tab-interval:36.0pt;word-wrap:break-word'>
<!--StartFragment-->

<p class=MsoNormal><b><u><span lang=FR>Que se passe-t-il si je réponds tard à
un message chat et que l'utilisateur n'est plus en ligne&nbsp;:<o:p></o:p></span></u></b></p>

<p class=MsoNormal><span lang=FR>Lorsque vous envoyez un message à un
utilisateur dans une conversation chat, et qu'il est encore en ligne, il
recevra le message sur sa bulle chatbot.<o:p></o:p></span></p>

<p class=MsoNormal style='margin-bottom:0cm;line-height:normal'><span lang=FR>Cependant
S'il n'est plus en ligne, votre message sera envoyé par email si :<o:p></o:p></span></p>

<p class=MsoNormal style='margin-bottom:0cm;line-height:normal'><span lang=FR>.
l'utilisateur n'a pas lu votre réponse après 2 minutes<o:p></o:p></span></p>

<p class=MsoNormal style='margin-bottom:0cm;line-height:normal'><span lang=FR>.
l'utilisateur n'est plus présent sur votre site web<o:p></o:p></span></p>

<p class=MsoNormal><span lang=FR><o:p>&nbsp;</o:p></span></p>

<p class=MsoNormal><span lang=FR>Cela se fait automatiquement donc, lorsque
nous répondons par chat, si l'utilisateur n'est plus là, Crisp renvoie le
message alors par email et le canal de discussion se transforme en canal de
discussion email.<br>
<br>
Il est possible aussi de créer une conversation email directement le profil de
l'utilisateur (bouton bleu en haut à droite de la conversation)<o:p></o:p></span></p>

<!--EndFragment-->
</body>

</html>`,
},
executeTest: testParseHTML,
},
];

export const parseTestInstancesMarkdown: TestInstance<
Expand Down
Loading