Skip to content

Commit 56a81b5

Browse files
committed
🎨 Removed extra new lines and improved header formatting in plain text.
see https://forum.ghost.org/t/unnecessary-and-excessive-newlines-in-plain-text-part-of-newsletters/60267/

- Previously relied on the presence of \n in the source HTML.
- Now sets reasonable new lines based on HTML elements.
- Formats headers for better readability.
- Tests added for all changes.

Headers now look like:

**********
Header One
**********

----------
Header Two
----------

Header Three and Four
---------------------

Header Five and Six
1 parent 7753c94 commit 56a81b5

File tree

2 files changed

+97
-2
lines changed

2 files changed

+97
-2
lines changed

‎packages/html-to-plaintext/lib/html-to-plaintext.js‎

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,77 @@ const loadConverters = () => {
6161
});
6262

6363
const emailSettings = mergeSettings({
64+
preserveNewlines: false,
6465
selectors: [
6566
// equiv hideLinkHrefIfSameAsText: true
6667
{selector: 'a', options: {hideLinkHrefIfSameAsText: true}},
6768
// Don't include html .preheader in email
68-
{selector: '.preheader', format: 'skip'}
69-
]
69+
{selector: '.preheader', format: 'skip'},
70+
{selector: 'p', options: {leadingLineBreaks: 2, trailingLineBreaks: 1}},
71+
{selector: 'h1', format: 'customHeader'},
72+
{selector: 'h2', format: 'customHeader'},
73+
{selector: 'h3', format: 'customHeader'},
74+
{selector: 'h4', format: 'customHeader'},
75+
{selector: 'h5', options: {uppercase: false, leadingLineBreaks: 2, trailingLineBreaks: 1}},
76+
{selector: 'h6', options: {uppercase: false, leadingLineBreaks: 2, trailingLineBreaks: 1}}
77+
],
78+
formatters: {
79+
customHeader: function (elem, walk, builder) {
80+
function extractText(element) {
81+
if (element.type === 'text') {
82+
return element.data;
83+
}
84+
if (element.children) {
85+
return element.children.map(extractText).join('');
86+
}
87+
return '';
88+
}
89+
90+
const text = extractText(elem).trim();
91+
// Early return if header is empty
92+
if (!text) {
93+
return;
94+
}
95+
96+
const tagName = elem.name.toLowerCase();
97+
const stars = '*'.repeat(text.length);
98+
const dashes = '-'.repeat(text.length);
99+
100+
switch (tagName) {
101+
case 'h1':
102+
builder.addLineBreak();
103+
builder.addLineBreak();
104+
builder.addInline(`${stars}`);
105+
builder.addLineBreak();
106+
builder.addInline(`${text}`);
107+
builder.addLineBreak();
108+
builder.addInline(`${stars}`);
109+
builder.addLineBreak();
110+
break;
111+
112+
case 'h2':
113+
builder.addLineBreak();
114+
builder.addLineBreak();
115+
builder.addInline(`${dashes}`);
116+
builder.addLineBreak();
117+
builder.addInline(`${text}`);
118+
builder.addLineBreak();
119+
builder.addInline(`${dashes}`);
120+
builder.addLineBreak();
121+
break;
122+
123+
case 'h3':
124+
case 'h4':
125+
builder.addLineBreak();
126+
builder.addLineBreak();
127+
builder.addInline(`${text}`);
128+
builder.addLineBreak();
129+
builder.addInline(`${dashes}`);
130+
builder.addLineBreak();
131+
break;
132+
}
133+
}
134+
}
70135
});
71136

72137
const commentSettings = mergeSettings({

‎packages/html-to-plaintext/test/html-to-plaintext.test.js‎

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,36 @@ describe('Html to Plaintext', function () {
8888
});
8989
});
9090

91+
describe('New lines and format headers', function () {
    // Each entry: the test title, the HTML fed to the converter, and the exact
    // plain text expected back for the email variant.
    const scenarios = [
        {
            title: 'Strips excessive new lines and formats headers',
            html: '<p>Some ordinary text</p>\n\n\n\n<p>Should not be way far apart from earlier text.</p>',
            expected: 'Some ordinary text\n\nShould not be way far apart from earlier text.'
        },
        {
            title: 'Check header formatting',
            html: '<h1>Header One</h1>\n<p>What should I even write about?</p><p>And more</p><h2>With Header Two</h2><p>What about code?<h3>And Header Three</h3><p>Good bye</p>',
            expected: '\n**********\nHeader One\n**********\n\nWhat should I even write about?\n\nAnd more\n\n---------------\nWith Header Two\n---------------\n\nWhat about code?\n\nAnd Header Three\n----------------\n\nGood bye'
        },
        {
            title: 'Empty headers return nothing',
            html: '<h1></h1>',
            expected: ''
        },
        {
            title: 'Non-text header contents don’t appear',
            html: '<h1>Hello<!--Test-->world</h1>',
            expected: '\n**********\nHelloworld\n**********'
        }
    ];

    // Register one test per scenario, in declaration order.
    for (const scenario of scenarios) {
        it(scenario.title, function () {
            const result = getEmailandExcert(scenario.html);
            assert.equal(result.email, scenario.expected);
        });
    }
});
120+
91121
describe('commentSnippet converter', function () {
92122
function testConverter({input, expected}) {
93123
return () => {

0 commit comments

Comments
 (0)