Skip to content

Commit dd16315

Browse files
committed
fix: add descriptions to RSS feed items
1 parent c582f6e commit dd16315

5 files changed

Lines changed: 257 additions & 10 deletions

File tree

apps/site/scripts/blog-data/__test__/generate.test.mjs

Lines changed: 120 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ mock.module('node:fs', {
1313
readable.push(`---\n`);
1414
file.frontMatterContent.forEach(line => readable.push(`${line}\n`));
1515
readable.push(`---\n`);
16+
file.content?.forEach(line => readable.push(`${line}\n`));
1617
readable.push(null);
1718
readable.close = () => {};
1819
return readable;
@@ -61,6 +62,125 @@ describe('generateBlogData', () => {
6162
assert.equal(post.author, 'author');
6263
});
6364

65+
it('should generate a description from the first content paragraph', async () => {
66+
files = [
67+
{
68+
path: 'pages/en/blog/post1.md',
69+
frontMatterContent: [
70+
`date: '2020-01-01T00:00:00.000Z'`,
71+
`title: POST 1`,
72+
`author: author`,
73+
],
74+
content: [
75+
'',
76+
'## Summary',
77+
'',
78+
'Read the [Node.js release notes](https://nodejs.org/) for',
79+
'**runtime** updates and `security` fixes.',
80+
'',
81+
'This is the second paragraph.',
82+
],
83+
},
84+
];
85+
86+
const blogData = await generateBlogData();
87+
88+
assert.equal(
89+
blogData.posts[0].description,
90+
'Read the Node.js release notes for runtime updates and security fixes.'
91+
);
92+
});
93+
94+
it('should use only the first list item for description previews', async () => {
95+
files = [
96+
{
97+
path: 'pages/en/blog/post1.md',
98+
frontMatterContent: [
99+
`date: '2020-01-01T00:00:00.000Z'`,
100+
`title: POST 1`,
101+
`author: author`,
102+
],
103+
content: [
104+
'',
105+
'### Notable Changes',
106+
'',
107+
'- \\[[`abc123def4`](https://github.com/nodejs/node/commit/abc123def4)] - **crypto**: update `randomUUID()` [#12345](https://github.com/nodejs/node/pull/12345)',
108+
'- **fs**: this should not be included',
109+
'',
110+
],
111+
},
112+
];
113+
114+
const blogData = await generateBlogData();
115+
116+
assert.equal(
117+
blogData.posts[0].description,
118+
'crypto: update randomUUID() #12345'
119+
);
120+
});
121+
122+
it('should skip category-only list items for description previews', async () => {
123+
files = [
124+
{
125+
path: 'pages/en/blog/post1.md',
126+
frontMatterContent: [
127+
`date: '2020-01-01T00:00:00.000Z'`,
128+
`title: POST 1`,
129+
`author: author`,
130+
],
131+
content: [
132+
'',
133+
'### Notable Changes',
134+
'',
135+
'- **console**:',
136+
' - \\[[`abc123def4`](https://github.com/nodejs/node/commit/abc123def4)] - **console**: add color mode [#12345](https://github.com/nodejs/node/pull/12345)',
137+
'- **fs**',
138+
' - **fs**: this should not be included',
139+
'',
140+
],
141+
},
142+
];
143+
144+
const blogData = await generateBlogData();
145+
146+
assert.equal(
147+
blogData.posts[0].description,
148+
'console: add color mode #12345'
149+
);
150+
});
151+
152+
it('should ignore markup blocks when generating description previews', async () => {
153+
files = [
154+
{
155+
path: 'pages/en/blog/post1.md',
156+
frontMatterContent: [
157+
`date: '2020-01-01T00:00:00.000Z'`,
158+
`title: POST 1`,
159+
`author: author`,
160+
],
161+
content: [
162+
'',
163+
'<div className="note">',
164+
'This lowercase HTML block should be ignored.',
165+
'</div>',
166+
'<AlertBox type="warning">',
167+
'This JSX block should be ignored too.',
168+
'</AlertBox>',
169+
'<BlogImage />',
170+
'',
171+
'This is the first real paragraph.',
172+
],
173+
},
174+
];
175+
176+
const blogData = await generateBlogData();
177+
178+
assert.equal(
179+
blogData.posts[0].description,
180+
'This is the first real paragraph.'
181+
);
182+
});
183+
64184
it('should collect the data from a single md file if only one is found', async () => {
65185
files = [
66186
{

apps/site/scripts/blog-data/generate.mjs

Lines changed: 133 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -11,14 +11,71 @@ import { getMarkdownFiles } from '#site/next.helpers.mjs';
1111
// gets the current blog path based on local module path
1212
const blogPath = join(process.cwd(), 'pages/en/blog');
1313

14+
const escapeRegExp = value => value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
15+
16+
const getMarkupBlockTag = line => {
17+
const match = /^<([A-Za-z][\w.-]*)(?:\s|>|\/>|$)/.exec(line);
18+
19+
if (!match) {
20+
return undefined;
21+
}
22+
23+
const tag = match[1];
24+
const closingTag = new RegExp(`</${escapeRegExp(tag)}>\\s*$`);
25+
26+
return {
27+
tag,
28+
isClosed: /\/>\s*$/.test(line) || closingTag.test(line),
29+
};
30+
};
31+
32+
const isNonParagraphLine = line =>
33+
line.startsWith('#') ||
34+
line.startsWith('![') ||
35+
line.startsWith('```') ||
36+
line.startsWith('~~~') ||
37+
line.startsWith('---') ||
38+
line.startsWith('</') ||
39+
/^\[[^\]]+\]:/.test(line) ||
40+
/^<!--.*-->$/.test(line);
41+
42+
const listItemMarker = /^\s*([-*]|\d+\.)\s+/;
43+
44+
const stripMarkdownMarkup = paragraph =>
45+
paragraph
46+
.replace(/!\[([^\]]*)\]\([^)]+\)/g, '$1')
47+
.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
48+
.replace(/\[([^\]]+)\]\[[^\]]*\]/g, '$1')
49+
.replace(/`([^`]+)`/g, '$1')
50+
.replace(/\*\*([^*]+)\*\*/g, '$1')
51+
.replace(/__([^_]+)__/g, '$1')
52+
.replace(/\*([^*]+)\*/g, '$1')
53+
.replace(/_([^_]+)_/g, '$1')
54+
.replace(/\\([[\]_*`])/g, '$1')
55+
.replace(/^\[[a-f0-9]{7,12}\]\s+-\s+/i, '')
56+
.replace(/<\/?[^>]+>/g, '')
57+
.replace(/&nbsp;/g, ' ')
58+
.replace(/\s+/g, ' ')
59+
.trim();
60+
61+
const isCategoryOnlyListItem = item => {
62+
const strippedItem = stripMarkdownMarkup(item);
63+
64+
return (
65+
/^(\*\*[^*]+\*\*|`[^`]+`):?$/.test(item) ||
66+
/^[\w ./-]+:$/.test(strippedItem)
67+
);
68+
};
69+
1470
/**
1571
* This method parses the source (raw) Markdown content into Frontmatter
1672
* and returns basic information for blog posts
1773
*
1874
* @param {string} filename the filename related to the blogpost
1975
* @param {string} source the source markdown content of the blog post
76+
* @param {string} paragraph the first paragraph of the blog post
2077
*/
21-
const getFrontMatter = (filename, source) => {
78+
const getFrontMatter = (filename, source, paragraph) => {
2279
const {
2380
title = 'Untitled',
2481
author = 'The Node.js Project',
@@ -42,6 +99,7 @@ const getFrontMatter = (filename, source) => {
4299
author,
43100
username,
44101
date: new Date(date),
102+
description: stripMarkdownMarkup(paragraph) || undefined,
45103
categories,
46104
slug,
47105
};
@@ -76,28 +134,93 @@ const generateBlogData = async () => {
76134

77135
let rawFrontmatter = '';
78136
let frontmatterSeparatorsEncountered = 0;
137+
let ignoredMarkupTag;
138+
const paragraphLines = [];
79139

80140
// We read line by line
81141
_readLine.on('line', line => {
82-
rawFrontmatter += `${line}\n`;
83-
84142
// We observe the frontmatter separators
85-
if (line === '---') {
86-
frontmatterSeparatorsEncountered++;
143+
if (frontmatterSeparatorsEncountered < 2) {
144+
rawFrontmatter += `${line}\n`;
145+
146+
if (line === '---') {
147+
frontmatterSeparatorsEncountered++;
148+
}
149+
150+
return;
151+
}
152+
153+
const trimmedLine = line.trim();
154+
155+
if (ignoredMarkupTag) {
156+
const closingTag = new RegExp(
157+
`</${escapeRegExp(ignoredMarkupTag)}>\\s*$`
158+
);
159+
160+
if (closingTag.test(trimmedLine)) {
161+
ignoredMarkupTag = undefined;
162+
}
163+
164+
return;
165+
}
166+
167+
if (!trimmedLine) {
168+
if (paragraphLines.length > 0) {
169+
_readLine.close();
170+
_stream.close();
171+
}
172+
173+
return;
174+
}
175+
176+
const markupBlockTag = getMarkupBlockTag(trimmedLine);
177+
178+
if (markupBlockTag) {
179+
if (!markupBlockTag.isClosed) {
180+
ignoredMarkupTag = markupBlockTag.tag;
181+
}
182+
183+
return;
87184
}
88185

89-
// Once we have two separators we close the readLine and the stream
90-
if (frontmatterSeparatorsEncountered === 2) {
186+
if (listItemMarker.test(line)) {
187+
if (paragraphLines.length === 0) {
188+
const listItem = line.replace(listItemMarker, '').trim();
189+
190+
if (isCategoryOnlyListItem(listItem)) {
191+
return;
192+
}
193+
194+
paragraphLines.push(listItem);
195+
}
196+
91197
_readLine.close();
92198
_stream.close();
199+
200+
return;
93201
}
202+
203+
if (isNonParagraphLine(trimmedLine)) {
204+
if (paragraphLines.length > 0) {
205+
_readLine.close();
206+
_stream.close();
207+
}
208+
209+
return;
210+
}
211+
212+
paragraphLines.push(trimmedLine);
94213
});
95214

96215
// Then we parse gray-matter on the frontmatter
97-
// This allows us to only read the frontmatter part of each file
98-
// and optimise the read-process as we have thousands of markdown files
216+
// This allows us to read only the frontmatter and the first useful
217+
// preview line instead of loading every blog post in full.
99218
_readLine.on('close', () => {
100-
const frontMatterData = getFrontMatter(filename, rawFrontmatter);
219+
const frontMatterData = getFrontMatter(
220+
filename,
221+
rawFrontmatter,
222+
paragraphLines.join(' ')
223+
);
101224

102225
frontMatterData.categories.forEach(category => {
103226
// we add the category to the categories set

apps/site/types/blog.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@ export type BlogPost = {
88
author: string;
99
username: string;
1010
date: string;
11+
description?: string;
1112
categories: Array<BlogCategory>;
1213
slug: string;
1314
};

apps/site/util/__tests__/feeds.test.mjs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ describe('generateWebsiteFeeds', () => {
1616
slug: '/post-1',
1717
title: 'Post 1',
1818
date: '2025-04-18',
19+
description: 'This is the first paragraph.',
1920
categories: ['all'],
2021
},
2122
],
@@ -40,6 +41,7 @@ describe('generateWebsiteFeeds', () => {
4041
guid: `${blogData.posts[0].slug}?${date.getTime()}`,
4142
date,
4243
link: `${base}${blogData.posts[0].slug}`,
44+
description: blogData.posts[0].description,
4345
},
4446
]);
4547
});

apps/site/util/feeds.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@ export const generateWebsiteFeeds = ({ posts }: BlogPostsRSC) => {
4747
title: post.title,
4848
date,
4949
link: `${canonicalUrl}${post.slug}`,
50+
...(post.description && { description: post.description }),
5051
guid:
5152
time > guidTimestampStartDate
5253
? `${post.slug}?${date.getTime()}`

0 commit comments

Comments
 (0)