fix: add descriptions to RSS feed items

musshiyaki · musshiyaki · commit dd16315d0611 · 2026-06-01T16:36:37.000+09:00
diff --git a/apps/site/scripts/blog-data/__test__/generate.test.mjs b/apps/site/scripts/blog-data/__test__/generate.test.mjs
@@ -13,6 +13,7 @@ mock.module('node:fs', {
       readable.push(`---\n`);
       file.frontMatterContent.forEach(line => readable.push(`${line}\n`));
       readable.push(`---\n`);
+      file.content?.forEach(line => readable.push(`${line}\n`));
       readable.push(null);
       readable.close = () => {};
       return readable;
@@ -61,6 +62,125 @@ describe('generateBlogData', () => {
     assert.equal(post.author, 'author');
   });
 
+  it('should generate a description from the first content paragraph', async () => {
+    files = [
+      {
+        path: 'pages/en/blog/post1.md',
+        frontMatterContent: [
+          `date: '2020-01-01T00:00:00.000Z'`,
+          `title: POST 1`,
+          `author: author`,
+        ],
+        content: [
+          '',
+          '## Summary',
+          '',
+          'Read the [Node.js release notes](https://nodejs.org/) for',
+          '**runtime** updates and `security` fixes.',
+          '',
+          'This is the second paragraph.',
+        ],
+      },
+    ];
+
+    const blogData = await generateBlogData();
+
+    assert.equal(
+      blogData.posts[0].description,
+      'Read the Node.js release notes for runtime updates and security fixes.'
+    );
+  });
+
+  it('should use only the first list item for description previews', async () => {
+    files = [
+      {
+        path: 'pages/en/blog/post1.md',
+        frontMatterContent: [
+          `date: '2020-01-01T00:00:00.000Z'`,
+          `title: POST 1`,
+          `author: author`,
+        ],
+        content: [
+          '',
+          '### Notable Changes',
+          '',
+          '- \\[[`abc123def4`](https://github.com/nodejs/node/commit/abc123def4)] - **crypto**: update `randomUUID()` [#12345](https://github.com/nodejs/node/pull/12345)',
+          '- **fs**: this should not be included',
+          '',
+        ],
+      },
+    ];
+
+    const blogData = await generateBlogData();
+
+    assert.equal(
+      blogData.posts[0].description,
+      'crypto: update randomUUID() #12345'
+    );
+  });
+
+  it('should skip category-only list items for description previews', async () => {
+    files = [
+      {
+        path: 'pages/en/blog/post1.md',
+        frontMatterContent: [
+          `date: '2020-01-01T00:00:00.000Z'`,
+          `title: POST 1`,
+          `author: author`,
+        ],
+        content: [
+          '',
+          '### Notable Changes',
+          '',
+          '- **console**:',
+          '  - \\[[`abc123def4`](https://github.com/nodejs/node/commit/abc123def4)] - **console**: add color mode [#12345](https://github.com/nodejs/node/pull/12345)',
+          '- **fs**',
+          '  - **fs**: this should not be included',
+          '',
+        ],
+      },
+    ];
+
+    const blogData = await generateBlogData();
+
+    assert.equal(
+      blogData.posts[0].description,
+      'console: add color mode #12345'
+    );
+  });
+
+  it('should ignore markup blocks when generating description previews', async () => {
+    files = [
+      {
+        path: 'pages/en/blog/post1.md',
+        frontMatterContent: [
+          `date: '2020-01-01T00:00:00.000Z'`,
+          `title: POST 1`,
+          `author: author`,
+        ],
+        content: [
+          '',
+          '<div className="note">',
+          'This lowercase HTML block should be ignored.',
+          '</div>',
+          '<AlertBox type="warning">',
+          'This JSX block should be ignored too.',
+          '</AlertBox>',
+          '<BlogImage />',
+          '',
+          'This is the first real paragraph.',
+        ],
+      },
+    ];
+
+    const blogData = await generateBlogData();
+
+    assert.equal(
+      blogData.posts[0].description,
+      'This is the first real paragraph.'
+    );
+  });
+
   it('should collect the data from a single md file if only one is found', async () => {
     files = [
       {
diff --git a/apps/site/scripts/blog-data/generate.mjs b/apps/site/scripts/blog-data/generate.mjs
@@ -11,14 +11,71 @@ import { getMarkdownFiles } from '#site/next.helpers.mjs';
 // gets the current blog path based on local module path
 const blogPath = join(process.cwd(), 'pages/en/blog');
 
+const escapeRegExp = value => value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+
+const getMarkupBlockTag = line => {
+  const match = /^<([A-Za-z][\w.-]*)(?:\s|>|\/>|$)/.exec(line);
+
+  if (!match) {
+    return undefined;
+  }
+
+  const tag = match[1];
+  const closingTag = new RegExp(`</${escapeRegExp(tag)}>\\s*$`);
+
+  return {
+    tag,
+    isClosed: /\/>\s*$/.test(line) || closingTag.test(line),
+  };
+};
+
+const isNonParagraphLine = line =>
+  line.startsWith('#') ||
+  line.startsWith('![') ||
+  line.startsWith('```') ||
+  line.startsWith('~~~') ||
+  line.startsWith('---') ||
+  line.startsWith('</') ||
+  /^\[[^\]]+\]:/.test(line) ||
+  /^<!--.*-->$/.test(line);
+
+const listItemMarker = /^\s*([-*]|\d+\.)\s+/;
+
+const stripMarkdownMarkup = paragraph =>
+  paragraph
+    .replace(/!\[([^\]]*)\]\([^)]+\)/g, '$1')
+    .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
+    .replace(/\[([^\]]+)\]\[[^\]]*\]/g, '$1')
+    .replace(/`([^`]+)`/g, '$1')
+    .replace(/\*\*([^*]+)\*\*/g, '$1')
+    .replace(/__([^_]+)__/g, '$1')
+    .replace(/\*([^*]+)\*/g, '$1')
+    .replace(/_([^_]+)_/g, '$1')
+    .replace(/\\([[\]_*`])/g, '$1')
+    .replace(/^\[[a-f0-9]{7,12}\]\s+-\s+/i, '')
+    .replace(/<\/?[^>]+>/g, '')
+    .replace(/&nbsp;/g, ' ')
+    .replace(/\s+/g, ' ')
+    .trim();
+
+const isCategoryOnlyListItem = item => {
+  const strippedItem = stripMarkdownMarkup(item);
+
+  return (
+    /^(\*\*[^*]+\*\*|`[^`]+`):?$/.test(item) ||
+    /^[\w ./-]+:$/.test(strippedItem)
+  );
+};
+
 /**
  * This method parses the source (raw) Markdown content into Frontmatter
  * and returns basic information for blog posts
  *
  * @param {string} filename the filename related to the blogpost
  * @param {string} source the source markdown content of the blog post
+ * @param {string} paragraph raw Markdown of the first paragraph or list item found after the frontmatter (may be empty)
  */
-const getFrontMatter = (filename, source) => {
+const getFrontMatter = (filename, source, paragraph) => {
   const {
     title = 'Untitled',
     author = 'The Node.js Project',
@@ -42,6 +99,7 @@ const getFrontMatter = (filename, source) => {
     author,
     username,
     date: new Date(date),
+    description: stripMarkdownMarkup(paragraph) || undefined,
     categories,
     slug,
   };
@@ -76,28 +134,93 @@ const generateBlogData = async () => {
 
           let rawFrontmatter = '';
           let frontmatterSeparatorsEncountered = 0;
+          let ignoredMarkupTag;
+          const paragraphLines = [];
 
           // We read line by line
           _readLine.on('line', line => {
-            rawFrontmatter += `${line}\n`;
-
             // We observe the frontmatter separators
-            if (line === '---') {
-              frontmatterSeparatorsEncountered++;
+            if (frontmatterSeparatorsEncountered < 2) {
+              rawFrontmatter += `${line}\n`;
+
+              if (line === '---') {
+                frontmatterSeparatorsEncountered++;
+              }
+
+              return;
+            }
+
+            const trimmedLine = line.trim();
+
+            if (ignoredMarkupTag) {
+              const closingTag = new RegExp(
+                `</${escapeRegExp(ignoredMarkupTag)}>\\s*$`
+              );
+
+              if (closingTag.test(trimmedLine)) {
+                ignoredMarkupTag = undefined;
+              }
+
+              return;
+            }
+
+            if (!trimmedLine) {
+              if (paragraphLines.length > 0) {
+                _readLine.close();
+                _stream.close();
+              }
+
+              return;
+            }
+
+            const markupBlockTag = getMarkupBlockTag(trimmedLine);
+
+            if (markupBlockTag) {
+              if (!markupBlockTag.isClosed) {
+                ignoredMarkupTag = markupBlockTag.tag;
+              }
+
+              return;
             }
 
-            // Once we have two separators we close the readLine and the stream
-            if (frontmatterSeparatorsEncountered === 2) {
+            if (listItemMarker.test(line)) {
+              if (paragraphLines.length === 0) {
+                const listItem = line.replace(listItemMarker, '').trim();
+
+                if (isCategoryOnlyListItem(listItem)) {
+                  return;
+                }
+
+                paragraphLines.push(listItem);
+              }
+
               _readLine.close();
               _stream.close();
+
+              return;
             }
+
+            if (isNonParagraphLine(trimmedLine)) {
+              if (paragraphLines.length > 0) {
+                _readLine.close();
+                _stream.close();
+              }
+
+              return;
+            }
+
+            paragraphLines.push(trimmedLine);
           });
 
           // Then we parse gray-matter on the frontmatter
-          // This allows us to only read the frontmatter part of each file
-          // and optimise the read-process as we have thousands of markdown files
+          // We close the reader after the frontmatter and the first preview
+          // paragraph (or list item) instead of loading every post in full.
           _readLine.on('close', () => {
-            const frontMatterData = getFrontMatter(filename, rawFrontmatter);
+            const frontMatterData = getFrontMatter(
+              filename,
+              rawFrontmatter,
+              paragraphLines.join(' ')
+            );
 
             frontMatterData.categories.forEach(category => {
               // we add the category to the categories set
diff --git a/apps/site/types/blog.ts b/apps/site/types/blog.ts
@@ -8,6 +8,7 @@ export type BlogPost = {
   author: string;
   username: string;
   date: string;
+  description?: string;
   categories: Array<BlogCategory>;
   slug: string;
 };
diff --git a/apps/site/util/__tests__/feeds.test.mjs b/apps/site/util/__tests__/feeds.test.mjs
@@ -16,6 +16,7 @@ describe('generateWebsiteFeeds', () => {
           slug: '/post-1',
           title: 'Post 1',
           date: '2025-04-18',
+          description: 'This is the first paragraph.',
           categories: ['all'],
         },
       ],
@@ -40,6 +41,7 @@ describe('generateWebsiteFeeds', () => {
         guid: `${blogData.posts[0].slug}?${date.getTime()}`,
         date,
         link: `${base}${blogData.posts[0].slug}`,
+        description: blogData.posts[0].description,
       },
     ]);
   });
diff --git a/apps/site/util/feeds.ts b/apps/site/util/feeds.ts
@@ -47,6 +47,7 @@ export const generateWebsiteFeeds = ({ posts }: BlogPostsRSC) => {
             title: post.title,
             date,
             link: `${canonicalUrl}${post.slug}`,
+            ...(post.description && { description: post.description }),
             guid:
               time > guidTimestampStartDate
                 ? `${post.slug}?${date.getTime()}`