@@ -11,14 +11,71 @@ import { getMarkdownFiles } from '#site/next.helpers.mjs';
1111// resolves the `pages/en/blog` directory against the current working directory (process.cwd())
1212const blogPath = join ( process . cwd ( ) , 'pages/en/blog' ) ;
1313
/**
 * Escapes every character that has a special meaning inside a regular
 * expression so that the result can be embedded in a `RegExp` source
 * and match the input literally.
 *
 * @param {string} value the raw text to escape
 * @returns {string} the text with each RegExp metacharacter backslash-escaped
 */
const escapeRegExp = value => {
  const specialCharacters = /[.*+?^${}()|[\]\\]/g;

  // `$&` re-inserts the matched character right after the added backslash
  return value.replace(specialCharacters, '\\$&');
};
15+
/**
 * HTML void elements: they never have a closing tag, so an opening tag
 * such as `<img src="…">` is already a complete element on its own.
 *
 * Only lowercase names are listed on purpose; the lookup below is
 * case-sensitive so that capitalised MDX/JSX components (for example a
 * `<Link>` component) are never mistaken for the void `<link>` element.
 */
const voidHtmlElements = new Set([
  'area',
  'base',
  'br',
  'col',
  'embed',
  'hr',
  'img',
  'input',
  'link',
  'meta',
  'source',
  'track',
  'wbr',
]);

/**
 * Detects whether a (trimmed) line starts with an HTML/JSX opening tag and
 * reports whether that element is already closed on the same line.
 *
 * An element counts as closed when the line ends with `/>`, ends with the
 * matching `</tag>`, or is an HTML void element whose opening tag is
 * terminated on this line.
 *
 * @param {string} line the trimmed line to inspect
 * @returns {{ tag: string, isClosed: boolean } | undefined} the tag name and
 * its closed state, or `undefined` when the line does not start with a tag
 */
const getMarkupBlockTag = line => {
  // The tag name must be followed by whitespace, `>`, `/>` or the end of the
  // line, which keeps autolinks such as `<https://…>` from matching.
  const match = /^<([A-Za-z][\w.-]*)(?:\s|>|\/>|$)/.exec(line);

  if (!match) {
    return undefined;
  }

  const [, tag] = match;
  const closingTag = new RegExp(`</${escapeRegExp(tag)}>\\s*$`);

  // A void element can never be followed by `</tag>`; treating it as open
  // would leave the caller waiting for a closing tag that never comes.
  const isCompleteVoidElement =
    voidHtmlElements.has(tag) && line.includes('>');

  return {
    tag,
    isClosed:
      /\/>\s*$/.test(line) || closingTag.test(line) || isCompleteVoidElement,
  };
};
31+
/**
 * Tells whether a (trimmed) line is Markdown structure rather than paragraph
 * text: a heading, an image, a code fence, a `---` separator, a closing
 * HTML tag, a link reference definition or a single-line HTML comment.
 *
 * @param {string} line the trimmed line to classify
 * @returns {boolean} `true` when the line must not be part of a paragraph
 */
const isNonParagraphLine = line => {
  const structuralPrefixes = ['#', '![', '```', '~~~', '---', '</'];

  if (structuralPrefixes.some(prefix => line.startsWith(prefix))) {
    return true;
  }

  // `[label]: target` link reference definitions
  if (/^\[[^\]]+\]:/.test(line)) {
    return true;
  }

  // HTML comments that open and close on the same line
  return /^<!--.*-->$/.test(line);
};
41+
/**
 * Matches the marker that opens a Markdown list item, including any leading
 * indentation and the whitespace that follows the marker.
 *
 * Bullet markers are `-`, `*` and `+`; ordered markers are digits followed
 * by `.` or `)`. The mandatory trailing whitespace keeps emphasis
 * (`**bold**`, `*em*`) and text such as `+1` or `1.5` from matching.
 */
const listItemMarker = /^\s*([-*+]|\d+[.)])\s+/;
43+
/**
 * Reduces a Markdown paragraph to plain text.
 *
 * Images and links are replaced by their alt/link text, inline code and
 * emphasis markers are unwrapped, backslash escapes are resolved, a leading
 * `[<commit hash>] - ` prefix is dropped, HTML tags are removed and all
 * whitespace (including `&nbsp;`) is collapsed to single spaces.
 *
 * Underscore emphasis is only unwrapped when the underscores are not
 * adjacent to a word character, so identifiers such as
 * `NODE_EXTRA_CA_CERTS` or `__dirname` keep their underscores.
 *
 * @param {string} [paragraph] the Markdown text; defaults to an empty string
 * @returns {string} the plain-text rendition (possibly empty)
 */
const stripMarkdownMarkup = (paragraph = '') =>
  paragraph
    // images: keep the alt text
    .replace(/!\[([^\]]*)\]\([^)]+\)/g, '$1')
    // inline links: keep the link text
    .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
    // reference-style links: keep the link text
    .replace(/\[([^\]]+)\]\[[^\]]*\]/g, '$1')
    // inline code
    .replace(/`([^`]+)`/g, '$1')
    // bold, handled before italic so `**` is not read as two `*` markers
    .replace(/\*\*([^*]+)\*\*/g, '$1')
    .replace(/(?<!\w)__([^_]+)__(?!\w)/g, '$1')
    // italic
    .replace(/\*([^*]+)\*/g, '$1')
    .replace(/(?<!\w)_([^_]+)_(?!\w)/g, '$1')
    // backslash-escaped Markdown punctuation
    .replace(/\\([[\]_*`])/g, '$1')
    // leading `[abcdef1] - ` commit-hash prefix
    .replace(/^\[[a-f0-9]{7,12}\]\s+-\s+/i, '')
    // HTML tags
    .replace(/<\/?[^>]+>/g, '')
    .replace(/&nbsp;/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();
60+
/**
 * Tells whether a list item consists solely of a category label and carries
 * no descriptive text of its own.
 *
 * Two shapes qualify: an item that is entirely one bold or inline-code span
 * with an optional trailing colon, and an item whose plain text is a single
 * token (word characters, `.`, `/`, `-`) followed by a colon.
 *
 * @param {string} item the list item text without its list marker
 * @returns {boolean} `true` when the item is just a category label
 */
const isCategoryOnlyListItem = item => {
  const plainText = stripMarkdownMarkup(item);

  const isWrappedLabel = /^(\*\*[^*]+\*\*|`[^`]+`):?$/.test(item);

  if (isWrappedLabel) {
    return true;
  }

  const isBareLabel = /^[\w./-]+:$/.test(plainText);

  return isBareLabel;
};
69+
1470/**
1571 * This method parses the source (raw) Markdown content into Frontmatter
1672 * and returns basic information for blog posts
1773 *
1874 * @param {string } filename the filename related to the blogpost
1975 * @param {string } source the source markdown content of the blog post
76+ * @param {string } paragraph the first paragraph of the blog post
2077 */
21- const getFrontMatter = ( filename , source ) => {
78+ const getFrontMatter = ( filename , source , paragraph ) => {
2279 const {
2380 title = 'Untitled' ,
2481 author = 'The Node.js Project' ,
@@ -42,6 +99,7 @@ const getFrontMatter = (filename, source) => {
4299 author,
43100 username,
44101 date : new Date ( date ) ,
102+ description : stripMarkdownMarkup ( paragraph ) || undefined ,
45103 categories,
46104 slug,
47105 } ;
@@ -76,28 +134,93 @@ const generateBlogData = async () => {
76134
77135 let rawFrontmatter = '' ;
78136 let frontmatterSeparatorsEncountered = 0 ;
137+ let ignoredMarkupTag ;
138+ const paragraphLines = [ ] ;
79139
80140 // We read line by line
81141 _readLine . on ( 'line' , line => {
82- rawFrontmatter += `${ line } \n` ;
83-
84142 // We observe the frontmatter separators
85- if ( line === '---' ) {
86- frontmatterSeparatorsEncountered ++ ;
143+ if ( frontmatterSeparatorsEncountered < 2 ) {
144+ rawFrontmatter += `${ line } \n` ;
145+
146+ if ( line === '---' ) {
147+ frontmatterSeparatorsEncountered ++ ;
148+ }
149+
150+ return ;
151+ }
152+
153+ const trimmedLine = line . trim ( ) ;
154+
155+ if ( ignoredMarkupTag ) {
156+ const closingTag = new RegExp (
157+ `</${ escapeRegExp ( ignoredMarkupTag ) } >\\s*$`
158+ ) ;
159+
160+ if ( closingTag . test ( trimmedLine ) ) {
161+ ignoredMarkupTag = undefined ;
162+ }
163+
164+ return ;
165+ }
166+
167+ if ( ! trimmedLine ) {
168+ if ( paragraphLines . length > 0 ) {
169+ _readLine . close ( ) ;
170+ _stream . close ( ) ;
171+ }
172+
173+ return ;
174+ }
175+
176+ const markupBlockTag = getMarkupBlockTag ( trimmedLine ) ;
177+
178+ if ( markupBlockTag ) {
179+ if ( ! markupBlockTag . isClosed ) {
180+ ignoredMarkupTag = markupBlockTag . tag ;
181+ }
182+
183+ return ;
87184 }
88185
89- // Once we have two separators we close the readLine and the stream
90- if ( frontmatterSeparatorsEncountered === 2 ) {
186+ if ( listItemMarker . test ( line ) ) {
187+ if ( paragraphLines . length === 0 ) {
188+ const listItem = line . replace ( listItemMarker , '' ) . trim ( ) ;
189+
190+ if ( isCategoryOnlyListItem ( listItem ) ) {
191+ return ;
192+ }
193+
194+ paragraphLines . push ( listItem ) ;
195+ }
196+
91197 _readLine . close ( ) ;
92198 _stream . close ( ) ;
199+
200+ return ;
93201 }
202+
203+ if ( isNonParagraphLine ( trimmedLine ) ) {
204+ if ( paragraphLines . length > 0 ) {
205+ _readLine . close ( ) ;
206+ _stream . close ( ) ;
207+ }
208+
209+ return ;
210+ }
211+
212+ paragraphLines . push ( trimmedLine ) ;
94213 } ) ;
95214
96215 // Then we parse gray-matter on the frontmatter
97- // This allows us to only read the frontmatter part of each file
98- // and optimise the read-process as we have thousands of markdown files
216+ // This allows us to read only the frontmatter and the first useful
217+ // preview line instead of loading every blog post in full.
99218 _readLine . on ( 'close' , ( ) => {
100- const frontMatterData = getFrontMatter ( filename , rawFrontmatter ) ;
219+ const frontMatterData = getFrontMatter (
220+ filename ,
221+ rawFrontmatter ,
222+ paragraphLines . join ( ' ' )
223+ ) ;
101224
102225 frontMatterData . categories . forEach ( category => {
103226 // we add the category to the categories set
0 commit comments