@@ -79,6 +79,7 @@ def _process_notebook(nb_path: Path, output_dir: Path) -> dict | None:
7979 # Extract metadata
8080 title = _extract_title (notebook )
8181 description = _extract_description (notebook )
82+ thumbnail_path = _extract_thumbnail (notebook )
8283
8384 # Get category and tags from mapping
8485 category , tags = CATEGORY_MAPPINGS .get (stem , ("advanced" , []))
@@ -92,7 +93,24 @@ def _process_notebook(nb_path: Path, output_dir: Path) -> dict | None:
9293 with open (output_path , "w" , encoding = "utf-8" ) as f :
9394 json .dump (clean_notebook , f , indent = 1 , ensure_ascii = False )
9495
95- return {
96+ # Process thumbnail path - convert to figures-relative path
97+ thumbnail = None
98+ if thumbnail_path :
99+ # Handle relative paths like "../figures/image.png" or "figures/image.png"
100+ # Extract just the filename or subdirectory/filename
101+ path_parts = Path (thumbnail_path .replace ("\\ " , "/" ))
102+ # Get the path relative to figures directory
103+ # Common patterns: ../figures/foo.png, figures/foo.png, ./figures/foo.png
104+ parts = path_parts .parts
105+ if "figures" in parts :
106+ # Find index of 'figures' and take everything after
107+ fig_idx = parts .index ("figures" )
108+ thumbnail = "/" .join (parts [fig_idx + 1 :])
109+ else :
110+ # Just use the filename
111+ thumbnail = path_parts .name
112+
113+ result = {
96114 "slug" : slug ,
97115 "file" : nb_path .name ,
98116 "title" : title ,
@@ -102,6 +120,11 @@ def _process_notebook(nb_path: Path, output_dir: Path) -> dict | None:
102120 "executable" : executable ,
103121 }
104122
123+ if thumbnail :
124+ result ["thumbnail" ] = thumbnail
125+
126+ return result
127+
105128
106129def _strip_outputs (notebook : dict ) -> dict :
107130 """Remove outputs from notebook cells (keep source only)."""
@@ -138,6 +161,49 @@ def _extract_title(notebook: dict) -> str:
138161 return "Untitled"
139162
140163
164+ def _extract_thumbnail (notebook : dict ) -> str | None :
165+ """Extract first image path from markdown cells.
166+
167+ Looks for images in markdown cells using:
168+ - Markdown syntax: 
169+ - HTML syntax: <img src="path">
170+ - RST syntax: .. image:: path
171+
172+ Returns the image path (relative to notebook) or None if not found.
173+ """
174+ # Pattern for markdown images: 
175+ md_pattern = re .compile (r'!\[[^\]]*\]\(([^)]+)\)' )
176+ # Pattern for HTML images: <img src="path"> or <img src='path'>
177+ html_pattern = re .compile (r'<img[^>]+src=["\']([^"\']+)["\']' , re .IGNORECASE )
178+ # Pattern for RST images: .. image:: path
179+ rst_pattern = re .compile (r'\.\.\s+image::\s*(\S+)' )
180+
181+ for cell in notebook .get ("cells" , []):
182+ cell_type = cell .get ("cell_type" )
183+ # Check markdown and raw cells (RST is often in raw cells)
184+ if cell_type in ("markdown" , "raw" ):
185+ source = cell .get ("source" , [])
186+ if isinstance (source , list ):
187+ source = "" .join (source )
188+
189+ # Try markdown syntax first
190+ match = md_pattern .search (source )
191+ if match :
192+ return match .group (1 )
193+
194+ # Try HTML syntax
195+ match = html_pattern .search (source )
196+ if match :
197+ return match .group (1 )
198+
199+ # Try RST syntax
200+ match = rst_pattern .search (source )
201+ if match :
202+ return match .group (1 )
203+
204+ return None
205+
206+
141207def _extract_description (notebook : dict ) -> str :
142208 """Extract description from first paragraph after title.
143209
0 commit comments