From 6a482bb254c050ebf217ede344d12bb12bf5bbd3 Mon Sep 17 00:00:00 2001 From: Egor Kraev Date: Tue, 20 Jan 2026 11:54:20 +0100 Subject: [PATCH] Add option for gslides markdown parser to raise on unsupported elements --- gslides_api/__init__.py | 1 + gslides_api/agnostic/markdown_parser.py | 84 +++++++++++++++----- gslides_api/element/shape.py | 5 ++ gslides_api/element/table.py | 6 ++ gslides_api/element/text_content.py | 6 +- gslides_api/markdown/from_markdown.py | 22 +++++- tests/test_markdown_features.py | 100 +++++++++++++++++++++++- 7 files changed, 203 insertions(+), 21 deletions(-) diff --git a/gslides_api/__init__.py b/gslides_api/__init__.py index 04daa67..ceae77c 100644 --- a/gslides_api/__init__.py +++ b/gslides_api/__init__.py @@ -1,4 +1,5 @@ from ._version import __version__, __version_info__ +from .agnostic.markdown_parser import UnsupportedMarkdownError from .client import GoogleAPIClient, initialize_credentials from .page.slide import Slide from .presentation import Presentation diff --git a/gslides_api/agnostic/markdown_parser.py b/gslides_api/agnostic/markdown_parser.py index 438bca5..fbd4714 100644 --- a/gslides_api/agnostic/markdown_parser.py +++ b/gslides_api/agnostic/markdown_parser.py @@ -10,6 +10,12 @@ import marko + +class UnsupportedMarkdownError(ValueError): + """Raised when markdown contains elements that cannot be converted to the target format.""" + + pass + from gslides_api.agnostic.ir import ( FormattedDocument, FormattedList, @@ -26,6 +32,7 @@ def parse_markdown_to_ir( markdown_text: str, base_style: Optional[FullTextStyle] = None, heading_style: Optional[FullTextStyle] = None, + strict: bool = True, ) -> FormattedDocument: """Parse markdown string into platform-agnostic intermediate representation. 
@@ -33,9 +40,15 @@ def parse_markdown_to_ir( markdown_text: The markdown text to parse base_style: Optional base style to apply to all text heading_style: Optional style to apply to headings + strict: If True (default), raises UnsupportedMarkdownError for unsupported + elements. If False, logs an error and skips unsupported elements. Returns: FormattedDocument containing the parsed and styled content + + Raises: + UnsupportedMarkdownError: When strict=True and unsupported markdown + elements are encountered (e.g., fenced code blocks, block quotes). """ base_style = base_style or FullTextStyle() @@ -47,7 +60,7 @@ def parse_markdown_to_ir( doc = marko.Markdown().parse(markdown_text) # Convert AST to IR - return _markdown_ast_to_ir(doc, base_style=base_style, heading_style=heading_style) + return _markdown_ast_to_ir(doc, base_style=base_style, heading_style=heading_style, strict=strict) def _markdown_ast_to_ir( @@ -55,6 +68,7 @@ def _markdown_ast_to_ir( base_style: Optional[FullTextStyle] = None, heading_style: Optional[FullTextStyle] = None, list_depth: int = 0, + strict: bool = True, ) -> FormattedDocument: """Convert marko AST to platform-agnostic IR. @@ -63,6 +77,8 @@ def _markdown_ast_to_ir( base_style: Base text style heading_style: Heading text style list_depth: Current nesting level for lists + strict: If True, raises UnsupportedMarkdownError for unsupported elements. + If False, logs an error and skips them. 
Returns: FormattedDocument with parsed content @@ -83,7 +99,7 @@ def _markdown_ast_to_ir( # Process each child of the document for child in markdown_ast.children: - elements = _process_ast_node(child, base_style, heading_style, list_depth) + elements = _process_ast_node(child, base_style, heading_style, list_depth, strict) document.elements.extend(elements) return document @@ -94,6 +110,7 @@ def _process_ast_node( base_style: FullTextStyle, heading_style: FullTextStyle, list_depth: int = 0, + strict: bool = True, ) -> list[FormattedParagraph | FormattedList]: """Process a single AST node and return IR elements. @@ -102,26 +119,34 @@ def _process_ast_node( base_style: Base text style heading_style: Heading text style list_depth: Current list nesting depth + strict: If True, raises UnsupportedMarkdownError for unsupported elements. + If False, logs an error and skips them. Returns: List of IR elements (paragraphs or lists) """ if isinstance(node, marko.block.Paragraph): - return [_process_paragraph(node, base_style, heading_style, list_depth)] + return [_process_paragraph(node, base_style, heading_style, list_depth, strict)] elif isinstance(node, marko.block.Heading): - return [_process_heading(node, heading_style, list_depth)] + return [_process_heading(node, heading_style, list_depth, strict)] elif isinstance(node, marko.block.List): - return [_process_list(node, base_style, heading_style, list_depth)] + return [_process_list(node, base_style, heading_style, list_depth, strict)] elif isinstance(node, marko.block.BlankLine): # Blank lines create empty paragraphs return [FormattedParagraph(runs=[])] else: - logger.warning(f"Unsupported block element: {type(node)}") - return [] + if strict: + raise UnsupportedMarkdownError( + f"Unsupported block element: {type(node).__name__}. " + f"Use strict=False to skip unsupported elements." 
+ ) + else: + logger.error(f"Unsupported block element: {type(node)}, skipping") + return [] def _process_paragraph( @@ -129,6 +154,7 @@ def _process_paragraph( base_style: FullTextStyle, heading_style: FullTextStyle, list_depth: int = 0, + strict: bool = True, ) -> FormattedParagraph: """Process a paragraph node into a FormattedParagraph. @@ -137,13 +163,14 @@ def _process_paragraph( base_style: Base text style heading_style: Heading style list_depth: Current list depth + strict: If True, raises UnsupportedMarkdownError for unsupported elements. Returns: FormattedParagraph with styled text runs """ runs = [] for child in para.children: - runs.extend(_process_inline_node(child, base_style, heading_style, list_depth)) + runs.extend(_process_inline_node(child, base_style, heading_style, list_depth, strict)) return FormattedParagraph(runs=runs, is_heading=False) @@ -152,6 +179,7 @@ def _process_heading( heading: marko.block.Heading, heading_style: FullTextStyle, list_depth: int = 0, + strict: bool = True, ) -> FormattedParagraph: """Process a heading node into a FormattedParagraph with heading flag. @@ -159,13 +187,14 @@ def _process_heading( heading: Marko heading node heading_style: Heading text style list_depth: Current list depth + strict: If True, raises UnsupportedMarkdownError for unsupported elements. Returns: FormattedParagraph marked as heading """ runs = [] for child in heading.children: - runs.extend(_process_inline_node(child, heading_style, heading_style, list_depth)) + runs.extend(_process_inline_node(child, heading_style, heading_style, list_depth, strict)) return FormattedParagraph( runs=runs, @@ -179,6 +208,7 @@ def _process_list( base_style: FullTextStyle, heading_style: FullTextStyle, list_depth: int = 0, + strict: bool = True, ) -> FormattedList: """Process a list node into a FormattedList. 
@@ -187,6 +217,7 @@ def _process_list( base_style: Base text style heading_style: Heading style list_depth: Current list depth + strict: If True, raises UnsupportedMarkdownError for unsupported elements. Returns: FormattedList with list items @@ -195,7 +226,7 @@ def _process_list( for child in list_node.children: if isinstance(child, marko.block.ListItem): # _process_list_item returns a list (main item + nested items) - items.extend(_process_list_item(child, base_style, heading_style, list_depth)) + items.extend(_process_list_item(child, base_style, heading_style, list_depth, strict)) return FormattedList( items=items, @@ -209,6 +240,7 @@ def _process_list_item( base_style: FullTextStyle, heading_style: FullTextStyle, list_depth: int = 0, + strict: bool = True, ) -> list[FormattedListItem]: """Process a list item node into FormattedListItems. @@ -217,6 +249,7 @@ def _process_list_item( base_style: Base text style heading_style: Heading style list_depth: Current list depth + strict: If True, raises UnsupportedMarkdownError for unsupported elements. Returns: List of FormattedListItem objects - the main item plus any nested items @@ -226,13 +259,19 @@ def _process_list_item( for child in list_item.children: if isinstance(child, marko.block.Paragraph): - paragraphs.append(_process_paragraph(child, base_style, heading_style, list_depth + 1)) + paragraphs.append(_process_paragraph(child, base_style, heading_style, list_depth + 1, strict)) elif isinstance(child, marko.block.List): # Nested list - process and keep items with their correct nesting levels - nested_list = _process_list(child, base_style, heading_style, list_depth + 1) + nested_list = _process_list(child, base_style, heading_style, list_depth + 1, strict) nested_items.extend(nested_list.items) else: - logger.warning(f"Unsupported list item child: {type(child)}") + if strict: + raise UnsupportedMarkdownError( + f"Unsupported list item child: {type(child).__name__}. 
" + f"Use strict=False to skip unsupported elements." + ) + else: + logger.error(f"Unsupported list item child: {type(child)}, skipping") # Return the main item followed by any nested items result = [FormattedListItem( @@ -248,6 +287,7 @@ def _process_inline_node( base_style: FullTextStyle, heading_style: FullTextStyle, list_depth: int = 0, + strict: bool = True, ) -> list[FormattedTextRun]: """Process an inline node into text runs. @@ -256,6 +296,8 @@ def _process_inline_node( base_style: Base text style heading_style: Heading style list_depth: Current list depth + strict: If True, raises UnsupportedMarkdownError for unsupported elements. + If False, logs an error and skips them. Returns: List of FormattedTextRun objects @@ -277,7 +319,7 @@ def _process_inline_node( italic_style.markdown.italic = not italic_style.markdown.italic runs = [] for child in node.children: - runs.extend(_process_inline_node(child, italic_style, heading_style, list_depth)) + runs.extend(_process_inline_node(child, italic_style, heading_style, list_depth, strict)) return runs elif isinstance(node, marko.inline.StrongEmphasis): @@ -285,7 +327,7 @@ def _process_inline_node( bold_style.markdown.bold = True runs = [] for child in node.children: - runs.extend(_process_inline_node(child, bold_style, heading_style, list_depth)) + runs.extend(_process_inline_node(child, bold_style, heading_style, list_depth, strict)) return runs elif isinstance(node, marko.inline.Link): @@ -294,9 +336,15 @@ def _process_inline_node( link_style.rich.underline = True runs = [] for child in node.children: - runs.extend(_process_inline_node(child, link_style, heading_style, list_depth)) + runs.extend(_process_inline_node(child, link_style, heading_style, list_depth, strict)) return runs else: - logger.warning(f"Unsupported inline element: {type(node)}") - return [] + if strict: + raise UnsupportedMarkdownError( + f"Unsupported inline element: {type(node).__name__}. " + f"Use strict=False to skip unsupported elements." 
+ ) + else: + logger.error(f"Unsupported inline element: {type(node)}, skipping") + return [] diff --git a/gslides_api/element/shape.py b/gslides_api/element/shape.py index fb3ef78..bd99d98 100644 --- a/gslides_api/element/shape.py +++ b/gslides_api/element/shape.py @@ -155,6 +155,7 @@ def write_text( overwrite: bool = True, autoscale: bool = False, api_client: Optional[GoogleAPIClient] = None, + strict: bool = True, ): """Write text to the shape, optionally parsing as markdown. @@ -167,6 +168,9 @@ def write_text( overwrite: If True, delete existing text before writing autoscale: If True, scale font size to fit text in the element api_client: Optional client to use for the API call + strict: If True (default), raises UnsupportedMarkdownError for unsupported + elements (e.g., fenced code blocks, block quotes). If False, logs an + error and skips unsupported elements. """ size_inches = self.absolute_size(OutputUnit.IN) if not self.shape.text: @@ -181,6 +185,7 @@ def write_text( overwrite=overwrite, autoscale=autoscale, size_inches=size_inches, + strict=strict, ) for r in requests: diff --git a/gslides_api/element/table.py b/gslides_api/element/table.py index 7d39c56..54c4847 100644 --- a/gslides_api/element/table.py +++ b/gslides_api/element/table.py @@ -125,6 +125,7 @@ def write_text_to_cell_requests( overwrite: bool = True, autoscale: bool = False, check_shape: bool = True, + strict: bool = True, ) -> List[GSlidesAPIRequest]: if isinstance(location, Sequence): location = TableCellLocation(rowIndex=location[0], columnIndex=location[1]) @@ -158,6 +159,7 @@ def write_text_to_cell_requests( overwrite=overwrite, autoscale=autoscale, size_inches=size_inches, + strict=strict, ) else: # Cell exists but has no text content (empty cell from API) @@ -171,6 +173,7 @@ def write_text_to_cell_requests( overwrite=overwrite, autoscale=autoscale, size_inches=size_inches, + strict=strict, ) else: # Table structure not populated yet (e.g., during creation from markdown) @@ -208,6 
+211,7 @@ def write_text_to_cell_requests( overwrite=overwrite, autoscale=autoscale, size_inches=size_inches, + strict=strict, ) # Set objectId and cellLocation on all requests @@ -278,6 +282,7 @@ def write_text_to_cell( overwrite: bool = True, autoscale: bool = False, api_client: Optional[GoogleAPIClient] = None, + strict: bool = True, ) -> dict[str, Any] | None: requests = self.write_text_to_cell_requests( text=text, @@ -286,6 +291,7 @@ def write_text_to_cell( styles=styles, overwrite=overwrite, autoscale=autoscale, + strict=strict, ) if requests: client = api_client or default_api_client diff --git a/gslides_api/element/text_content.py b/gslides_api/element/text_content.py index b54db55..e39e29f 100644 --- a/gslides_api/element/text_content.py +++ b/gslides_api/element/text_content.py @@ -121,6 +121,7 @@ def write_text_requests( overwrite: bool = True, autoscale: bool = False, size_inches: Tuple[float, float] | None = None, + strict: bool = True, ): """Convert the text content to a list of requests to update the text in the element. @@ -133,6 +134,9 @@ def write_text_requests( overwrite: If True, delete existing text before writing autoscale: If True, scale font size to fit text in the element size_inches: Required if autoscale=True, the size of the element in inches + strict: If True (default), raises UnsupportedMarkdownError for unsupported + elements (e.g., fenced code blocks, block quotes). If False, logs an + error and skips unsupported elements. IMPORTANT: This does not set the objectId on the requests as the container doesn't know it, so the caller must set it before sending the requests, ditto for CellLocation if needed. 
@@ -159,7 +163,7 @@ def write_text_requests( style_args["heading_style"] = rich_style_to_gslides(styles[0]) style_args["base_style"] = rich_style_to_gslides(styles[1]) - requests += markdown_to_text_elements(text, **style_args) + requests += markdown_to_text_elements(text, strict=strict, **style_args) # TODO: this is broken, we should use different logic to just dump raw text, asterisks, hashes and all if not as_markdown: diff --git a/gslides_api/markdown/from_markdown.py b/gslides_api/markdown/from_markdown.py index a4d21b4..b1fe6cb 100644 --- a/gslides_api/markdown/from_markdown.py +++ b/gslides_api/markdown/from_markdown.py @@ -181,8 +181,28 @@ def markdown_to_text_elements( numbered_glyph_preset: Optional[ BulletGlyphPreset ] = BulletGlyphPreset.NUMBERED_DIGIT_ALPHA_ROMAN, + strict: bool = True, ) -> list[GSlidesAPIRequest]: + """Convert markdown text to Google Slides API requests. + Args: + markdown_text: The markdown text to convert + base_style: Base text style for normal text + heading_style: Style for headings + start_index: Starting index for text insertion + bullet_glyph_preset: Glyph preset for bullet lists + numbered_glyph_preset: Glyph preset for numbered lists + strict: If True (default), raises UnsupportedMarkdownError for unsupported + elements (e.g., fenced code blocks, block quotes). If False, logs an + error and skips unsupported elements. + + Returns: + List of Google Slides API requests + + Raises: + UnsupportedMarkdownError: When strict=True and unsupported markdown + elements are encountered. 
+ """ heading_style = heading_style or copy.deepcopy(base_style) heading_style = heading_style or TextStyle() heading_style.bold = True @@ -193,7 +213,7 @@ def markdown_to_text_elements( agnostic_heading_style = gslides_style_to_full(heading_style) # Use platform-agnostic markdown parser - ir_doc = parse_markdown_to_ir(markdown_text, agnostic_base_style, agnostic_heading_style) + ir_doc = parse_markdown_to_ir(markdown_text, agnostic_base_style, agnostic_heading_style, strict) # Convert IR to GSlides TextElements (GSlides-specific logic) elements_and_bullets = _ir_to_text_elements(ir_doc, base_style) diff --git a/tests/test_markdown_features.py b/tests/test_markdown_features.py index 7f7db75..3d1f905 100644 --- a/tests/test_markdown_features.py +++ b/tests/test_markdown_features.py @@ -8,8 +8,9 @@ import pytest -from gslides_api.markdown.from_markdown import markdown_to_text_elements +from gslides_api.agnostic.markdown_parser import UnsupportedMarkdownError from gslides_api.domain.text import TextStyle +from gslides_api.markdown.from_markdown import markdown_to_text_elements class TestMarkdownFeatureSupport: @@ -181,4 +182,101 @@ def test_malformed_markdown(self): assert result is not None except Exception as e: print(f"Malformed markdown '{markdown}' caused: {e}") + + +class TestUnsupportedMarkdownElements: + """Test that unsupported markdown elements raise exceptions by default.""" + + def test_fenced_code_block_raises_by_default(self): + """Test that fenced code blocks raise UnsupportedMarkdownError by default.""" + markdown = """``` +code block +```""" + with pytest.raises(UnsupportedMarkdownError) as exc_info: + markdown_to_text_elements(markdown) + assert "FencedCode" in str(exc_info.value) + assert "strict=False" in str(exc_info.value) + + def test_fenced_code_block_skipped_in_lenient_mode(self): + """Test that fenced code blocks are skipped with strict=False.""" + markdown = """``` +code block +```""" + # Should not raise, but returns empty + result = 
markdown_to_text_elements(markdown, strict=False) + assert result is not None + # Empty because fenced code block is skipped + assert len(result) == 0 + + def test_unsupported_block_elements(self): + """Test all unsupported block element types raise exceptions.""" + unsupported_blocks = [ + # Fenced code block + ("```\ncode\n```", "FencedCode"), + # Block quote + ("> This is a quote", "Quote"), + # Horizontal rule + ("---", "ThematicBreak"), + # Indented code block + ("    indented code", "CodeBlock"), + ] + + for markdown, expected_type in unsupported_blocks: + with pytest.raises(UnsupportedMarkdownError) as exc_info: + markdown_to_text_elements(markdown) + assert expected_type in str(exc_info.value), f"Expected {expected_type} in error message for: {markdown}" + + def test_unsupported_inline_elements(self): + """Test unsupported inline element types raise exceptions.""" + unsupported_inlines = [ + # Image + ("![alt text](image.png)", "Image"), + # AutoLink + ("<https://example.com>", "AutoLink"), + ] + + for markdown, expected_type in unsupported_inlines: + with pytest.raises(UnsupportedMarkdownError) as exc_info: + markdown_to_text_elements(markdown) + assert expected_type in str(exc_info.value), f"Expected {expected_type} in error message for: {markdown}" + + def test_mixed_supported_and_unsupported_strict(self): + """Test that text with both supported and unsupported elements raises in strict mode.""" + markdown = """Some text +``` +code block +``` +More text""" + with pytest.raises(UnsupportedMarkdownError): + markdown_to_text_elements(markdown) + + def test_mixed_supported_and_unsupported_lenient(self): + """Test that supported elements are processed when unsupported ones are skipped.""" + markdown = """Some text +``` +code block +``` +More text""" + result = markdown_to_text_elements(markdown, strict=False) + assert result is not None + # Should have requests for "Some text" and "More text" paragraphs + assert len(result) > 0 + + def test_strict_parameter_propagates(self): + 
"""Test that strict=False allows all unsupported elements to be skipped.""" + # Mix of multiple unsupported elements + markdown = """> Quote +--- +``` +code +``` +Normal text""" + # Should not raise in lenient mode + result = markdown_to_text_elements(markdown, strict=False) + assert result is not None + + def test_exception_is_exported(self): + """Test that UnsupportedMarkdownError can be imported from the package.""" + from gslides_api import UnsupportedMarkdownError as ImportedError + assert ImportedError is UnsupportedMarkdownError # Some malformed markdown might legitimately fail