Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions gslides_api/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from ._version import __version__, __version_info__
from .agnostic.markdown_parser import UnsupportedMarkdownError
from .client import GoogleAPIClient, initialize_credentials
from .page.slide import Slide
from .presentation import Presentation
84 changes: 66 additions & 18 deletions gslides_api/agnostic/markdown_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@

import marko


class UnsupportedMarkdownError(ValueError):
    """Raised when markdown contains elements that cannot be converted.

    The markdown parser raises this in strict mode when it meets a block
    element, list-item child, or inline element it has no conversion for.
    With ``strict=False`` the parser logs an error and skips the element
    instead of raising.

    It subclasses ``ValueError``, so handlers that already catch
    ``ValueError`` around markdown conversion also catch this exception.
    """

from gslides_api.agnostic.ir import (
FormattedDocument,
FormattedList,
Expand All @@ -26,16 +32,23 @@ def parse_markdown_to_ir(
markdown_text: str,
base_style: Optional[FullTextStyle] = None,
heading_style: Optional[FullTextStyle] = None,
strict: bool = True,
) -> FormattedDocument:
"""Parse markdown string into platform-agnostic intermediate representation.

Args:
markdown_text: The markdown text to parse
base_style: Optional base style to apply to all text
heading_style: Optional style to apply to headings
strict: If True (default), raises UnsupportedMarkdownError for unsupported
elements. If False, logs an error and skips unsupported elements.

Returns:
FormattedDocument containing the parsed and styled content

Raises:
UnsupportedMarkdownError: When strict=True and unsupported markdown
elements are encountered (e.g., fenced code blocks, block quotes).
"""
base_style = base_style or FullTextStyle()

Expand All @@ -47,14 +60,15 @@ def parse_markdown_to_ir(
doc = marko.Markdown().parse(markdown_text)

# Convert AST to IR
return _markdown_ast_to_ir(doc, base_style=base_style, heading_style=heading_style)
return _markdown_ast_to_ir(doc, base_style=base_style, heading_style=heading_style, strict=strict)


def _markdown_ast_to_ir(
markdown_ast: Any,
base_style: Optional[FullTextStyle] = None,
heading_style: Optional[FullTextStyle] = None,
list_depth: int = 0,
strict: bool = True,
) -> FormattedDocument:
"""Convert marko AST to platform-agnostic IR.

Expand All @@ -63,6 +77,8 @@ def _markdown_ast_to_ir(
base_style: Base text style
heading_style: Heading text style
list_depth: Current nesting level for lists
strict: If True, raises UnsupportedMarkdownError for unsupported elements.
If False, logs an error and skips them.

Returns:
FormattedDocument with parsed content
Expand All @@ -83,7 +99,7 @@ def _markdown_ast_to_ir(

# Process each child of the document
for child in markdown_ast.children:
elements = _process_ast_node(child, base_style, heading_style, list_depth)
elements = _process_ast_node(child, base_style, heading_style, list_depth, strict)
document.elements.extend(elements)

return document
Expand All @@ -94,6 +110,7 @@ def _process_ast_node(
base_style: FullTextStyle,
heading_style: FullTextStyle,
list_depth: int = 0,
strict: bool = True,
) -> list[FormattedParagraph | FormattedList]:
"""Process a single AST node and return IR elements.

Expand All @@ -102,33 +119,42 @@ def _process_ast_node(
base_style: Base text style
heading_style: Heading text style
list_depth: Current list nesting depth
strict: If True, raises UnsupportedMarkdownError for unsupported elements.
If False, logs an error and skips them.

Returns:
List of IR elements (paragraphs or lists)
"""
if isinstance(node, marko.block.Paragraph):
return [_process_paragraph(node, base_style, heading_style, list_depth)]
return [_process_paragraph(node, base_style, heading_style, list_depth, strict)]

elif isinstance(node, marko.block.Heading):
return [_process_heading(node, heading_style, list_depth)]
return [_process_heading(node, heading_style, list_depth, strict)]

elif isinstance(node, marko.block.List):
return [_process_list(node, base_style, heading_style, list_depth)]
return [_process_list(node, base_style, heading_style, list_depth, strict)]

elif isinstance(node, marko.block.BlankLine):
# Blank lines create empty paragraphs
return [FormattedParagraph(runs=[])]

else:
logger.warning(f"Unsupported block element: {type(node)}")
return []
if strict:
raise UnsupportedMarkdownError(
f"Unsupported block element: {type(node).__name__}. "
f"Use strict=False to skip unsupported elements."
)
else:
logger.error(f"Unsupported block element: {type(node)}, skipping")
return []


def _process_paragraph(
para: marko.block.Paragraph,
base_style: FullTextStyle,
heading_style: FullTextStyle,
list_depth: int = 0,
strict: bool = True,
) -> FormattedParagraph:
"""Process a paragraph node into a FormattedParagraph.

Expand All @@ -137,13 +163,14 @@ def _process_paragraph(
base_style: Base text style
heading_style: Heading style
list_depth: Current list depth
strict: If True, raises UnsupportedMarkdownError for unsupported elements.

Returns:
FormattedParagraph with styled text runs
"""
runs = []
for child in para.children:
runs.extend(_process_inline_node(child, base_style, heading_style, list_depth))
runs.extend(_process_inline_node(child, base_style, heading_style, list_depth, strict))

return FormattedParagraph(runs=runs, is_heading=False)

Expand All @@ -152,20 +179,22 @@ def _process_heading(
heading: marko.block.Heading,
heading_style: FullTextStyle,
list_depth: int = 0,
strict: bool = True,
) -> FormattedParagraph:
"""Process a heading node into a FormattedParagraph with heading flag.

Args:
heading: Marko heading node
heading_style: Heading text style
list_depth: Current list depth
strict: If True, raises UnsupportedMarkdownError for unsupported elements.

Returns:
FormattedParagraph marked as heading
"""
runs = []
for child in heading.children:
runs.extend(_process_inline_node(child, heading_style, heading_style, list_depth))
runs.extend(_process_inline_node(child, heading_style, heading_style, list_depth, strict))

return FormattedParagraph(
runs=runs,
Expand All @@ -179,6 +208,7 @@ def _process_list(
base_style: FullTextStyle,
heading_style: FullTextStyle,
list_depth: int = 0,
strict: bool = True,
) -> FormattedList:
"""Process a list node into a FormattedList.

Expand All @@ -187,6 +217,7 @@ def _process_list(
base_style: Base text style
heading_style: Heading style
list_depth: Current list depth
strict: If True, raises UnsupportedMarkdownError for unsupported elements.

Returns:
FormattedList with list items
Expand All @@ -195,7 +226,7 @@ def _process_list(
for child in list_node.children:
if isinstance(child, marko.block.ListItem):
# _process_list_item returns a list (main item + nested items)
items.extend(_process_list_item(child, base_style, heading_style, list_depth))
items.extend(_process_list_item(child, base_style, heading_style, list_depth, strict))

return FormattedList(
items=items,
Expand All @@ -209,6 +240,7 @@ def _process_list_item(
base_style: FullTextStyle,
heading_style: FullTextStyle,
list_depth: int = 0,
strict: bool = True,
) -> list[FormattedListItem]:
"""Process a list item node into FormattedListItems.

Expand All @@ -217,6 +249,7 @@ def _process_list_item(
base_style: Base text style
heading_style: Heading style
list_depth: Current list depth
strict: If True, raises UnsupportedMarkdownError for unsupported elements.

Returns:
List of FormattedListItem objects - the main item plus any nested items
Expand All @@ -226,13 +259,19 @@ def _process_list_item(

for child in list_item.children:
if isinstance(child, marko.block.Paragraph):
paragraphs.append(_process_paragraph(child, base_style, heading_style, list_depth + 1))
paragraphs.append(_process_paragraph(child, base_style, heading_style, list_depth + 1, strict))
elif isinstance(child, marko.block.List):
# Nested list - process and keep items with their correct nesting levels
nested_list = _process_list(child, base_style, heading_style, list_depth + 1)
nested_list = _process_list(child, base_style, heading_style, list_depth + 1, strict)
nested_items.extend(nested_list.items)
else:
logger.warning(f"Unsupported list item child: {type(child)}")
if strict:
raise UnsupportedMarkdownError(
f"Unsupported list item child: {type(child).__name__}. "
f"Use strict=False to skip unsupported elements."
)
else:
logger.error(f"Unsupported list item child: {type(child)}, skipping")

# Return the main item followed by any nested items
result = [FormattedListItem(
Expand All @@ -248,6 +287,7 @@ def _process_inline_node(
base_style: FullTextStyle,
heading_style: FullTextStyle,
list_depth: int = 0,
strict: bool = True,
) -> list[FormattedTextRun]:
"""Process an inline node into text runs.

Expand All @@ -256,6 +296,8 @@ def _process_inline_node(
base_style: Base text style
heading_style: Heading style
list_depth: Current list depth
strict: If True, raises UnsupportedMarkdownError for unsupported elements.
If False, logs an error and skips them.

Returns:
List of FormattedTextRun objects
Expand All @@ -277,15 +319,15 @@ def _process_inline_node(
italic_style.markdown.italic = not italic_style.markdown.italic
runs = []
for child in node.children:
runs.extend(_process_inline_node(child, italic_style, heading_style, list_depth))
runs.extend(_process_inline_node(child, italic_style, heading_style, list_depth, strict))
return runs

elif isinstance(node, marko.inline.StrongEmphasis):
bold_style = copy.deepcopy(base_style)
bold_style.markdown.bold = True
runs = []
for child in node.children:
runs.extend(_process_inline_node(child, bold_style, heading_style, list_depth))
runs.extend(_process_inline_node(child, bold_style, heading_style, list_depth, strict))
return runs

elif isinstance(node, marko.inline.Link):
Expand All @@ -294,9 +336,15 @@ def _process_inline_node(
link_style.rich.underline = True
runs = []
for child in node.children:
runs.extend(_process_inline_node(child, link_style, heading_style, list_depth))
runs.extend(_process_inline_node(child, link_style, heading_style, list_depth, strict))
return runs

else:
logger.warning(f"Unsupported inline element: {type(node)}")
return []
if strict:
raise UnsupportedMarkdownError(
f"Unsupported inline element: {type(node).__name__}. "
f"Use strict=False to skip unsupported elements."
)
else:
logger.error(f"Unsupported inline element: {type(node)}, skipping")
return []
5 changes: 5 additions & 0 deletions gslides_api/element/shape.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ def write_text(
overwrite: bool = True,
autoscale: bool = False,
api_client: Optional[GoogleAPIClient] = None,
strict: bool = True,
):
"""Write text to the shape, optionally parsing as markdown.

Expand All @@ -167,6 +168,9 @@ def write_text(
overwrite: If True, delete existing text before writing
autoscale: If True, scale font size to fit text in the element
api_client: Optional client to use for the API call
strict: If True (default), raises UnsupportedMarkdownError for unsupported
elements (e.g., fenced code blocks, block quotes). If False, logs an
error and skips unsupported elements.
"""
size_inches = self.absolute_size(OutputUnit.IN)
if not self.shape.text:
Expand All @@ -181,6 +185,7 @@ def write_text(
overwrite=overwrite,
autoscale=autoscale,
size_inches=size_inches,
strict=strict,
)

for r in requests:
Expand Down
6 changes: 6 additions & 0 deletions gslides_api/element/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ def write_text_to_cell_requests(
overwrite: bool = True,
autoscale: bool = False,
check_shape: bool = True,
strict: bool = True,
font_scale_factor: float = 1.0,
template_styles: List[RichStyle] | None = None,
) -> List[GSlidesAPIRequest]:
Expand Down Expand Up @@ -194,6 +195,7 @@ def write_text_to_cell_requests(
overwrite=overwrite,
autoscale=autoscale,
size_inches=size_inches,
strict=strict,
)
else:
# Cell exists but has no text content (empty cell from API)
Expand All @@ -207,6 +209,7 @@ def write_text_to_cell_requests(
overwrite=overwrite,
autoscale=autoscale,
size_inches=size_inches,
strict=strict,
)
else:
# Table structure not populated yet (e.g., during creation from markdown)
Expand Down Expand Up @@ -259,6 +262,7 @@ def write_text_to_cell_requests(
overwrite=overwrite,
autoscale=autoscale,
size_inches=size_inches,
strict=strict,
)

# Set objectId and cellLocation on all requests
Expand Down Expand Up @@ -356,6 +360,7 @@ def write_text_to_cell(
overwrite: bool = True,
autoscale: bool = False,
api_client: Optional[GoogleAPIClient] = None,
strict: bool = True,
) -> dict[str, Any] | None:
requests = self.write_text_to_cell_requests(
text=text,
Expand All @@ -364,6 +369,7 @@ def write_text_to_cell(
styles=styles,
overwrite=overwrite,
autoscale=autoscale,
strict=strict,
)
if requests:
client = api_client or default_api_client
Expand Down
6 changes: 5 additions & 1 deletion gslides_api/element/text_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def write_text_requests(
overwrite: bool = True,
autoscale: bool = False,
size_inches: Tuple[float, float] | None = None,
strict: bool = True,
):
"""Convert the text content to a list of requests to update the text in the element.

Expand All @@ -133,6 +134,9 @@ def write_text_requests(
overwrite: If True, delete existing text before writing
autoscale: If True, scale font size to fit text in the element
size_inches: Required if autoscale=True, the size of the element in inches
strict: If True (default), raises UnsupportedMarkdownError for unsupported
elements (e.g., fenced code blocks, block quotes). If False, logs an
error and skips unsupported elements.

IMPORTANT: This does not set the objectId on the requests as the container doesn't know it,
so the caller must set it before sending the requests, ditto for CellLocation if needed.
Expand All @@ -159,7 +163,7 @@ def write_text_requests(
style_args["heading_style"] = rich_style_to_gslides(styles[0])
style_args["base_style"] = rich_style_to_gslides(styles[1])

requests += markdown_to_text_elements(text, **style_args)
requests += markdown_to_text_elements(text, strict=strict, **style_args)

# TODO: this is broken, we should use different logic to just dump raw text, asterisks, hashes and all
if not as_markdown:
Expand Down
Loading