Skip to content

Commit c1455ed

Browse files
committed
rss-bot: Add option to convert body to Markdown
1 parent 1ae6dbe commit c1455ed

3 files changed

Lines changed: 23 additions & 1 deletion

File tree

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ module = [
6868
"google_auth_oauthlib.*",
6969
"googleapiclient.*",
7070
"irc.*",
71+
"markdownify.*",
7172
"mercurial.*",
7273
"nio.*",
7374
"oauth2client.*",
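(file path not captured in this page dump; the hunk below contains pip requirement specifiers, so this is presumably a requirements file — confirm against the commit)
Lines changed: 1 addition & 0 deletions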
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
feedparser>=6.0.10
2+
markdownify>=0.11.6

zulip/integrations/rss/rss-bot

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@ import re
1313
import sys
1414
import time
1515
import urllib.parse
16+
from collections.abc import Callable
1617
from html.parser import HTMLParser
1718
from typing import Any, Dict, List
1819

1920
import feedparser
21+
from markdownify import markdownify
2022
from typing_extensions import override
2123

2224
import zulip
@@ -107,6 +109,19 @@ parser.add_argument(
107109
default=30,
108110
action="store",
109111
)
112+
body = parser.add_mutually_exclusive_group()
113+
body.add_argument(
114+
"--strip",
115+
dest="strip",
116+
action="store_true",
117+
help="Strip HTML tags from body",
118+
)
119+
body.add_argument(
120+
"--markdownify",
121+
dest="strip",
122+
action="store_false",
123+
help="Convert body from HTML to Markdown",
124+
)
110125

111126
opts = parser.parse_args()
112127

@@ -198,7 +213,12 @@ def send_zulip(entry: Any, feed_name: str) -> Dict[str, Any]:
198213
body = unwrap_text(body)
199214

200215
title = f"**[{entry.title}]({entry.link})**\n" if hasattr(entry, "title") else ""
201-
content = f"{title}{strip_tags(body)}\n{entry.link}"
216+
217+
def md(html: str) -> str:
218+
return markdownify(html, escape_underscores=False)
219+
220+
convert: Callable[[str], str] = strip_tags if opts.strip else md
221+
content = f"{title}{convert(body)}\n{entry.link}"
202222

203223
if opts.math:
204224
content = content.replace("$", "$$")

0 commit comments

Comments
 (0)