From f33a11e10f1206256612868dead62aab130d610b Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Wed, 31 Jul 2024 15:17:57 +0100 Subject: [PATCH 01/36] Linaro changes for JSON files consumed with React --- README.rst | 23 ++------ pyproject.toml | 2 +- sphinxcontrib/serializinghtml/__init__.py | 20 +++++-- .../serializinghtml/nav_html_to_json.py | 54 +++++++++++++++++++ 4 files changed, 75 insertions(+), 24 deletions(-) create mode 100644 sphinxcontrib/serializinghtml/nav_html_to_json.py diff --git a/README.rst b/README.rst index d9ed40a..78eb63a 100644 --- a/README.rst +++ b/README.rst @@ -1,22 +1,5 @@ -============================= -sphinxcontrib-serializinghtml -============================= +This is a fork of https://github.com/sphinx-doc/sphinxcontrib-serializinghtml -sphinxcontrib-serializinghtml is a sphinx extension which outputs -"serialized" HTML files (json and pickle). +Changes made to this fork are to facilitate the creation of JSON files suitable for consumption by React. -For more details, please visit http://www.sphinx-doc.org/. - -Installing -========== - -Install from PyPI:: - - pip install -U sphinxcontrib-serializinghtml - -Contributing -============ - -See `CONTRIBUTING.rst`__ - -.. __: https://github.com/sphinx-doc/sphinx/blob/master/CONTRIBUTING.rst +Since those changes are very specific, they have not been contributed back to the original repo. 
diff --git a/pyproject.toml b/pyproject.toml index f14054e..a8a4329 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ classifiers = [ "Topic :: Text Processing", "Topic :: Utilities", ] -dependencies = [] +dependencies = ["beautifulsoup4"] dynamic = ["version"] [project.optional-dependencies] diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index bdbeb6f..d6e83ab 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -11,7 +11,7 @@ from sphinx.locale import get_translation from sphinx.util.osutil import SEP, copyfile, ensuredir, os_path -from sphinxcontrib.serializinghtml import jsonimpl +from sphinxcontrib.serializinghtml import jsonimpl, nav_html_to_json if TYPE_CHECKING: from collections.abc import Sequence @@ -91,9 +91,23 @@ def handle_page(self, pagename: str, ctx: dict[str, Any], templatename: str = 'p ctx.setdefault('pathto', lambda p: p) self.add_sidebars(pagename, ctx) + # Add the toc tree as a JSON dictionary + ctx['toctree'] = nav_html_to_json.convert_nav_html_to_json(self._get_local_toctree(pagename)) + if not outfilename: + # PJC: Ensure that index files are actually written under the name of the + # directory leafname. 
+ parts = pagename.split(SEP) + if parts[len(parts)-1] == "index": + if len(parts) == 1: + # Use the project name + page_filename = self.get_builder_config('project_name', 'html') + else: + page_filename = SEP.join(parts[:-1]) + else: + page_filename = pagename outfilename = path.join(self.outdir, - os_path(pagename) + self.out_suffix) + os_path(page_filename) + self.out_suffix) # we're not taking the return value here, since no template is # actually rendered @@ -161,7 +175,7 @@ class JSONHTMLBuilder(SerializingHTMLBuilder): implementation_dumps_unicode = True indexer_format = jsonimpl indexer_dumps_unicode = True - out_suffix = '.fjson' + out_suffix = '.json' globalcontext_filename = 'globalcontext.json' searchindex_filename = 'searchindex.json' diff --git a/sphinxcontrib/serializinghtml/nav_html_to_json.py b/sphinxcontrib/serializinghtml/nav_html_to_json.py new file mode 100644 index 0000000..ff9b3d3 --- /dev/null +++ b/sphinxcontrib/serializinghtml/nav_html_to_json.py @@ -0,0 +1,54 @@ +from bs4 import BeautifulSoup, element +import json + +def section_links(parent_entry: element.Tag, list_entry: element.Tag) -> dict: + section_result = [] + for child in list_entry.children: + if type(child) is element.Tag and child.name == "li": + section_result.append(convert_tag_to_link(child)) + return { + "type": "expandable-link-group", + "text": parent_entry.contents[0].contents[0], + "href": parent_entry.contents[0]["href"], + "items": section_result + } + + +def convert_tag_to_link(item_entry: element.Tag) -> dict: + # The a tag is a child of the li tag + a_tag = item_entry.contents[0] + return { + "type": "link", + "text": a_tag.contents[0], + "href": a_tag["href"] + } + +def convert_nav_html_to_json(html: str) -> list: + result = [] + soup = BeautifulSoup(html, "html.parser") + + # Start with the unordered list + ul = soup.ul + pending_divider = False + # Iterate through list items + for child in ul.children: + if type(child) is element.Tag and child.name == "li": + # 
Is there a new unordered list within this section? + section = child.find_all("ul", limit=1) + if section != []: + # Yes, there is, so we have a sub-section. If we've got some content + # already, add a divider. + if result != []: + result.append({ "type": "divider" }) + # Now append the current page and the section links. The + # ul tag is the only child returned, hence [0] + result.append(section_links(child, section[0])) + # If there are any "normal" entries after this section + # add a divider first + pending_divider = True + else: + if pending_divider: + result.append({ "type": "divider" }) + pending_divider = False + result.append(convert_tag_to_link(child)) + return result From d2ea55f51560b399a7357e1994735868b5c17154 Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Wed, 31 Jul 2024 15:25:56 +0100 Subject: [PATCH 02/36] Update current_page_name to lose "/index" --- sphinxcontrib/serializinghtml/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index d6e83ab..747b72d 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -104,6 +104,7 @@ def handle_page(self, pagename: str, ctx: dict[str, Any], templatename: str = 'p page_filename = self.get_builder_config('project_name', 'html') else: page_filename = SEP.join(parts[:-1]) + ctx['current_page_name'] = page_filename else: page_filename = pagename outfilename = path.join(self.outdir, From 28e7ebdb55aa9ebff75c12b8ed98e8069a11d3dc Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Mon, 5 Aug 2024 08:45:04 +0100 Subject: [PATCH 03/36] Add dir/path support --- sphinxcontrib/serializinghtml/__init__.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index 747b72d..d037db4 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ 
b/sphinxcontrib/serializinghtml/__init__.py @@ -55,7 +55,24 @@ class SerializingHTMLBuilder(StandaloneHTMLBuilder): def init(self) -> None: self.build_info = BuildInfo(self.config, self.tags) - self.imagedir = '_images' + # Cope with whether or not Sphinx has the required configuration variables + # set. + # See HTML Builder comments for explanation of image setup & handling + html_image_dir = None + try: + html_image_dir = self.get_builder_config('image_dir', 'html') + except AttributeError: + pass + if html_image_dir is not None: + self.imagedir = html_image_dir + else: + self.imagedir = '_images' + html_image_path = None + try: + html_image_path = self.get_builder_config('image_path', 'html') + except AttributeError: + pass + self.imagepath = html_image_path self.current_docname = '' self.theme = None # type: ignore[assignment] # no theme necessary self.templates = None # no template bridge necessary From b2d90cbfe0e1b6ca8a67bce8a58399fcd11c8ebf Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Mon, 5 Aug 2024 09:35:14 +0100 Subject: [PATCH 04/36] Add Linaro to version string --- sphinxcontrib/serializinghtml/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index d037db4..06dc166 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ... def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ... def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ... 
-__version__ = '2.0.0' +__version__ = '2.0.0+Linaro' __version_info__ = (2, 0, 0) package_dir = path.abspath(path.dirname(__file__)) From 62d87c1b89fad89034cb53e9d32ea40042884f59 Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Tue, 6 Aug 2024 14:47:12 +0100 Subject: [PATCH 05/36] Make URIs absolute for Solutions Hub --- sphinxcontrib/serializinghtml/__init__.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index 06dc166..8b3497e 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ... def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ... def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ... -__version__ = '2.0.0+Linaro' +__version__ = '2.0.0+Linaro-240806' __version_info__ = (2, 0, 0) package_dir = path.abspath(path.dirname(__file__)) @@ -83,11 +83,16 @@ def init(self) -> None: self.use_index = self.get_builder_config('use_index', 'html') def get_target_uri(self, docname: str, typ: str | None = None) -> str: + print(f"get_target_uri: {docname}") + # For the Solutions Hub, we want all URIs to be absolute. 
They will + # all be /library// except that "index" will + # get trimmed off + project_name = self.get_builder_config('project_name', 'html') if docname == 'index': - return '' + return f"/library/{project_name}/{project_name}" if docname.endswith(SEP + 'index'): - return docname[:-5] # up to sep - return docname + SEP + return f"/library/{project_name}/{docname[:-5]}" # up to sep + return f"/library/{project_name}/{docname}" def dump_context(self, context: dict[str, Any], filename: str | os.PathLike[str]) -> None: context = context.copy() From e8a198143c11a37f842c085d3cb0c91e33e7ff2a Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Tue, 6 Aug 2024 14:51:31 +0100 Subject: [PATCH 06/36] Remove print statement --- sphinxcontrib/serializinghtml/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index 8b3497e..622d12d 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -83,7 +83,6 @@ def init(self) -> None: self.use_index = self.get_builder_config('use_index', 'html') def get_target_uri(self, docname: str, typ: str | None = None) -> str: - print(f"get_target_uri: {docname}") # For the Solutions Hub, we want all URIs to be absolute. 
They will # all be /library// except that "index" will # get trimmed off From 11ea0ae28bf6d10be30559a54cbfcd2f8671de5a Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Tue, 6 Aug 2024 14:57:32 +0100 Subject: [PATCH 07/36] Trying another URI approach --- sphinxcontrib/serializinghtml/__init__.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index 622d12d..a58bef0 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -83,15 +83,11 @@ def init(self) -> None: self.use_index = self.get_builder_config('use_index', 'html') def get_target_uri(self, docname: str, typ: str | None = None) -> str: - # For the Solutions Hub, we want all URIs to be absolute. They will - # all be /library// except that "index" will - # get trimmed off - project_name = self.get_builder_config('project_name', 'html') if docname == 'index': - return f"/library/{project_name}/{project_name}" + return "/" if docname.endswith(SEP + 'index'): - return f"/library/{project_name}/{docname[:-5]}" # up to sep - return f"/library/{project_name}/{docname}" + return f"/{docname[:-5]}" # up to sep + return f"/{docname}" def dump_context(self, context: dict[str, Any], filename: str | os.PathLike[str]) -> None: context = context.copy() From d5e8f30dfe996aff47485d90ec1edcabf50d406b Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Tue, 6 Aug 2024 15:06:33 +0100 Subject: [PATCH 08/36] Completely revert URI changes. Didn't work. 
--- sphinxcontrib/serializinghtml/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index a58bef0..f037d4b 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -84,10 +84,10 @@ def init(self) -> None: def get_target_uri(self, docname: str, typ: str | None = None) -> str: if docname == 'index': - return "/" + return "" if docname.endswith(SEP + 'index'): - return f"/{docname[:-5]}" # up to sep - return f"/{docname}" + return docname[:-5] # up to sep + return docname def dump_context(self, context: dict[str, Any], filename: str | os.PathLike[str]) -> None: context = context.copy() From 559f8d49ec78d3bb5a21fe56a28bb9b5e3479293 Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Tue, 6 Aug 2024 16:13:49 +0100 Subject: [PATCH 09/36] Clean up navigation hrefs --- sphinxcontrib/serializinghtml/nav_html_to_json.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sphinxcontrib/serializinghtml/nav_html_to_json.py b/sphinxcontrib/serializinghtml/nav_html_to_json.py index ff9b3d3..9cbf8af 100644 --- a/sphinxcontrib/serializinghtml/nav_html_to_json.py +++ b/sphinxcontrib/serializinghtml/nav_html_to_json.py @@ -1,6 +1,14 @@ from bs4 import BeautifulSoup, element import json +def clean_href(href: str) -> str: + """ Make sure the href doesn't start or end with a / """ + if href[0] == "/": + href = href[1:] + if href[-1] == "/": + href = href[:-1] + return href + def section_links(parent_entry: element.Tag, list_entry: element.Tag) -> dict: section_result = [] for child in list_entry.children: @@ -9,7 +17,7 @@ def section_links(parent_entry: element.Tag, list_entry: element.Tag) -> dict: return { "type": "expandable-link-group", "text": parent_entry.contents[0].contents[0], - "href": parent_entry.contents[0]["href"], + "href": clean_href(parent_entry.contents[0]["href"]), "items": 
section_result } @@ -20,7 +28,7 @@ def convert_tag_to_link(item_entry: element.Tag) -> dict: return { "type": "link", "text": a_tag.contents[0], - "href": a_tag["href"] + "href": clean_href(a_tag["href"]) } def convert_nav_html_to_json(html: str) -> list: From 6ec2de085a2cf74338063cde1ab859bd61201fe3 Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Wed, 7 Aug 2024 15:22:21 +0100 Subject: [PATCH 10/36] Fix UL bug and bump date --- sphinxcontrib/serializinghtml/__init__.py | 2 +- .../serializinghtml/nav_html_to_json.py | 41 ++++++++++--------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index f037d4b..95ef5d1 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ... def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ... def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ... -__version__ = '2.0.0+Linaro-240806' +__version__ = '2.0.0+Linaro-240807' __version_info__ = (2, 0, 0) package_dir = path.abspath(path.dirname(__file__)) diff --git a/sphinxcontrib/serializinghtml/nav_html_to_json.py b/sphinxcontrib/serializinghtml/nav_html_to_json.py index 9cbf8af..98a92a0 100644 --- a/sphinxcontrib/serializinghtml/nav_html_to_json.py +++ b/sphinxcontrib/serializinghtml/nav_html_to_json.py @@ -39,24 +39,25 @@ def convert_nav_html_to_json(html: str) -> list: ul = soup.ul pending_divider = False # Iterate through list items - for child in ul.children: - if type(child) is element.Tag and child.name == "li": - # Is there a new unordered list within this section? - section = child.find_all("ul", limit=1) - if section != []: - # Yes, there is, so we have a sub-section. If we've got some content - # already, add a divider. 
- if result != []: - result.append({ "type": "divider" }) - # Now append the current page and the section links. The - # ul tag is the only child returned, hence [0] - result.append(section_links(child, section[0])) - # If there are any "normal" entries after this section - # add a divider first - pending_divider = True - else: - if pending_divider: - result.append({ "type": "divider" }) - pending_divider = False - result.append(convert_tag_to_link(child)) + if ul is not None: + for child in ul.children: + if type(child) is element.Tag and child.name == "li": + # Is there a new unordered list within this section? + section = child.find_all("ul", limit=1) + if section != []: + # Yes, there is, so we have a sub-section. If we've got some content + # already, add a divider. + if result != []: + result.append({ "type": "divider" }) + # Now append the current page and the section links. The + # ul tag is the only child returned, hence [0] + result.append(section_links(child, section[0])) + # If there are any "normal" entries after this section + # add a divider first + pending_divider = True + else: + if pending_divider: + result.append({ "type": "divider" }) + pending_divider = False + result.append(convert_tag_to_link(child)) return result From 73c32d51b69a1156ad6eb158c1753f8475c46587 Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Wed, 4 Sep 2024 09:19:50 +0100 Subject: [PATCH 11/36] Handle TOCs with multiple UL sections --- sphinxcontrib/serializinghtml/__init__.py | 2 +- sphinxcontrib/serializinghtml/nav_html_to_json.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index 95ef5d1..373af65 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ... def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ... 
def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ... -__version__ = '2.0.0+Linaro-240807' +__version__ = '2.0.0+Linaro-240904' __version_info__ = (2, 0, 0) package_dir = path.abspath(path.dirname(__file__)) diff --git a/sphinxcontrib/serializinghtml/nav_html_to_json.py b/sphinxcontrib/serializinghtml/nav_html_to_json.py index 98a92a0..b212c78 100644 --- a/sphinxcontrib/serializinghtml/nav_html_to_json.py +++ b/sphinxcontrib/serializinghtml/nav_html_to_json.py @@ -1,5 +1,6 @@ from bs4 import BeautifulSoup, element import json +import sys def clean_href(href: str) -> str: """ Make sure the href doesn't start or end with a / """ @@ -39,7 +40,7 @@ def convert_nav_html_to_json(html: str) -> list: ul = soup.ul pending_divider = False # Iterate through list items - if ul is not None: + while ul is not None: for child in ul.children: if type(child) is element.Tag and child.name == "li": # Is there a new unordered list within this section? @@ -60,4 +61,9 @@ def convert_nav_html_to_json(html: str) -> list: result.append({ "type": "divider" }) pending_divider = False result.append(convert_tag_to_link(child)) + while True: + ul = ul.next_sibling + if ul is None or type(ul) is element.Tag: + break + # Not an acceptable type - loop and get the next sibling return result From 766614f02af0c2b4921ae2bfa7d2666a098711cc Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Wed, 4 Sep 2024 14:44:53 +0100 Subject: [PATCH 12/36] Implement fix for entities in img alt text --- sphinxcontrib/serializinghtml/__init__.py | 11 +++++++++-- .../{nav_html_to_json.py => html_assists.py} | 16 +++++++++++++++- 2 files changed, 24 insertions(+), 3 deletions(-) rename sphinxcontrib/serializinghtml/{nav_html_to_json.py => html_assists.py} (81%) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index 373af65..72537a1 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -11,7 +11,7 @@ from 
sphinx.locale import get_translation from sphinx.util.osutil import SEP, copyfile, ensuredir, os_path -from sphinxcontrib.serializinghtml import jsonimpl, nav_html_to_json +from sphinxcontrib.serializinghtml import html_assists, jsonimpl if TYPE_CHECKING: from collections.abc import Sequence @@ -109,7 +109,7 @@ def handle_page(self, pagename: str, ctx: dict[str, Any], templatename: str = 'p self.add_sidebars(pagename, ctx) # Add the toc tree as a JSON dictionary - ctx['toctree'] = nav_html_to_json.convert_nav_html_to_json(self._get_local_toctree(pagename)) + ctx['toctree'] = html_assists.convert_nav_html_to_json(self._get_local_toctree(pagename)) if not outfilename: # PJC: Ensure that index files are actually written under the name of the @@ -136,6 +136,13 @@ def handle_page(self, pagename: str, ctx: dict[str, Any], templatename: str = 'p if isinstance(ctx[key], types.FunctionType): del ctx[key] + # PJC: Some Linaro documentation has encoded attributes in image ALT text + # which then gets decoded when the HTML is loaded into the DOM, so + # we need to alter it by "escaping" the ampersands with & to + # prevent the decoding. 
+ if "body" in ctx: + ctx['body'] = html_assists.escape_encoded_alt_text(ctx['body']) + ensuredir(path.dirname(outfilename)) self.dump_context(ctx, outfilename) diff --git a/sphinxcontrib/serializinghtml/nav_html_to_json.py b/sphinxcontrib/serializinghtml/html_assists.py similarity index 81% rename from sphinxcontrib/serializinghtml/nav_html_to_json.py rename to sphinxcontrib/serializinghtml/html_assists.py index b212c78..3679940 100644 --- a/sphinxcontrib/serializinghtml/nav_html_to_json.py +++ b/sphinxcontrib/serializinghtml/html_assists.py @@ -1,6 +1,6 @@ from bs4 import BeautifulSoup, element -import json import sys +from html import escape def clean_href(href: str) -> str: """ Make sure the href doesn't start or end with a / """ @@ -32,6 +32,20 @@ def convert_tag_to_link(item_entry: element.Tag) -> dict: "href": clean_href(a_tag["href"]) } +def escape_encoded_alt_text(html: str) -> str: + soup = BeautifulSoup(html, "html.parser") + images = soup.find_all('img') + for img in images: + if img['alt'] != "": + # At this point, Beautiful Soup has done what a browser does - decode + # any encoded attributes. So we need to re-encode the string, see if + # there are any ampersands and, if so, re-encode them again. 
+ interim = escape(img['alt']) + if interim.find("&") != -1: + img['alt'] = escape(interim) + + return html + def convert_nav_html_to_json(html: str) -> list: result = [] soup = BeautifulSoup(html, "html.parser") From 2e0a128df58163634e27e3215cebb861f590d2c7 Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Wed, 4 Sep 2024 14:45:13 +0100 Subject: [PATCH 13/36] Bump version --- sphinxcontrib/serializinghtml/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index 72537a1..a973d72 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ... def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ... def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ... -__version__ = '2.0.0+Linaro-240904' +__version__ = '2.0.0+Linaro-240904a' __version_info__ = (2, 0, 0) package_dir = path.abspath(path.dirname(__file__)) From 6e35e86a12719a9bc757802fb96dc4eccc4aa249 Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Wed, 4 Sep 2024 14:57:46 +0100 Subject: [PATCH 14/36] Fix bug around editing img alt text --- sphinxcontrib/serializinghtml/html_assists.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py index 3679940..e3bcf49 100644 --- a/sphinxcontrib/serializinghtml/html_assists.py +++ b/sphinxcontrib/serializinghtml/html_assists.py @@ -33,6 +33,7 @@ def convert_tag_to_link(item_entry: element.Tag) -> dict: } def escape_encoded_alt_text(html: str) -> str: + edited = False soup = BeautifulSoup(html, "html.parser") images = soup.find_all('img') for img in images: @@ -43,7 +44,10 @@ def escape_encoded_alt_text(html: str) -> str: interim = escape(img['alt']) if interim.find("&") != -1: img['alt'] = escape(interim) + edited = True + 
if edited: + html = str(soup) return html def convert_nav_html_to_json(html: str) -> list: From 4585595ce5a8ef872601a3c5abb22b4bb9956d33 Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Wed, 4 Sep 2024 15:03:05 +0100 Subject: [PATCH 15/36] Bump version --- sphinxcontrib/serializinghtml/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index a973d72..b360030 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ... def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ... def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ... -__version__ = '2.0.0+Linaro-240904a' +__version__ = '2.0.0+Linaro-240904b' __version_info__ = (2, 0, 0) package_dir = path.abspath(path.dirname(__file__)) From 9445c448e5132a60c44cdaa53972dc4addedbe7c Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Thu, 24 Oct 2024 12:38:29 +0100 Subject: [PATCH 16/36] Fix encoded attributes in pre sections --- sphinxcontrib/serializinghtml/__init__.py | 15 ++- sphinxcontrib/serializinghtml/html_assists.py | 98 ++++++++++++------- 2 files changed, 74 insertions(+), 39 deletions(-) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index b360030..bc83403 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ... def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ... def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ... 
-__version__ = '2.0.0+Linaro-240904b' +__version__ = '2.0.0+Linaro-241024' __version_info__ = (2, 0, 0) package_dir = path.abspath(path.dirname(__file__)) @@ -104,6 +104,7 @@ def dump_context(self, context: dict[str, Any], filename: str | os.PathLike[str] def handle_page(self, pagename: str, ctx: dict[str, Any], templatename: str = 'page.html', outfilename: str | None = None, event_arg: Any = None) -> None: + print(f"handle_page: {pagename}") ctx['current_page_name'] = pagename ctx.setdefault('pathto', lambda p: p) self.add_sidebars(pagename, ctx) @@ -136,12 +137,16 @@ def handle_page(self, pagename: str, ctx: dict[str, Any], templatename: str = 'p if isinstance(ctx[key], types.FunctionType): del ctx[key] - # PJC: Some Linaro documentation has encoded attributes in image ALT text - # which then gets decoded when the HTML is loaded into the DOM, so - # we need to alter it by "escaping" the ampersands with & to - # prevent the decoding. if "body" in ctx: + # PJC: Some Linaro documentation has encoded attributes in image ALT text + # which then gets decoded when the HTML is loaded into the DOM, so + # we need to alter it by "escaping" the ampersands with & to + # prevent the decoding. ctx['body'] = html_assists.escape_encoded_alt_text(ctx['body']) + # PJC: Furthermore, if there is any formatted code with encoded attributes, + # e.g. < changed to < then that also needs to be escaped because it is + # also getting decoded. 
+ ctx['body'] = html_assists.escape_encoded_pre_text(ctx['body']) ensuredir(path.dirname(outfilename)) self.dump_context(ctx, outfilename) diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py index e3bcf49..b1e6204 100644 --- a/sphinxcontrib/serializinghtml/html_assists.py +++ b/sphinxcontrib/serializinghtml/html_assists.py @@ -22,7 +22,6 @@ def section_links(parent_entry: element.Tag, list_entry: element.Tag) -> dict: "items": section_result } - def convert_tag_to_link(item_entry: element.Tag) -> dict: # The a tag is a child of the li tag a_tag = item_entry.contents[0] @@ -32,6 +31,48 @@ def convert_tag_to_link(item_entry: element.Tag) -> dict: "href": clean_href(a_tag["href"]) } +def process_section(result, child, section, pending_divider) -> bool: + if section != []: + # Yes, there is, so we have a sub-section. If we've got some content + # already, add a divider. + if result != []: + result.append({ "type": "divider" }) + # Now append the current page and the section links. The + # ul tag is the only child returned, hence [0] + result.append(section_links(child, section[0])) + # If there are any "normal" entries after this section + # add a divider first + pending_divider = True + else: + if pending_divider: + result.append({ "type": "divider" }) + pending_divider = False + result.append(convert_tag_to_link(child)) + +def process_ul_children(result, ul): + pending_divider = False + for child in ul.children: + if type(child) is element.Tag and child.name == "li": + # Is there a new unordered list within this section? 
+ section = child.find_all("ul", limit=1) + pending_divider = process_section(result, child, section, pending_divider) + +def convert_nav_html_to_json(html: str) -> list: + result = [] + soup = BeautifulSoup(html, "html.parser") + + # Start with the unordered list + ul = soup.ul + # Iterate through list items + while ul is not None: + process_ul_children(result, ul) + while True: + ul = ul.next_sibling + if ul is None or type(ul) is element.Tag: + break + # Not an acceptable type - loop and get the next sibling + return result + def escape_encoded_alt_text(html: str) -> str: edited = False soup = BeautifulSoup(html, "html.parser") @@ -50,38 +91,27 @@ def escape_encoded_alt_text(html: str) -> str: html = str(soup) return html -def convert_nav_html_to_json(html: str) -> list: - result = [] +def escape_encoded_pre_text(html: str) -> str: + print("escape_encoded_pre_text") + edited = False soup = BeautifulSoup(html, "html.parser") + spans = soup.find_all('span') + for span in spans: + classes = span["class"] + matched_pre = False + for this_class in classes: + if this_class == "pre": + matched_pre = True + if matched_pre: + # At this point, Beautiful Soup has done what a browser does - decode + # any encoded attributes. So we need to re-encode the string, see if + # there are any ampersands and, if so, re-encode them again. + interim = escape(span.string) + if interim.find("&") != -1: + span.string = escape(interim) + edited = True - # Start with the unordered list - ul = soup.ul - pending_divider = False - # Iterate through list items - while ul is not None: - for child in ul.children: - if type(child) is element.Tag and child.name == "li": - # Is there a new unordered list within this section? - section = child.find_all("ul", limit=1) - if section != []: - # Yes, there is, so we have a sub-section. If we've got some content - # already, add a divider. - if result != []: - result.append({ "type": "divider" }) - # Now append the current page and the section links. 
The - # ul tag is the only child returned, hence [0] - result.append(section_links(child, section[0])) - # If there are any "normal" entries after this section - # add a divider first - pending_divider = True - else: - if pending_divider: - result.append({ "type": "divider" }) - pending_divider = False - result.append(convert_tag_to_link(child)) - while True: - ul = ul.next_sibling - if ul is None or type(ul) is element.Tag: - break - # Not an acceptable type - loop and get the next sibling - return result + if edited: + html = str(soup) + print(html) + return html From c47f1eb651401db3f0230dfa8ab20758ae116baf Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Thu, 24 Oct 2024 13:55:30 +0100 Subject: [PATCH 17/36] Fix bugs --- sphinxcontrib/serializinghtml/__init__.py | 2 +- sphinxcontrib/serializinghtml/html_assists.py | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index bc83403..545fce0 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ... def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ... def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ... 
-__version__ = '2.0.0+Linaro-241024' +__version__ = '2.0.0+Linaro-241024a' __version_info__ = (2, 0, 0) package_dir = path.abspath(path.dirname(__file__)) diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py index b1e6204..36f5fc1 100644 --- a/sphinxcontrib/serializinghtml/html_assists.py +++ b/sphinxcontrib/serializinghtml/html_assists.py @@ -91,18 +91,22 @@ def escape_encoded_alt_text(html: str) -> str: html = str(soup) return html +def matched_pre(span) -> bool: + """ Check if this span is specifying the "pre" class """ + if "class" not in span: + return False + classes = span["class"] + for this_class in classes: + if this_class == "pre": + return True + return False + def escape_encoded_pre_text(html: str) -> str: - print("escape_encoded_pre_text") edited = False soup = BeautifulSoup(html, "html.parser") spans = soup.find_all('span') for span in spans: - classes = span["class"] - matched_pre = False - for this_class in classes: - if this_class == "pre": - matched_pre = True - if matched_pre: + if matched_pre(span): # At this point, Beautiful Soup has done what a browser does - decode # any encoded attributes. So we need to re-encode the string, see if # there are any ampersands and, if so, re-encode them again. 
@@ -113,5 +117,4 @@ def escape_encoded_pre_text(html: str) -> str: if edited: html = str(soup) - print(html) return html From 05dd739f49a9ededa88b9f800e1396d4f6c932ca Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Mon, 28 Oct 2024 09:20:26 +0000 Subject: [PATCH 18/36] Fix pre handling --- sphinxcontrib/serializinghtml/__init__.py | 3 +-- sphinxcontrib/serializinghtml/html_assists.py | 27 ++++++------------- 2 files changed, 9 insertions(+), 21 deletions(-) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index 545fce0..36d5a22 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ... def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ... def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ... -__version__ = '2.0.0+Linaro-241024a' +__version__ = '2.0.0+Linaro-241028' __version_info__ = (2, 0, 0) package_dir = path.abspath(path.dirname(__file__)) @@ -104,7 +104,6 @@ def dump_context(self, context: dict[str, Any], filename: str | os.PathLike[str] def handle_page(self, pagename: str, ctx: dict[str, Any], templatename: str = 'page.html', outfilename: str | None = None, event_arg: Any = None) -> None: - print(f"handle_page: {pagename}") ctx['current_page_name'] = pagename ctx.setdefault('pathto', lambda p: p) self.add_sidebars(pagename, ctx) diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py index 36f5fc1..c818c5e 100644 --- a/sphinxcontrib/serializinghtml/html_assists.py +++ b/sphinxcontrib/serializinghtml/html_assists.py @@ -91,29 +91,18 @@ def escape_encoded_alt_text(html: str) -> str: html = str(soup) return html -def matched_pre(span) -> bool: - """ Check if this span is specifying the "pre" class """ - if "class" not in span: - return False - classes = span["class"] - for this_class in classes: - if 
this_class == "pre": - return True - return False - def escape_encoded_pre_text(html: str) -> str: edited = False soup = BeautifulSoup(html, "html.parser") - spans = soup.find_all('span') + spans = soup.find_all('span', class_="pre") for span in spans: - if matched_pre(span): - # At this point, Beautiful Soup has done what a browser does - decode - # any encoded attributes. So we need to re-encode the string, see if - # there are any ampersands and, if so, re-encode them again. - interim = escape(span.string) - if interim.find("&") != -1: - span.string = escape(interim) - edited = True + # At this point, Beautiful Soup has done what a browser does - decode + # any encoded attributes. So we need to re-encode the string, see if + # there are any ampersands and, if so, re-encode them again. + interim = escape(span.string) + if interim.find("&") != -1: + span.string = escape(interim) + edited = True if edited: html = str(soup) From 26cd446e097f3b5f494268ee24be09b4e6f269e9 Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Tue, 29 Oct 2024 15:27:01 +0000 Subject: [PATCH 19/36] Return updated divider flag --- sphinxcontrib/serializinghtml/html_assists.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py index c818c5e..fb015da 100644 --- a/sphinxcontrib/serializinghtml/html_assists.py +++ b/sphinxcontrib/serializinghtml/html_assists.py @@ -48,6 +48,7 @@ def process_section(result, child, section, pending_divider) -> bool: result.append({ "type": "divider" }) pending_divider = False result.append(convert_tag_to_link(child)) + return pending_divider def process_ul_children(result, ul): pending_divider = False From 2aa3106f04801a07aee5f8b887e73f1670ccd5b6 Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Thu, 8 May 2025 14:47:53 +0100 Subject: [PATCH 20/36] Add support to map external links to relative local links --- sphinxcontrib/serializinghtml/__init__.py | 12 ++++++++++++ 
sphinxcontrib/serializinghtml/html_assists.py | 17 +++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index 36d5a22..c44e1f7 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -81,6 +81,15 @@ def init(self) -> None: self.init_css_files() self.init_js_files() self.use_index = self.get_builder_config('use_index', 'html') + # + # PJC: New configuration to allow mapping of external links to + # relative Hub links. + link_mappings = None + try: + link_mappings = self.get_builder_config('link_mappings', 'html') + except AttributeError: + pass + self.link_mappings = link_mappings def get_target_uri(self, docname: str, typ: str | None = None) -> str: if docname == 'index': @@ -146,6 +155,9 @@ def handle_page(self, pagename: str, ctx: dict[str, Any], templatename: str = 'p # e.g. < changed to < then that also needs to be escaped because it is # also getting decoded. ctx['body'] = html_assists.escape_encoded_pre_text(ctx['body']) + # PJC: Go through the body, looking for any tags to see if they + # need to be re-mapped to a local Hub path. 
+ ctx['body'] = html_assists.rewrite_hub_links(ctx['body'], self.link_mappings) ensuredir(path.dirname(outfilename)) self.dump_context(ctx, outfilename) diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py index fb015da..1a1940e 100644 --- a/sphinxcontrib/serializinghtml/html_assists.py +++ b/sphinxcontrib/serializinghtml/html_assists.py @@ -108,3 +108,20 @@ def escape_encoded_pre_text(html: str) -> str: if edited: html = str(soup) return html + +def rewrite_hub_links(html: str, link_mappings: dict) -> str: + edited = False + soup = BeautifulSoup(html, "html.parser") + links = soup.find_all('a') + for link in links: + for key in link_mappings: + if link['href'].startswith(key): + # We have a match, so replace the href with the new one + link['href'] = link['href'].replace(key, link_mappings[key]) + # We also have to remove ".html" from the end of the link + link['href'] = link['href'].replace(".html", "") + edited = True + + if edited: + html = str(soup) + return html From 53eb51a5c3f305e464cd57e3fc08d799b60a1127 Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Thu, 8 May 2025 15:49:14 +0100 Subject: [PATCH 21/36] Set version number --- sphinxcontrib/serializinghtml/__init__.py | 2 +- sphinxcontrib/serializinghtml/html_assists.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index c44e1f7..26235cf 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ... def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ... def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ... 
-__version__ = '2.0.0+Linaro-241028' +__version__ = '2.0.0+Linaro-250508' __version_info__ = (2, 0, 0) package_dir = path.abspath(path.dirname(__file__)) diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py index 1a1940e..471fcd8 100644 --- a/sphinxcontrib/serializinghtml/html_assists.py +++ b/sphinxcontrib/serializinghtml/html_assists.py @@ -115,12 +115,14 @@ def rewrite_hub_links(html: str, link_mappings: dict) -> str: links = soup.find_all('a') for link in links: for key in link_mappings: + # Check if the href starts with the key if link['href'].startswith(key): # We have a match, so replace the href with the new one link['href'] = link['href'].replace(key, link_mappings[key]) # We also have to remove ".html" from the end of the link link['href'] = link['href'].replace(".html", "") edited = True + break if edited: html = str(soup) From a1fa02e7aa29b0967eed94441a12c30b71aeaafe Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Fri, 9 May 2025 08:27:30 +0100 Subject: [PATCH 22/36] Cope with "index.html" --- sphinxcontrib/serializinghtml/__init__.py | 2 +- sphinxcontrib/serializinghtml/html_assists.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index 26235cf..76418cf 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ... def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ... def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ... 
-__version__ = '2.0.0+Linaro-250508' +__version__ = '2.0.0+Linaro-250509' __version_info__ = (2, 0, 0) package_dir = path.abspath(path.dirname(__file__)) diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py index 471fcd8..ca93928 100644 --- a/sphinxcontrib/serializinghtml/html_assists.py +++ b/sphinxcontrib/serializinghtml/html_assists.py @@ -117,10 +117,15 @@ def rewrite_hub_links(html: str, link_mappings: dict) -> str: for key in link_mappings: # Check if the href starts with the key if link['href'].startswith(key): - # We have a match, so replace the href with the new one - link['href'] = link['href'].replace(key, link_mappings[key]) + # We have a match, so temporarily strip the key from the href + link['href'] = link['href'].replace(key, "") # We also have to remove ".html" from the end of the link link['href'] = link['href'].replace(".html", "") + # If we're just left with "index", replace it with the key + if link['href'] == "index": + link['href'] = key + # Now we can add the new href + link['href'] = link_mappings[key] + link['href'] edited = True break From 061ef450250f1993160c6553c0e2a5f5d485ea23 Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Fri, 9 May 2025 09:00:35 +0100 Subject: [PATCH 23/36] Fix mapping logic --- sphinxcontrib/serializinghtml/__init__.py | 2 +- sphinxcontrib/serializinghtml/html_assists.py | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index 76418cf..d0926cc 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ... def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ... def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ... 
-__version__ = '2.0.0+Linaro-250509' +__version__ = '2.0.0+Linaro-250509a' __version_info__ = (2, 0, 0) package_dir = path.abspath(path.dirname(__file__)) diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py index ca93928..2c52513 100644 --- a/sphinxcontrib/serializinghtml/html_assists.py +++ b/sphinxcontrib/serializinghtml/html_assists.py @@ -117,15 +117,19 @@ def rewrite_hub_links(html: str, link_mappings: dict) -> str: for key in link_mappings: # Check if the href starts with the key if link['href'].startswith(key): - # We have a match, so temporarily strip the key from the href + # We have a match, so strip the key from the href link['href'] = link['href'].replace(key, "") # We also have to remove ".html" from the end of the link link['href'] = link['href'].replace(".html", "") - # If we're just left with "index", replace it with the key + # If we're just left with "index", replace it with the value from the dictionary, + # which will also be the documentation root name if link['href'] == "index": - link['href'] = key - # Now we can add the new href - link['href'] = link_mappings[key] + link['href'] + link['href'] = link_mappings[key] + # Now put it all together ... 
+ # So we should end up with something like: + # /library/onelab/onelab + # /library/laa/laa_getting_started + link['href'] = f"/library/{link_mappings[key]}/{link['href']}" edited = True break From 9d80d8e240b310345c930261e4b7c788ef167df2 Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Fri, 9 May 2025 12:03:49 +0100 Subject: [PATCH 24/36] Fix links that END with "/index" --- sphinxcontrib/serializinghtml/__init__.py | 2 +- sphinxcontrib/serializinghtml/html_assists.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index d0926cc..4b87e00 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ... def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ... def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ... -__version__ = '2.0.0+Linaro-250509a' +__version__ = '2.0.0+Linaro-250509b' __version_info__ = (2, 0, 0) package_dir = path.abspath(path.dirname(__file__)) diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py index 2c52513..f893ce1 100644 --- a/sphinxcontrib/serializinghtml/html_assists.py +++ b/sphinxcontrib/serializinghtml/html_assists.py @@ -125,6 +125,9 @@ def rewrite_hub_links(html: str, link_mappings: dict) -> str: # which will also be the documentation root name if link['href'] == "index": link['href'] = link_mappings[key] + # Do we have a link that ENDS with "/index"? If we do, remove it + if link['href'].endswith("/index"): + link['href'] = link['href'].replace("/index", "") # Now put it all together ... 
# So we should end up with something like: # /library/onelab/onelab From d94f2cbed45ed6336e0d19ca27fe0a7d00a834c2 Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Fri, 9 May 2025 16:00:11 +0100 Subject: [PATCH 25/36] Rewrite nav to json converter --- sphinxcontrib/serializinghtml/__init__.py | 4 +-- sphinxcontrib/serializinghtml/html_assists.py | 28 ++++++++++++------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index 4b87e00..8830792 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ... def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ... def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ... -__version__ = '2.0.0+Linaro-250509b' +__version__ = '2.0.0+Linaro-250509c' __version_info__ = (2, 0, 0) package_dir = path.abspath(path.dirname(__file__)) @@ -118,7 +118,7 @@ def handle_page(self, pagename: str, ctx: dict[str, Any], templatename: str = 'p self.add_sidebars(pagename, ctx) # Add the toc tree as a JSON dictionary - ctx['toctree'] = html_assists.convert_nav_html_to_json(self._get_local_toctree(pagename)) + ctx['toctree'] = html_assists.convert_nav_html_to_json(self._get_local_toctree(pagename, includehidden=True)) if not outfilename: # PJC: Ensure that index files are actually written under the name of the diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py index f893ce1..623b899 100644 --- a/sphinxcontrib/serializinghtml/html_assists.py +++ b/sphinxcontrib/serializinghtml/html_assists.py @@ -61,17 +61,25 @@ def process_ul_children(result, ul): def convert_nav_html_to_json(html: str) -> list: result = [] soup = BeautifulSoup(html, "html.parser") + top_level_tags = soup.find_all(recursive=False) - # Start with the unordered list - ul = soup.ul 
- # Iterate through list items - while ul is not None: - process_ul_children(result, ul) - while True: - ul = ul.next_sibling - if ul is None or type(ul) is element.Tag: - break - # Not an acceptable type - loop and get the next sibling + caption = None + for tag in top_level_tags: + if type(tag) is element.Tag and tag.name == "p" and tag.has_attr("class") and "caption" in tag["class"]: + span = tag.findChild("span") + caption = span.text + elif type(tag) is element.Tag and tag.name == "ul": + if caption is not None: + local_result = [] + process_ul_children(local_result, tag) + result.append({ + "type": "section-group", + "title": caption, + "items": local_result + }) + caption = None + else: + process_ul_children(result, tag) return result def escape_encoded_alt_text(html: str) -> str: From 4f7d30d9e42eebb1ac09565fe597d3d86cbc53c6 Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Wed, 14 May 2025 16:47:18 +0100 Subject: [PATCH 26/36] Rework section code for greater consistency --- sphinxcontrib/serializinghtml/__init__.py | 2 +- sphinxcontrib/serializinghtml/html_assists.py | 27 ++++++------------- 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py index 8830792..d7c7243 100644 --- a/sphinxcontrib/serializinghtml/__init__.py +++ b/sphinxcontrib/serializinghtml/__init__.py @@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ... def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ... def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ... 
-__version__ = '2.0.0+Linaro-250509c' +__version__ = '2.0.0+Linaro-250514d' __version_info__ = (2, 0, 0) package_dir = path.abspath(path.dirname(__file__)) diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py index 623b899..e2779e7 100644 --- a/sphinxcontrib/serializinghtml/html_assists.py +++ b/sphinxcontrib/serializinghtml/html_assists.py @@ -16,9 +16,8 @@ def section_links(parent_entry: element.Tag, list_entry: element.Tag) -> dict: if type(child) is element.Tag and child.name == "li": section_result.append(convert_tag_to_link(child)) return { - "type": "expandable-link-group", + "type": "section", "text": parent_entry.contents[0].contents[0], - "href": clean_href(parent_entry.contents[0]["href"]), "items": section_result } @@ -31,32 +30,22 @@ def convert_tag_to_link(item_entry: element.Tag) -> dict: "href": clean_href(a_tag["href"]) } -def process_section(result, child, section, pending_divider) -> bool: +def process_section(result, child, section): + # Is there a new unordered list within this section? if section != []: - # Yes, there is, so we have a sub-section. If we've got some content - # already, add a divider. - if result != []: - result.append({ "type": "divider" }) - # Now append the current page and the section links. The - # ul tag is the only child returned, hence [0] + result.append({ "type": "divider" }) + # Now append the current page and the section links. 
The + # ul tag is the only child returned, hence [0] result.append(section_links(child, section[0])) - # If there are any "normal" entries after this section - # add a divider first - pending_divider = True + result.append({ "type": "divider" }) else: - if pending_divider: - result.append({ "type": "divider" }) - pending_divider = False result.append(convert_tag_to_link(child)) - return pending_divider def process_ul_children(result, ul): - pending_divider = False for child in ul.children: if type(child) is element.Tag and child.name == "li": - # Is there a new unordered list within this section? section = child.find_all("ul", limit=1) - pending_divider = process_section(result, child, section, pending_divider) + process_section(result, child, section) def convert_nav_html_to_json(html: str) -> list: result = [] From 37997c76e9aa764e72407db769abcd61c56baf9d Mon Sep 17 00:00:00 2001 From: Philip Colmer Date: Tue, 3 Jun 2025 08:36:24 +0100 Subject: [PATCH 27/36] Improve checking of encodes in spans --- sphinxcontrib/serializinghtml/html_assists.py | 37 ++++++++++++++----- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py index e2779e7..fe417ac 100644 --- a/sphinxcontrib/serializinghtml/html_assists.py +++ b/sphinxcontrib/serializinghtml/html_assists.py @@ -89,18 +89,37 @@ def escape_encoded_alt_text(html: str) -> str: html = str(soup) return html +def re_encode_span_tags(span_tags, edited) -> bool: + for span_tag in span_tags: + interim = escape(span_tag.string) + if interim.find("&") != -1: + span_tag.string = escape(interim) + edited = True + return edited + def escape_encoded_pre_text(html: str) -> str: + # The reason for this function is because, when the browser loads the + # HTML from the JSON data, it decodes any encoded attributes, such as + # < and >, so we need to re-encode them to prevent the browser + # from decoding them. 
+    #
+    # There are two separate search cases that are implemented here:
+    #
+    # 1. The <span> tags that are used to format code in the HTML, which
+    #    are used in the "pre" tags.
+    # 2. The <pre> tags themselves, which may contain code that has been
+    #    formatted with HTML entities, such as &lt; and &gt;.
+
     edited = False
     soup = BeautifulSoup(html, "html.parser")
-    spans = soup.find_all('span', class_="pre")
-    for span in spans:
-        # At this point, Beautiful Soup has done what a browser does - decode
-        # any encoded attributes. So we need to re-encode the string, see if
-        # there are any ampersands and, if so, re-encode them again.
-        interim = escape(span.string)
-        if interim.find("&") != -1:
-            span.string = escape(interim)
-            edited = True
+
+    span_tags = soup.find_all('span', class_="pre")
+    edited = re_encode_span_tags(span_tags, edited)
+
+    pre_tags = soup.find_all('pre')
+    for pre_tag in pre_tags:
+        span_tags = pre_tag.find_all("span")
+        edited = re_encode_span_tags(span_tags, edited)
 
     if edited:
         html = str(soup)

From 829391d8e3736ca6ae4e6150d9405d587e3f7222 Mon Sep 17 00:00:00 2001
From: Philip Colmer 
Date: Tue, 3 Jun 2025 08:38:12 +0100
Subject: [PATCH 28/36] Bump version

---
 sphinxcontrib/serializinghtml/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index d7c7243..9ece63f 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ...
         def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ...
         def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ...
 
-__version__ = '2.0.0+Linaro-250514d'
+__version__ = '2.0.0+Linaro-250603'
 __version_info__ = (2, 0, 0)
 
 package_dir = path.abspath(path.dirname(__file__))

From 4f5e1671d63cfa674bc5712b58e96e971ee3459a Mon Sep 17 00:00:00 2001
From: Philip Colmer 
Date: Tue, 3 Jun 2025 09:06:10 +0100
Subject: [PATCH 29/36] Fix bug in span checking

---
 sphinxcontrib/serializinghtml/__init__.py     |  2 +-
 sphinxcontrib/serializinghtml/html_assists.py | 10 ++++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index 9ece63f..c020d85 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ...
         def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ...
         def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ...
 
-__version__ = '2.0.0+Linaro-250603'
+__version__ = '2.0.0+Linaro-250603a'
 __version_info__ = (2, 0, 0)
 
 package_dir = path.abspath(path.dirname(__file__))
diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py
index fe417ac..8ff8a2a 100644
--- a/sphinxcontrib/serializinghtml/html_assists.py
+++ b/sphinxcontrib/serializinghtml/html_assists.py
@@ -91,10 +91,12 @@ def escape_encoded_alt_text(html: str) -> str:
 
 def re_encode_span_tags(span_tags, edited) -> bool:
     for span_tag in span_tags:
-        interim = escape(span_tag.string)
-        if interim.find("&") != -1:
-            span_tag.string = escape(interim)
-            edited = True
+        content = span_tag.string
+        if content is not None:
+            interim = escape(content)
+            if interim.find("&") != -1:
+                span_tag.string = escape(interim)
+                edited = True
     return edited
 
 def escape_encoded_pre_text(html: str) -> str:

From cf338a42306515baf4c5e481e148aec63c93002c Mon Sep 17 00:00:00 2001
From: Philip Colmer 
Date: Wed, 18 Jun 2025 15:49:11 +0100
Subject: [PATCH 30/36] Adjust relative links for better Next.js support

---
 sphinxcontrib/serializinghtml/__init__.py     |  4 ++--
 sphinxcontrib/serializinghtml/html_assists.py | 23 +++++++++++++++++--
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index c020d85..971744b 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ...
         def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ...
         def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ...
 
-__version__ = '2.0.0+Linaro-250603a'
+__version__ = '2.0.0+Linaro-250618'
 __version_info__ = (2, 0, 0)
 
 package_dir = path.abspath(path.dirname(__file__))
@@ -157,7 +157,7 @@ def handle_page(self, pagename: str, ctx: dict[str, Any], templatename: str = 'p
             ctx['body'] = html_assists.escape_encoded_pre_text(ctx['body'])
             # PJC: Go through the body, looking for any <a> tags to see if they
             # need to be re-mapped to a local Hub path.
-            ctx['body'] = html_assists.rewrite_hub_links(ctx['body'], self.link_mappings)
+            ctx['body'] = html_assists.rewrite_hub_links(ctx['body'], self.link_mappings, page_filename)
 
         ensuredir(path.dirname(outfilename))
         self.dump_context(ctx, outfilename)
diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py
index 8ff8a2a..ecdb6fd 100644
--- a/sphinxcontrib/serializinghtml/html_assists.py
+++ b/sphinxcontrib/serializinghtml/html_assists.py
@@ -1,6 +1,10 @@
 from bs4 import BeautifulSoup, element
-import sys
 from html import escape
+from urllib.parse import urlparse
+
+def is_relative_url(url):
+    parsed = urlparse(url)
+    return not parsed.scheme and not parsed.netloc
 
 def clean_href(href: str) -> str:
     """ Make sure the href doesn't start or end with a / """
@@ -127,11 +131,26 @@ def escape_encoded_pre_text(html: str) -> str:
         html = str(soup)
     return html
 
-def rewrite_hub_links(html: str, link_mappings: dict) -> str:
+def rewrite_hub_links(html: str, link_mappings: dict, page_filename: str) -> str:
     edited = False
     soup = BeautifulSoup(html, "html.parser")
     links = soup.find_all('a')
+    # Need to calculate what the start of page_filename looks like
+    # up to the first separator.
+    page_filename_head, _, _ = page_filename.partition("/")
     for link in links:
+        # Check for relative links that need adjusting relative to where
+        # we are in the URL structure. Do this *before* performing the link
+        # mapping because the latter introduces more relative links to check.
+        if is_relative_url(link['href']) and link['href'][0] not in ['#', '/']:
+            if link['href'].startswith(page_filename):
+                # We need to drop the bit that goes up to the first / in
+                # the link because otherwise it gets duplicated when
+                # Next.js processes it.
+                link['href'] = link['href'][len(page_filename_head)+1:]
+                edited = True
+
+        # Now map from external URLs to internal relative URLs.
         for key in link_mappings:
             # Check if the href starts with the key
             if link['href'].startswith(key):

From 4e9a62bf60cdd2d37cde448939520bdd12a5a7b7 Mon Sep 17 00:00:00 2001
From: Philip Colmer 
Date: Fri, 20 Jun 2025 08:25:54 +0100
Subject: [PATCH 31/36] More work on relative links

---
 sphinxcontrib/serializinghtml/__init__.py     |  2 +-
 sphinxcontrib/serializinghtml/html_assists.py | 81 +++++++++++--------
 2 files changed, 49 insertions(+), 34 deletions(-)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index 971744b..aad2c48 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ...
         def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ...
         def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ...
 
-__version__ = '2.0.0+Linaro-250618'
+__version__ = '2.0.0+Linaro-250620'
 __version_info__ = (2, 0, 0)
 
 package_dir = path.abspath(path.dirname(__file__))
diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py
index ecdb6fd..be79a19 100644
--- a/sphinxcontrib/serializinghtml/html_assists.py
+++ b/sphinxcontrib/serializinghtml/html_assists.py
@@ -131,47 +131,62 @@ def escape_encoded_pre_text(html: str) -> str:
         html = str(soup)
     return html
 
+def process_relative_links(link: dict, page_filename: str, page_filename_head: str) -> bool:
+    # Check for relative links that need adjusting relative to where
+    # we are in the URL structure. Do this *before* performing the link
+    # mapping because the latter introduces more relative links to check.
+    print(f"rewrite_hub_links: adjusting relative link: {link['href']}")
+    if page_filename_head != page_filename:
+        if is_relative_url(link['href']) and link['href'][0] not in ['#', '/']:
+            if link['href'].startswith(page_filename):
+                # We need to drop the bit that goes up to the first / in
+                # the link because otherwise it gets duplicated when
+                # Next.js processes it.
+                link['href'] = link['href'][len(page_filename_head)+1:]
+                print(f"rewrite_hub_links: adjusted relative link: {link['href']}")
+                return True
+            print("rewrite_hub_links: no change")
+    else:
+        print("rewrite_hub_links: no relative link adjustment needed")
+    return False
+
+def process_link_mappings(link: dict, link_mappings: dict) -> bool:
+    for key in link_mappings:
+        # Check if the href starts with the key
+        if link['href'].startswith(key):
+            # We have a match, so strip the key from the href
+            link['href'] = link['href'].replace(key, "")
+            # We also have to remove ".html" from the end of the link
+            link['href'] = link['href'].replace(".html", "")
+            # If we're just left with "index", replace it with the value from the dictionary,
+            # which will also be the documentation root name
+            if link['href'] == "index":
+                link['href'] = link_mappings[key]
+            # Do we have a link that ENDS with "/index"? If we do, remove it
+            if link['href'].endswith("/index"):
+                link['href'] = link['href'].replace("/index", "")
+            # Now put it all together ...
+            # So we should end up with something like:
+            # /library/onelab/onelab
+            # /library/laa/laa_getting_started
+            link['href'] = f"/library/{link_mappings[key]}/{link['href']}"
+            return True
+    return False
+
 def rewrite_hub_links(html: str, link_mappings: dict, page_filename: str) -> str:
+    print(f"rewrite_hub_links: page_filename={page_filename}")
     edited = False
     soup = BeautifulSoup(html, "html.parser")
     links = soup.find_all('a')
     # Need to calculate what the start of page_filename looks like
     # up to the first separator.
     page_filename_head, _, _ = page_filename.partition("/")
+    print(f"rewrite_hub_links: page_filename_head={page_filename_head}")
     for link in links:
-        # Check for relative links that need adjusting relative to where
-        # we are in the URL structure. Do this *before* performing the link
-        # mapping because the latter introduces more relative links to check.
-        if is_relative_url(link['href']) and link['href'][0] not in ['#', '/']:
-            if link['href'].startswith(page_filename):
-                # We need to drop the bit that goes up to the first / in
-                # the link because otherwise it gets duplicated when
-                # Next.js processes it.
-                link['href'] = link['href'][len(page_filename_head)+1:]
-                edited = True
-
-        # Now map from external URLs to internal relative URLs.
-        for key in link_mappings:
-            # Check if the href starts with the key
-            if link['href'].startswith(key):
-                # We have a match, so strip the key from the href
-                link['href'] = link['href'].replace(key, "")
-                # We also have to remove ".html" from the end of the link
-                link['href'] = link['href'].replace(".html", "")
-                # If we're just left with "index", replace it with the value from the dictionary,
-                # which will also be the documentation root name
-                if link['href'] == "index":
-                    link['href'] = link_mappings[key]
-                # Do we have a link that ENDS with "/index"? If we do, remove it
-                if link['href'].endswith("/index"):
-                    link['href'] = link['href'].replace("/index", "")
-                # Now put it all together ...
-                # So we should end up with something like:
-                # /library/onelab/onelab
-                # /library/laa/laa_getting_started
-                link['href'] = f"/library/{link_mappings[key]}/{link['href']}"
-                edited = True
-                break
+        if process_relative_links(link, page_filename, page_filename_head):
+            edited = True
+        if process_link_mappings(link, link_mappings):
+            edited = True
 
     if edited:
         html = str(soup)

From 3332839b5ffe7ac869ac4f2cac54de0fde8e1263 Mon Sep 17 00:00:00 2001
From: Philip Colmer 
Date: Wed, 9 Jul 2025 09:25:22 +0100
Subject: [PATCH 32/36] More work on relative link calculations

---
 sphinxcontrib/serializinghtml/html_assists.py | 33 ++++++++++++++++---
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py
index be79a19..c13deff 100644
--- a/sphinxcontrib/serializinghtml/html_assists.py
+++ b/sphinxcontrib/serializinghtml/html_assists.py
@@ -1,6 +1,8 @@
 from bs4 import BeautifulSoup, element
 from html import escape
 from urllib.parse import urlparse
+from pathlib import PurePosixPath
+import re
 
 def is_relative_url(url):
     parsed = urlparse(url)
@@ -131,19 +133,40 @@ def escape_encoded_pre_text(html: str) -> str:
         html = str(soup)
     return html
 
+def resolve_relative_path(base, relative):
+    base_parts = PurePosixPath(base).parts
+    rel_parts = PurePosixPath(relative).parts
+
+    stack = list(base_parts)
+
+    for part in rel_parts:
+        if part == "..":
+            if stack:
+                stack.pop()
+        elif part != ".":
+            stack.append(part)
+
+    return "/".join(stack)
+
 def process_relative_links(link: dict, page_filename: str, page_filename_head: str) -> bool:
     # Check for relative links that need adjusting relative to where
     # we are in the URL structure. Do this *before* performing the link
     # mapping because the latter introduces more relative links to check.
-    print(f"rewrite_hub_links: adjusting relative link: {link['href']}")
+    href_link = link['href']
+    print(f"rewrite_hub_links: adjusting relative link: {href_link}")
     if page_filename_head != page_filename:
-        if is_relative_url(link['href']) and link['href'][0] not in ['#', '/']:
-            if link['href'].startswith(page_filename):
+        if is_relative_url(href_link) and href_link[0] not in ['#', '/']:
+            if href_link.startswith(page_filename):
                 # We need to drop the bit that goes up to the first / in
                 # the link because otherwise it gets duplicated when
                 # Next.js processes it.
-                link['href'] = link['href'][len(page_filename_head)+1:]
-                print(f"rewrite_hub_links: adjusted relative link: {link['href']}")
+                link['href'] = href_link[len(page_filename_head)+1:]
+                print(f"rewrite_hub_links: new relative link: {link['href']}")
+                return True
+            if href_link.startswith("../"):
+                # Calculate the eventual path
+                link['href'] = resolve_relative_path(page_filename_head, href_link)
+                print(f"rewrite_hub_links: new relative link: {link['href']}")
                 return True
             print("rewrite_hub_links: no change")
     else:

From 42fea795df965281f45c594efa7ad2bf4d6b1350 Mon Sep 17 00:00:00 2001
From: Philip Colmer 
Date: Wed, 9 Jul 2025 10:00:13 +0100
Subject: [PATCH 33/36] Revert last commit

---
 sphinxcontrib/serializinghtml/html_assists.py | 22 -------------------
 1 file changed, 22 deletions(-)

diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py
index c13deff..a5e84f3 100644
--- a/sphinxcontrib/serializinghtml/html_assists.py
+++ b/sphinxcontrib/serializinghtml/html_assists.py
@@ -1,8 +1,6 @@
 from bs4 import BeautifulSoup, element
 from html import escape
 from urllib.parse import urlparse
-from pathlib import PurePosixPath
-import re
 
 def is_relative_url(url):
     parsed = urlparse(url)
@@ -133,21 +131,6 @@ def escape_encoded_pre_text(html: str) -> str:
         html = str(soup)
     return html
 
-def resolve_relative_path(base, relative):
-    base_parts = PurePosixPath(base).parts
-    rel_parts = PurePosixPath(relative).parts
-
-    stack = list(base_parts)
-
-    for part in rel_parts:
-        if part == "..":
-            if stack:
-                stack.pop()
-        elif part != ".":
-            stack.append(part)
-
-    return "/".join(stack)
-
 def process_relative_links(link: dict, page_filename: str, page_filename_head: str) -> bool:
     # Check for relative links that need adjusting relative to where
     # we are in the URL structure. Do this *before* performing the link
@@ -163,11 +146,6 @@ def process_relative_links(link: dict, page_filename: str, page_filename_head: s
                 link['href'] = href_link[len(page_filename_head)+1:]
                 print(f"rewrite_hub_links: new relative link: {link['href']}")
                 return True
-            if href_link.startswith("../"):
-                # Calculate the eventual path
-                link['href'] = resolve_relative_path(page_filename_head, href_link)
-                print(f"rewrite_hub_links: new relative link: {link['href']}")
-                return True
             print("rewrite_hub_links: no change")
     else:
         print("rewrite_hub_links: no relative link adjustment needed")

From 8c33079d8f7724d785cfe77eb5c3f2cbd93ca24a Mon Sep 17 00:00:00 2001
From: Philip Colmer 
Date: Thu, 10 Jul 2025 07:49:58 +0100
Subject: [PATCH 34/36] Process more relative links by checking head not full
 filename

---
 sphinxcontrib/serializinghtml/html_assists.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py
index a5e84f3..d6fdb1c 100644
--- a/sphinxcontrib/serializinghtml/html_assists.py
+++ b/sphinxcontrib/serializinghtml/html_assists.py
@@ -139,7 +139,7 @@ def process_relative_links(link: dict, page_filename: str, page_filename_head: s
     print(f"rewrite_hub_links: adjusting relative link: {href_link}")
     if page_filename_head != page_filename:
         if is_relative_url(href_link) and href_link[0] not in ['#', '/']:
-            if href_link.startswith(page_filename):
+            if href_link.startswith(page_filename_head):
                 # We need to drop the bit that goes up to the first / in
                 # the link because otherwise it gets duplicated when
                 # Next.js processes it.

From edbbb4638440560dfaabeac6ef491142439961f8 Mon Sep 17 00:00:00 2001
From: Philip Colmer 
Date: Thu, 10 Jul 2025 11:04:03 +0100
Subject: [PATCH 35/36] Calculate traversal if Sphinx hasn't

---
 sphinxcontrib/serializinghtml/html_assists.py | 34 ++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py
index d6fdb1c..3f0e40b 100644
--- a/sphinxcontrib/serializinghtml/html_assists.py
+++ b/sphinxcontrib/serializinghtml/html_assists.py
@@ -1,6 +1,7 @@
 from bs4 import BeautifulSoup, element
 from html import escape
 from urllib.parse import urlparse
+from pathlib import PurePosixPath
 
 def is_relative_url(url):
     parsed = urlparse(url)
@@ -37,7 +38,9 @@ def convert_tag_to_link(item_entry: element.Tag) -> dict:
 def process_section(result, child, section):
     # Is there a new unordered list within this section?
     if section != []:
-        result.append({ "type": "divider" })
+        # Only add a starting divider if there is already content
+        if result != []:
+            result.append({ "type": "divider" })
         # Now append the current page and the section links. The
         # ul tag is the only child returned, hence [0]
         result.append(section_links(child, section[0]))
@@ -131,6 +134,27 @@ def escape_encoded_pre_text(html: str) -> str:
         html = str(soup)
     return html
 
+def relative_traversal(from_path, to_path):
+    from_parts = PurePosixPath(from_path).parts
+    to_parts = PurePosixPath(to_path).parts
+
+    # Find common prefix length
+    common_length = 0
+    for f, t in zip(from_parts, to_parts):
+        if f == t:
+            common_length += 1
+        else:
+            break
+
+    # Steps up from 'from_path' to common ancestor
+    # Need to reduce the step count by one because of the way
+    # Next.js handles routes.
+    up_steps = len(from_parts) - common_length - 1
+    down_path = to_parts[common_length:]
+
+    result = "../" * up_steps + "/".join(down_path)
+    return result
+
 def process_relative_links(link: dict, page_filename: str, page_filename_head: str) -> bool:
     # Check for relative links that need adjusting relative to where
     # we are in the URL structure. Do this *before* performing the link
@@ -146,6 +170,14 @@ def process_relative_links(link: dict, page_filename: str, page_filename_head: s
                 link['href'] = href_link[len(page_filename_head)+1:]
                 print(f"rewrite_hub_links: new relative link: {link['href']}")
                 return True
+            # If we aren't on the same path, and we don't have any traversal
+            # at the start of the path, calculate the traversal required.
+            if not href_link.startswith("../"):
+                new_path = relative_traversal(page_filename, href_link)
+                if new_path != href_link:
+                    link['href'] = new_path
+                    print(f"rewrite_hub_links: new relative link: {link['href']}")
+                    return True
             print("rewrite_hub_links: no change")
     else:
         print("rewrite_hub_links: no relative link adjustment needed")

From 2b6e821018606a75abd96f3f7b78291a6f5b3705 Mon Sep 17 00:00:00 2001
From: Philip Colmer 
Date: Thu, 28 Aug 2025 12:01:23 +0100
Subject: [PATCH 36/36] Fix link mappings

---
 sphinxcontrib/serializinghtml/__init__.py     |  2 +-
 sphinxcontrib/serializinghtml/html_assists.py | 10 +++++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index aad2c48..2adf9ae 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ...
         def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ...
         def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ...
 
-__version__ = '2.0.0+Linaro-250620'
+__version__ = '2.0.0+Linaro-250828'
 __version_info__ = (2, 0, 0)
 
 package_dir = path.abspath(path.dirname(__file__))
diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py
index 3f0e40b..5978574 100644
--- a/sphinxcontrib/serializinghtml/html_assists.py
+++ b/sphinxcontrib/serializinghtml/html_assists.py
@@ -187,13 +187,16 @@ def process_link_mappings(link: dict, link_mappings: dict) -> bool:
     for key in link_mappings:
         # Check if the href starts with the key
         if link['href'].startswith(key):
+            print(f"process_link_mappings: matched {link['href']} against mapping")
             # We have a match, so strip the key from the href
             link['href'] = link['href'].replace(key, "")
+            print(f"process_link_mappings: after removing key, we're left with {link['href']}")
             # We also have to remove ".html" from the end of the link
             link['href'] = link['href'].replace(".html", "")
-            # If we're just left with "index", replace it with the value from the dictionary,
-            # which will also be the documentation root name
-            if link['href'] == "index":
+            # If we're just left with "index", or if we have nothing left, replace it
+            # with the value from the dictionary, which will also be the documentation
+            # root name
+            if link['href'] == "index" or link['href'] == "":
                 link['href'] = link_mappings[key]
             # Do we have a link that ENDS with "/index"? If we do, remove it
             if link['href'].endswith("/index"):
@@ -203,6 +206,7 @@ def process_link_mappings(link: dict, link_mappings: dict) -> bool:
             # /library/onelab/onelab
             # /library/laa/laa_getting_started
             link['href'] = f"/library/{link_mappings[key]}/{link['href']}"
+            print(f"process_link_mappings: mapped to {link['href']}")
             return True
     return False