From f33a11e10f1206256612868dead62aab130d610b Mon Sep 17 00:00:00 2001
From: Philip Colmer <philip.colmer@linaro.org>
Date: Wed, 31 Jul 2024 15:17:57 +0100
Subject: [PATCH 01/20] Linaro changes for JSON files consumed with React

---
 README.rst                                    | 23 ++------
 pyproject.toml                                |  2 +-
 sphinxcontrib/serializinghtml/__init__.py     | 20 +++++--
 .../serializinghtml/nav_html_to_json.py       | 54 +++++++++++++++++++
 4 files changed, 75 insertions(+), 24 deletions(-)
 create mode 100644 sphinxcontrib/serializinghtml/nav_html_to_json.py

diff --git a/README.rst b/README.rst
index d9ed40a..78eb63a 100644
--- a/README.rst
+++ b/README.rst
@@ -1,22 +1,5 @@
-=============================
-sphinxcontrib-serializinghtml
-=============================
+This is a fork of https://github.com/sphinx-doc/sphinxcontrib-serializinghtml
 
-sphinxcontrib-serializinghtml is a sphinx extension which outputs
-"serialized" HTML files (json and pickle).
+Changes made to this fork are to facilitate the creation of JSON files suitable for consumption by React.
 
-For more details, please visit http://www.sphinx-doc.org/.
-
-Installing
-==========
-
-Install from PyPI::
-
-   pip install -U sphinxcontrib-serializinghtml
-
-Contributing
-============
-
-See `CONTRIBUTING.rst`__
-
-.. __: https://github.com/sphinx-doc/sphinx/blob/master/CONTRIBUTING.rst
+Since those changes are very specific, they have not been contributed back to the original repo.
diff --git a/pyproject.toml b/pyproject.toml
index f14054e..a8a4329 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,7 +40,7 @@ classifiers = [
     "Topic :: Text Processing",
     "Topic :: Utilities",
 ]
-dependencies = []
+dependencies = ["beautifulsoup4"]
 dynamic = ["version"]
 
 [project.optional-dependencies]
diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index bdbeb6f..d6e83ab 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -11,7 +11,7 @@
 from sphinx.locale import get_translation
 from sphinx.util.osutil import SEP, copyfile, ensuredir, os_path
 
-from sphinxcontrib.serializinghtml import jsonimpl
+from sphinxcontrib.serializinghtml import jsonimpl, nav_html_to_json
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
@@ -91,9 +91,23 @@ def handle_page(self, pagename: str, ctx: dict[str, Any], templatename: str = 'p
         ctx.setdefault('pathto', lambda p: p)
         self.add_sidebars(pagename, ctx)
 
+        # Add the toc tree as a JSON dictionary
+        ctx['toctree'] = nav_html_to_json.convert_nav_html_to_json(self._get_local_toctree(pagename))
+
         if not outfilename:
+            # PJC: Ensure that index files are actually written under the name of the
+            #      directory leafname.
+            parts = pagename.split(SEP)
+            if parts[len(parts)-1] == "index":
+                if len(parts) == 1:
+                    # Use the project name
+                    page_filename = self.get_builder_config('project_name', 'html')
+                else:
+                    page_filename = SEP.join(parts[:-1])
+            else:
+                page_filename = pagename
             outfilename = path.join(self.outdir,
-                                    os_path(pagename) + self.out_suffix)
+                                    os_path(page_filename) + self.out_suffix)
 
         # we're not taking the return value here, since no template is
         # actually rendered
@@ -161,7 +175,7 @@ class JSONHTMLBuilder(SerializingHTMLBuilder):
     implementation_dumps_unicode = True
     indexer_format = jsonimpl
     indexer_dumps_unicode = True
-    out_suffix = '.fjson'
+    out_suffix = '.json'
     globalcontext_filename = 'globalcontext.json'
     searchindex_filename = 'searchindex.json'
 
diff --git a/sphinxcontrib/serializinghtml/nav_html_to_json.py b/sphinxcontrib/serializinghtml/nav_html_to_json.py
new file mode 100644
index 0000000..ff9b3d3
--- /dev/null
+++ b/sphinxcontrib/serializinghtml/nav_html_to_json.py
@@ -0,0 +1,54 @@
+from bs4 import BeautifulSoup, element
+import json
+
+def section_links(parent_entry: element.Tag, list_entry: element.Tag) -> dict:
+    section_result = []
+    for child in list_entry.children:
+        if type(child) is element.Tag and child.name == "li":
+            section_result.append(convert_tag_to_link(child))
+    return {
+                "type": "expandable-link-group",
+                "text": parent_entry.contents[0].contents[0],
+                "href": parent_entry.contents[0]["href"],
+                "items": section_result
+            }
+
+
+def convert_tag_to_link(item_entry: element.Tag) -> dict:
+    # The a tag is a child of the li tag
+    a_tag = item_entry.contents[0]
+    return {
+            "type": "link",
+            "text": a_tag.contents[0],
+            "href": a_tag["href"]
+        }
+
+def convert_nav_html_to_json(html: str) -> list:
+    result = []
+    soup = BeautifulSoup(html, "html.parser")
+
+    # Start with the unordered list
+    ul = soup.ul
+    pending_divider = False
+    # Iterate through list items
+    for child in ul.children:
+        if type(child) is element.Tag and child.name == "li":
+            # Is there a new unordered list within this section?
+            section = child.find_all("ul", limit=1)
+            if section != []:
+                # Yes, there is, so we have a sub-section. If we've got some content
+                # already, add a divider.
+                if result != []:
+                    result.append({ "type": "divider" })
+                # Now append the current page and the section links. The
+                # ul tag is the only child returned, hence [0]
+                result.append(section_links(child, section[0]))
+                # If there are any "normal" entries after this section
+                # add a divider first
+                pending_divider = True
+            else:
+                if pending_divider:
+                    result.append({ "type": "divider" })
+                    pending_divider = False
+                result.append(convert_tag_to_link(child))
+    return result

From d2ea55f51560b399a7357e1994735868b5c17154 Mon Sep 17 00:00:00 2001
From: Philip Colmer <philip.colmer@linaro.org>
Date: Wed, 31 Jul 2024 15:25:56 +0100
Subject: [PATCH 02/20] Update current_page_name to lose "/index"

---
 sphinxcontrib/serializinghtml/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index d6e83ab..747b72d 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -104,6 +104,7 @@ def handle_page(self, pagename: str, ctx: dict[str, Any], templatename: str = 'p
                     page_filename = self.get_builder_config('project_name', 'html')
                 else:
                     page_filename = SEP.join(parts[:-1])
+                ctx['current_page_name'] = page_filename
             else:
                 page_filename = pagename
             outfilename = path.join(self.outdir,

From 28e7ebdb55aa9ebff75c12b8ed98e8069a11d3dc Mon Sep 17 00:00:00 2001
From: Philip Colmer <philip.colmer@linaro.org>
Date: Mon, 5 Aug 2024 08:45:04 +0100
Subject: [PATCH 03/20] Add dir/path support

---
 sphinxcontrib/serializinghtml/__init__.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index 747b72d..d037db4 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -55,7 +55,24 @@ class SerializingHTMLBuilder(StandaloneHTMLBuilder):
 
     def init(self) -> None:
         self.build_info = BuildInfo(self.config, self.tags)
-        self.imagedir = '_images'
+        # Cope with whether or not Sphinx has the required configuration variables
+        # set.
+        # See HTML Builder comments for explanation of image setup & handling
+        html_image_dir = None
+        try:
+            html_image_dir = self.get_builder_config('image_dir', 'html')
+        except AttributeError:
+            pass
+        if html_image_dir is not None:
+            self.imagedir = html_image_dir
+        else:
+            self.imagedir = '_images'
+        html_image_path = None
+        try:
+            html_image_path = self.get_builder_config('image_path', 'html')
+        except AttributeError:
+            pass
+        self.imagepath = html_image_path
         self.current_docname = ''
         self.theme = None  # type: ignore[assignment] # no theme necessary
         self.templates = None  # no template bridge necessary

From b2d90cbfe0e1b6ca8a67bce8a58399fcd11c8ebf Mon Sep 17 00:00:00 2001
From: Philip Colmer <philip.colmer@linaro.org>
Date: Mon, 5 Aug 2024 09:35:14 +0100
Subject: [PATCH 04/20] Add Linaro to version string

---
 sphinxcontrib/serializinghtml/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index d037db4..06dc166 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ...
         def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ...
         def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ...
 
-__version__ = '2.0.0'
+__version__ = '2.0.0+Linaro'
 __version_info__ = (2, 0, 0)
 
 package_dir = path.abspath(path.dirname(__file__))

From 62d87c1b89fad89034cb53e9d32ea40042884f59 Mon Sep 17 00:00:00 2001
From: Philip Colmer <philip.colmer@linaro.org>
Date: Tue, 6 Aug 2024 14:47:12 +0100
Subject: [PATCH 05/20] Make URIs absolute for Solutions Hub

---
 sphinxcontrib/serializinghtml/__init__.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index 06dc166..8b3497e 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ...
         def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ...
         def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ...
 
-__version__ = '2.0.0+Linaro'
+__version__ = '2.0.0+Linaro-240806'
 __version_info__ = (2, 0, 0)
 
 package_dir = path.abspath(path.dirname(__file__))
@@ -83,11 +83,16 @@ def init(self) -> None:
         self.use_index = self.get_builder_config('use_index', 'html')
 
     def get_target_uri(self, docname: str, typ: str | None = None) -> str:
+        print(f"get_target_uri: {docname}")
+        # For the Solutions Hub, we want all URIs to be absolute. They will
+        # all be /library/<project_name>/<docname> except that "index" will
+        # get trimmed off
+        project_name = self.get_builder_config('project_name', 'html')
         if docname == 'index':
-            return ''
+            return f"/library/{project_name}/{project_name}"
         if docname.endswith(SEP + 'index'):
-            return docname[:-5]  # up to sep
-        return docname + SEP
+            return f"/library/{project_name}/{docname[:-5]}"  # up to sep
+        return f"/library/{project_name}/{docname}"
 
     def dump_context(self, context: dict[str, Any], filename: str | os.PathLike[str]) -> None:
         context = context.copy()

From e8a198143c11a37f842c085d3cb0c91e33e7ff2a Mon Sep 17 00:00:00 2001
From: Philip Colmer <philip.colmer@linaro.org>
Date: Tue, 6 Aug 2024 14:51:31 +0100
Subject: [PATCH 06/20] Remove print statement

---
 sphinxcontrib/serializinghtml/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index 8b3497e..622d12d 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -83,7 +83,6 @@ def init(self) -> None:
         self.use_index = self.get_builder_config('use_index', 'html')
 
     def get_target_uri(self, docname: str, typ: str | None = None) -> str:
-        print(f"get_target_uri: {docname}")
         # For the Solutions Hub, we want all URIs to be absolute. They will
         # all be /library/<project_name>/<docname> except that "index" will
         # get trimmed off

From 11ea0ae28bf6d10be30559a54cbfcd2f8671de5a Mon Sep 17 00:00:00 2001
From: Philip Colmer <philip.colmer@linaro.org>
Date: Tue, 6 Aug 2024 14:57:32 +0100
Subject: [PATCH 07/20] Trying another URI approach

---
 sphinxcontrib/serializinghtml/__init__.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index 622d12d..a58bef0 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -83,15 +83,11 @@ def init(self) -> None:
         self.use_index = self.get_builder_config('use_index', 'html')
 
     def get_target_uri(self, docname: str, typ: str | None = None) -> str:
-        # For the Solutions Hub, we want all URIs to be absolute. They will
-        # all be /library/<project_name>/<docname> except that "index" will
-        # get trimmed off
-        project_name = self.get_builder_config('project_name', 'html')
         if docname == 'index':
-            return f"/library/{project_name}/{project_name}"
+            return "/"
         if docname.endswith(SEP + 'index'):
-            return f"/library/{project_name}/{docname[:-5]}"  # up to sep
-        return f"/library/{project_name}/{docname}"
+            return f"/{docname[:-5]}"  # up to sep
+        return f"/{docname}"
 
     def dump_context(self, context: dict[str, Any], filename: str | os.PathLike[str]) -> None:
         context = context.copy()

From d5e8f30dfe996aff47485d90ec1edcabf50d406b Mon Sep 17 00:00:00 2001
From: Philip Colmer <philip.colmer@linaro.org>
Date: Tue, 6 Aug 2024 15:06:33 +0100
Subject: [PATCH 08/20] Completely revert URI changes. Didn't work.

---
 sphinxcontrib/serializinghtml/__init__.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index a58bef0..f037d4b 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -84,10 +84,10 @@ def init(self) -> None:
 
     def get_target_uri(self, docname: str, typ: str | None = None) -> str:
         if docname == 'index':
-            return "/"
+            return ""
         if docname.endswith(SEP + 'index'):
-            return f"/{docname[:-5]}"  # up to sep
-        return f"/{docname}"
+            return docname[:-5]  # up to sep
+        return docname
 
     def dump_context(self, context: dict[str, Any], filename: str | os.PathLike[str]) -> None:
         context = context.copy()

From 559f8d49ec78d3bb5a21fe56a28bb9b5e3479293 Mon Sep 17 00:00:00 2001
From: Philip Colmer <philip.colmer@linaro.org>
Date: Tue, 6 Aug 2024 16:13:49 +0100
Subject: [PATCH 09/20] Clean up navigation hrefs

---
 sphinxcontrib/serializinghtml/nav_html_to_json.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/sphinxcontrib/serializinghtml/nav_html_to_json.py b/sphinxcontrib/serializinghtml/nav_html_to_json.py
index ff9b3d3..9cbf8af 100644
--- a/sphinxcontrib/serializinghtml/nav_html_to_json.py
+++ b/sphinxcontrib/serializinghtml/nav_html_to_json.py
@@ -1,6 +1,14 @@
 from bs4 import BeautifulSoup, element
 import json
 
+def clean_href(href: str) -> str:
+    """ Make sure the href doesn't start or end with a / """
+    if href.startswith("/"):  # startswith/endswith are safe on "", unlike href[0]
+        href = href[1:]
+    if href.endswith("/"):  # href may be "" by now (input was "/"); href[-1] would raise
+        href = href[:-1]
+    return href
+
 def section_links(parent_entry: element.Tag, list_entry: element.Tag) -> dict:
     section_result = []
     for child in list_entry.children:
@@ -9,7 +17,7 @@ def section_links(parent_entry: element.Tag, list_entry: element.Tag) -> dict:
     return {
                 "type": "expandable-link-group",
                 "text": parent_entry.contents[0].contents[0],
-                "href": parent_entry.contents[0]["href"],
+                "href": clean_href(parent_entry.contents[0]["href"]),
                 "items": section_result
             }
 
@@ -20,7 +28,7 @@ def convert_tag_to_link(item_entry: element.Tag) -> dict:
     return {
             "type": "link",
             "text": a_tag.contents[0],
-            "href": a_tag["href"]
+            "href": clean_href(a_tag["href"])
         }
 
 def convert_nav_html_to_json(html: str) -> list:

From 6ec2de085a2cf74338063cde1ab859bd61201fe3 Mon Sep 17 00:00:00 2001
From: Philip Colmer <philip.colmer@linaro.org>
Date: Wed, 7 Aug 2024 15:22:21 +0100
Subject: [PATCH 10/20] Fix UL bug and bump date

---
 sphinxcontrib/serializinghtml/__init__.py     |  2 +-
 .../serializinghtml/nav_html_to_json.py       | 41 ++++++++++---------
 2 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index f037d4b..95ef5d1 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ...
         def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ...
         def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ...
 
-__version__ = '2.0.0+Linaro-240806'
+__version__ = '2.0.0+Linaro-240807'
 __version_info__ = (2, 0, 0)
 
 package_dir = path.abspath(path.dirname(__file__))
diff --git a/sphinxcontrib/serializinghtml/nav_html_to_json.py b/sphinxcontrib/serializinghtml/nav_html_to_json.py
index 9cbf8af..98a92a0 100644
--- a/sphinxcontrib/serializinghtml/nav_html_to_json.py
+++ b/sphinxcontrib/serializinghtml/nav_html_to_json.py
@@ -39,24 +39,25 @@ def convert_nav_html_to_json(html: str) -> list:
     ul = soup.ul
     pending_divider = False
     # Iterate through list items
-    for child in ul.children:
-        if type(child) is element.Tag and child.name == "li":
-            # Is there a new unordered list within this section?
-            section = child.find_all("ul", limit=1)
-            if section != []:
-                # Yes, there is, so we have a sub-section. If we've got some content
-                # already, add a divider.
-                if result != []:
-                    result.append({ "type": "divider" })
-                # Now append the current page and the section links. The
-                # ul tag is the only child returned, hence [0]
-                result.append(section_links(child, section[0]))
-                # If there are any "normal" entries after this section
-                # add a divider first
-                pending_divider = True
-            else:
-                if pending_divider:
-                    result.append({ "type": "divider" })
-                    pending_divider = False
-                result.append(convert_tag_to_link(child))
+    if ul is not None:
+        for child in ul.children:
+            if type(child) is element.Tag and child.name == "li":
+                # Is there a new unordered list within this section?
+                section = child.find_all("ul", limit=1)
+                if section != []:
+                    # Yes, there is, so we have a sub-section. If we've got some content
+                    # already, add a divider.
+                    if result != []:
+                        result.append({ "type": "divider" })
+                    # Now append the current page and the section links. The
+                    # ul tag is the only child returned, hence [0]
+                    result.append(section_links(child, section[0]))
+                    # If there are any "normal" entries after this section
+                    # add a divider first
+                    pending_divider = True
+                else:
+                    if pending_divider:
+                        result.append({ "type": "divider" })
+                        pending_divider = False
+                    result.append(convert_tag_to_link(child))
     return result

From 73c32d51b69a1156ad6eb158c1753f8475c46587 Mon Sep 17 00:00:00 2001
From: Philip Colmer <philip.colmer@linaro.org>
Date: Wed, 4 Sep 2024 09:19:50 +0100
Subject: [PATCH 11/20] Handle TOCs with multiple UL sections

---
 sphinxcontrib/serializinghtml/__init__.py         | 2 +-
 sphinxcontrib/serializinghtml/nav_html_to_json.py | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index 95ef5d1..373af65 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ...
         def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ...
         def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ...
 
-__version__ = '2.0.0+Linaro-240807'
+__version__ = '2.0.0+Linaro-240904'
 __version_info__ = (2, 0, 0)
 
 package_dir = path.abspath(path.dirname(__file__))
diff --git a/sphinxcontrib/serializinghtml/nav_html_to_json.py b/sphinxcontrib/serializinghtml/nav_html_to_json.py
index 98a92a0..b212c78 100644
--- a/sphinxcontrib/serializinghtml/nav_html_to_json.py
+++ b/sphinxcontrib/serializinghtml/nav_html_to_json.py
@@ -1,5 +1,6 @@
 from bs4 import BeautifulSoup, element
 import json
+import sys
 
 def clean_href(href: str) -> str:
     """ Make sure the href doesn't start or end with a / """
@@ -39,7 +40,7 @@ def convert_nav_html_to_json(html: str) -> list:
     ul = soup.ul
     pending_divider = False
     # Iterate through list items
-    if ul is not None:
+    while ul is not None:
         for child in ul.children:
             if type(child) is element.Tag and child.name == "li":
                 # Is there a new unordered list within this section?
@@ -60,4 +61,9 @@ def convert_nav_html_to_json(html: str) -> list:
                         result.append({ "type": "divider" })
                         pending_divider = False
                     result.append(convert_tag_to_link(child))
+        while True:
+            ul = ul.next_sibling
+            if ul is None or type(ul) is element.Tag:
+                break
+            # Not an acceptable type - loop and get the next sibling
     return result

From 766614f02af0c2b4921ae2bfa7d2666a098711cc Mon Sep 17 00:00:00 2001
From: Philip Colmer <philip.colmer@linaro.org>
Date: Wed, 4 Sep 2024 14:44:53 +0100
Subject: [PATCH 12/20] Implement fix for entities in img alt text

---
 sphinxcontrib/serializinghtml/__init__.py        | 11 +++++++++--
 .../{nav_html_to_json.py => html_assists.py}     | 16 +++++++++++++++-
 2 files changed, 24 insertions(+), 3 deletions(-)
 rename sphinxcontrib/serializinghtml/{nav_html_to_json.py => html_assists.py} (81%)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index 373af65..72537a1 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -11,7 +11,7 @@
 from sphinx.locale import get_translation
 from sphinx.util.osutil import SEP, copyfile, ensuredir, os_path
 
-from sphinxcontrib.serializinghtml import jsonimpl, nav_html_to_json
+from sphinxcontrib.serializinghtml import html_assists, jsonimpl
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
@@ -109,7 +109,7 @@ def handle_page(self, pagename: str, ctx: dict[str, Any], templatename: str = 'p
         self.add_sidebars(pagename, ctx)
 
         # Add the toc tree as a JSON dictionary
-        ctx['toctree'] = nav_html_to_json.convert_nav_html_to_json(self._get_local_toctree(pagename))
+        ctx['toctree'] = html_assists.convert_nav_html_to_json(self._get_local_toctree(pagename))
 
         if not outfilename:
             # PJC: Ensure that index files are actually written under the name of the
@@ -136,6 +136,13 @@ def handle_page(self, pagename: str, ctx: dict[str, Any], templatename: str = 'p
             if isinstance(ctx[key], types.FunctionType):
                 del ctx[key]
 
+        # PJC: Some Linaro documentation has encoded attributes in image ALT text
+        # which then gets decoded when the HTML is loaded into the DOM, so
+        # we need to alter it by "escaping" the ampersands with &amp; to
+        # prevent the decoding.
+        if "body" in ctx:
+            ctx['body'] = html_assists.escape_encoded_alt_text(ctx['body'])
+
         ensuredir(path.dirname(outfilename))
         self.dump_context(ctx, outfilename)
 
diff --git a/sphinxcontrib/serializinghtml/nav_html_to_json.py b/sphinxcontrib/serializinghtml/html_assists.py
similarity index 81%
rename from sphinxcontrib/serializinghtml/nav_html_to_json.py
rename to sphinxcontrib/serializinghtml/html_assists.py
index b212c78..3679940 100644
--- a/sphinxcontrib/serializinghtml/nav_html_to_json.py
+++ b/sphinxcontrib/serializinghtml/html_assists.py
@@ -1,6 +1,6 @@
 from bs4 import BeautifulSoup, element
-import json
 import sys
+from html import escape
 
 def clean_href(href: str) -> str:
     """ Make sure the href doesn't start or end with a / """
@@ -32,6 +32,20 @@ def convert_tag_to_link(item_entry: element.Tag) -> dict:
             "href": clean_href(a_tag["href"])
         }
 
+def escape_encoded_alt_text(html: str) -> str:
+    soup = BeautifulSoup(html, "html.parser")
+    images = soup.find_all('img')
+    for img in images:
+        if img.get('alt', "") != "":  # .get(): an <img> with no alt must not raise KeyError
+            # At this point, Beautiful Soup has done what a browser does - decode
+            # any encoded attributes. So we need to re-encode the string, see if
+            # there are any ampersands and, if so, re-encode them again.
+            interim = escape(img['alt'])
+            if interim.find("&") != -1:
+                img['alt'] = escape(interim)
+
+    return html
+
 def convert_nav_html_to_json(html: str) -> list:
     result = []
     soup = BeautifulSoup(html, "html.parser")

From 2e0a128df58163634e27e3215cebb861f590d2c7 Mon Sep 17 00:00:00 2001
From: Philip Colmer <philip.colmer@linaro.org>
Date: Wed, 4 Sep 2024 14:45:13 +0100
Subject: [PATCH 13/20] Bump version

---
 sphinxcontrib/serializinghtml/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index 72537a1..a973d72 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ...
         def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ...
         def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ...
 
-__version__ = '2.0.0+Linaro-240904'
+__version__ = '2.0.0+Linaro-240904a'
 __version_info__ = (2, 0, 0)
 
 package_dir = path.abspath(path.dirname(__file__))

From 6e35e86a12719a9bc757802fb96dc4eccc4aa249 Mon Sep 17 00:00:00 2001
From: Philip Colmer <philip.colmer@linaro.org>
Date: Wed, 4 Sep 2024 14:57:46 +0100
Subject: [PATCH 14/20] Fix bug around editing img alt text

---
 sphinxcontrib/serializinghtml/html_assists.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py
index 3679940..e3bcf49 100644
--- a/sphinxcontrib/serializinghtml/html_assists.py
+++ b/sphinxcontrib/serializinghtml/html_assists.py
@@ -33,6 +33,7 @@ def convert_tag_to_link(item_entry: element.Tag) -> dict:
         }
 
 def escape_encoded_alt_text(html: str) -> str:
+    edited = False
     soup = BeautifulSoup(html, "html.parser")
     images = soup.find_all('img')
     for img in images:
@@ -43,7 +44,10 @@ def escape_encoded_alt_text(html: str) -> str:
             interim = escape(img['alt'])
             if interim.find("&") != -1:
                 img['alt'] = escape(interim)
+                edited = True
 
+    if edited:
+        html = str(soup)
     return html
 
 def convert_nav_html_to_json(html: str) -> list:

From 4585595ce5a8ef872601a3c5abb22b4bb9956d33 Mon Sep 17 00:00:00 2001
From: Philip Colmer <philip.colmer@linaro.org>
Date: Wed, 4 Sep 2024 15:03:05 +0100
Subject: [PATCH 15/20] Bump version

---
 sphinxcontrib/serializinghtml/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index a973d72..b360030 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ...
         def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ...
         def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ...
 
-__version__ = '2.0.0+Linaro-240904a'
+__version__ = '2.0.0+Linaro-240904b'
 __version_info__ = (2, 0, 0)
 
 package_dir = path.abspath(path.dirname(__file__))

From 9445c448e5132a60c44cdaa53972dc4addedbe7c Mon Sep 17 00:00:00 2001
From: Philip Colmer <philip.colmer@linaro.org>
Date: Thu, 24 Oct 2024 12:38:29 +0100
Subject: [PATCH 16/20] Fix encoded attributes in pre sections

---
 sphinxcontrib/serializinghtml/__init__.py     | 15 ++-
 sphinxcontrib/serializinghtml/html_assists.py | 98 ++++++++++++-------
 2 files changed, 74 insertions(+), 39 deletions(-)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index b360030..bc83403 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ...
         def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ...
         def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ...
 
-__version__ = '2.0.0+Linaro-240904b'
+__version__ = '2.0.0+Linaro-241024'
 __version_info__ = (2, 0, 0)
 
 package_dir = path.abspath(path.dirname(__file__))
@@ -104,6 +104,7 @@ def dump_context(self, context: dict[str, Any], filename: str | os.PathLike[str]
 
     def handle_page(self, pagename: str, ctx: dict[str, Any], templatename: str = 'page.html',
                     outfilename: str | None = None, event_arg: Any = None) -> None:
+        print(f"handle_page: {pagename}")
         ctx['current_page_name'] = pagename
         ctx.setdefault('pathto', lambda p: p)
         self.add_sidebars(pagename, ctx)
@@ -136,12 +137,16 @@ def handle_page(self, pagename: str, ctx: dict[str, Any], templatename: str = 'p
             if isinstance(ctx[key], types.FunctionType):
                 del ctx[key]
 
-        # PJC: Some Linaro documentation has encoded attributes in image ALT text
-        # which then gets decoded when the HTML is loaded into the DOM, so
-        # we need to alter it by "escaping" the ampersands with &amp; to
-        # prevent the decoding.
         if "body" in ctx:
+            # PJC: Some Linaro documentation has encoded attributes in image ALT text
+            # which then gets decoded when the HTML is loaded into the DOM, so
+            # we need to alter it by "escaping" the ampersands with &amp; to
+            # prevent the decoding.
             ctx['body'] = html_assists.escape_encoded_alt_text(ctx['body'])
+            # PJC: Furthermore, if there is any formatted code with encoded attributes,
+            # e.g. < changed to &lt; then that also needs to be escaped because it is
+            # also getting decoded.
+            ctx['body'] = html_assists.escape_encoded_pre_text(ctx['body'])
 
         ensuredir(path.dirname(outfilename))
         self.dump_context(ctx, outfilename)
diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py
index e3bcf49..b1e6204 100644
--- a/sphinxcontrib/serializinghtml/html_assists.py
+++ b/sphinxcontrib/serializinghtml/html_assists.py
@@ -22,7 +22,6 @@ def section_links(parent_entry: element.Tag, list_entry: element.Tag) -> dict:
                 "items": section_result
             }
 
-
 def convert_tag_to_link(item_entry: element.Tag) -> dict:
     # The a tag is a child of the li tag
     a_tag = item_entry.contents[0]
@@ -32,6 +31,48 @@ def convert_tag_to_link(item_entry: element.Tag) -> dict:
             "href": clean_href(a_tag["href"])
         }
 
+def process_section(result, child, section, pending_divider) -> bool:
+    if section != []:
+                # Yes, there is, so we have a sub-section. If we've got some content
+                # already, add a divider.
+        if result != []:
+            result.append({ "type": "divider" })
+                # Now append the current page and the section links. The
+                # ul tag is the only child returned, hence [0]
+        result.append(section_links(child, section[0]))
+                # If there are any "normal" entries after this section
+                # add a divider first
+        pending_divider = True
+    else:
+        if pending_divider:
+            result.append({ "type": "divider" })
+            pending_divider = False
+        result.append(convert_tag_to_link(child))
+
+def process_ul_children(result, ul):
+    pending_divider = False
+    for child in ul.children:
+        if type(child) is element.Tag and child.name == "li":
+            # Is there a new unordered list within this section?
+            section = child.find_all("ul", limit=1)
+            pending_divider = process_section(result, child, section, pending_divider)
+
+def convert_nav_html_to_json(html: str) -> list:
+    result = []
+    soup = BeautifulSoup(html, "html.parser")
+
+    # Start with the unordered list
+    ul = soup.ul
+    # Iterate through list items
+    while ul is not None:
+        process_ul_children(result, ul)
+        while True:
+            ul = ul.next_sibling
+            if ul is None or type(ul) is element.Tag:
+                break
+            # Not an acceptable type - loop and get the next sibling
+    return result
+
 def escape_encoded_alt_text(html: str) -> str:
     edited = False
     soup = BeautifulSoup(html, "html.parser")
@@ -50,38 +91,27 @@ def escape_encoded_alt_text(html: str) -> str:
         html = str(soup)
     return html
 
-def convert_nav_html_to_json(html: str) -> list:
-    result = []
+def escape_encoded_pre_text(html: str) -> str:
+    print("escape_encoded_pre_text")
+    edited = False
     soup = BeautifulSoup(html, "html.parser")
+    spans = soup.find_all('span')
+    for span in spans:
+        classes = span["class"]
+        matched_pre = False
+        for this_class in classes:
+            if this_class == "pre":
+                matched_pre = True
+        if matched_pre:
+            # At this point, Beautiful Soup has done what a browser does - decode
+            # any encoded attributes. So we need to re-encode the string, see if
+            # there are any ampersands and, if so, re-encode them again.
+            interim = escape(span.string)
+            if interim.find("&") != -1:
+                span.string = escape(interim)
+                edited = True
 
-    # Start with the unordered list
-    ul = soup.ul
-    pending_divider = False
-    # Iterate through list items
-    while ul is not None:
-        for child in ul.children:
-            if type(child) is element.Tag and child.name == "li":
-                # Is there a new unordered list within this section?
-                section = child.find_all("ul", limit=1)
-                if section != []:
-                    # Yes, there is, so we have a sub-section. If we've got some content
-                    # already, add a divider.
-                    if result != []:
-                        result.append({ "type": "divider" })
-                    # Now append the current page and the section links. The
-                    # ul tag is the only child returned, hence [0]
-                    result.append(section_links(child, section[0]))
-                    # If there are any "normal" entries after this section
-                    # add a divider first
-                    pending_divider = True
-                else:
-                    if pending_divider:
-                        result.append({ "type": "divider" })
-                        pending_divider = False
-                    result.append(convert_tag_to_link(child))
-        while True:
-            ul = ul.next_sibling
-            if ul is None or type(ul) is element.Tag:
-                break
-            # Not an acceptable type - loop and get the next sibling
-    return result
+    if edited:
+        html = str(soup)
+        print(html)
+    return html

From c47f1eb651401db3f0230dfa8ab20758ae116baf Mon Sep 17 00:00:00 2001
From: Philip Colmer <philip.colmer@linaro.org>
Date: Thu, 24 Oct 2024 13:55:30 +0100
Subject: [PATCH 17/20] Fix bugs

---
 sphinxcontrib/serializinghtml/__init__.py     |  2 +-
 sphinxcontrib/serializinghtml/html_assists.py | 19 +++++++++++--------
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index bc83403..545fce0 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ...
         def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ...
         def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ...
 
-__version__ = '2.0.0+Linaro-241024'
+__version__ = '2.0.0+Linaro-241024a'
 __version_info__ = (2, 0, 0)
 
 package_dir = path.abspath(path.dirname(__file__))
diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py
index b1e6204..36f5fc1 100644
--- a/sphinxcontrib/serializinghtml/html_assists.py
+++ b/sphinxcontrib/serializinghtml/html_assists.py
@@ -91,18 +91,22 @@ def escape_encoded_alt_text(html: str) -> str:
         html = str(soup)
     return html
 
+def matched_pre(span) -> bool:
+    """ Check if this span is specifying the "pre" class """
+    if "class" not in span:
+        return False
+    classes = span["class"]
+    for this_class in classes:
+        if this_class == "pre":
+            return True
+    return False
+
 def escape_encoded_pre_text(html: str) -> str:
-    print("escape_encoded_pre_text")
     edited = False
     soup = BeautifulSoup(html, "html.parser")
     spans = soup.find_all('span')
     for span in spans:
-        classes = span["class"]
-        matched_pre = False
-        for this_class in classes:
-            if this_class == "pre":
-                matched_pre = True
-        if matched_pre:
+        if matched_pre(span):
             # At this point, Beautiful Soup has done what a browser does - decode
             # any encoded attributes. So we need to re-encode the string, see if
             # there are any ampersands and, if so, re-encode them again.
@@ -113,5 +117,4 @@ def escape_encoded_pre_text(html: str) -> str:
 
     if edited:
         html = str(soup)
-        print(html)
     return html

From 05dd739f49a9ededa88b9f800e1396d4f6c932ca Mon Sep 17 00:00:00 2001
From: Philip Colmer <philip.colmer@linaro.org>
Date: Mon, 28 Oct 2024 09:20:26 +0000
Subject: [PATCH 18/20] Fix pre handling

---
 sphinxcontrib/serializinghtml/__init__.py     |  3 +--
 sphinxcontrib/serializinghtml/html_assists.py | 27 ++++++-------------
 2 files changed, 9 insertions(+), 21 deletions(-)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index 545fce0..36d5a22 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -23,7 +23,7 @@ def dumps(self, obj: Any, *args: Any, **kwargs: Any) -> str | bytes: ...
         def load(self, file: Any, *args: Any, **kwargs: Any) -> Any: ...
         def loads(self, data: Any, *args: Any, **kwargs: Any) -> Any: ...
 
-__version__ = '2.0.0+Linaro-241024a'
+__version__ = '2.0.0+Linaro-241028'
 __version_info__ = (2, 0, 0)
 
 package_dir = path.abspath(path.dirname(__file__))
@@ -104,7 +104,6 @@ def dump_context(self, context: dict[str, Any], filename: str | os.PathLike[str]
 
     def handle_page(self, pagename: str, ctx: dict[str, Any], templatename: str = 'page.html',
                     outfilename: str | None = None, event_arg: Any = None) -> None:
-        print(f"handle_page: {pagename}")
         ctx['current_page_name'] = pagename
         ctx.setdefault('pathto', lambda p: p)
         self.add_sidebars(pagename, ctx)
diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py
index 36f5fc1..c818c5e 100644
--- a/sphinxcontrib/serializinghtml/html_assists.py
+++ b/sphinxcontrib/serializinghtml/html_assists.py
@@ -91,29 +91,18 @@ def escape_encoded_alt_text(html: str) -> str:
         html = str(soup)
     return html
 
-def matched_pre(span) -> bool:
-    """ Check if this span is specifying the "pre" class """
-    if "class" not in span:
-        return False
-    classes = span["class"]
-    for this_class in classes:
-        if this_class == "pre":
-            return True
-    return False
-
 def escape_encoded_pre_text(html: str) -> str:
     edited = False
     soup = BeautifulSoup(html, "html.parser")
-    spans = soup.find_all('span')
+    spans = soup.find_all('span', class_="pre")
     for span in spans:
-        if matched_pre(span):
-            # At this point, Beautiful Soup has done what a browser does - decode
-            # any encoded attributes. So we need to re-encode the string, see if
-            # there are any ampersands and, if so, re-encode them again.
-            interim = escape(span.string)
-            if interim.find("&") != -1:
-                span.string = escape(interim)
-                edited = True
+        # At this point, Beautiful Soup has done what a browser does - decode
+        # any encoded attributes. So we need to re-encode the string, see if
+        # there are any ampersands and, if so, re-encode them again.
+        interim = escape(span.string)
+        if interim.find("&") != -1:
+            span.string = escape(interim)
+            edited = True
 
     if edited:
         html = str(soup)

From 26cd446e097f3b5f494268ee24be09b4e6f269e9 Mon Sep 17 00:00:00 2001
From: Philip Colmer <philip.colmer@linaro.org>
Date: Tue, 29 Oct 2024 15:27:01 +0000
Subject: [PATCH 19/20] Return updated divider flag

---
 sphinxcontrib/serializinghtml/html_assists.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py
index c818c5e..fb015da 100644
--- a/sphinxcontrib/serializinghtml/html_assists.py
+++ b/sphinxcontrib/serializinghtml/html_assists.py
@@ -48,6 +48,7 @@ def process_section(result, child, section, pending_divider) -> bool:
             result.append({ "type": "divider" })
             pending_divider = False
         result.append(convert_tag_to_link(child))
+    return pending_divider
 
 def process_ul_children(result, ul):
     pending_divider = False

From 2aa3106f04801a07aee5f8b887e73f1670ccd5b6 Mon Sep 17 00:00:00 2001
From: Philip Colmer <philip.colmer@linaro.org>
Date: Thu, 8 May 2025 14:47:53 +0100
Subject: [PATCH 20/20] Add support to map external links to relative local
 links

---
 sphinxcontrib/serializinghtml/__init__.py     | 12 ++++++++++++
 sphinxcontrib/serializinghtml/html_assists.py | 17 +++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/sphinxcontrib/serializinghtml/__init__.py b/sphinxcontrib/serializinghtml/__init__.py
index 36d5a22..c44e1f7 100644
--- a/sphinxcontrib/serializinghtml/__init__.py
+++ b/sphinxcontrib/serializinghtml/__init__.py
@@ -81,6 +81,15 @@ def init(self) -> None:
         self.init_css_files()
         self.init_js_files()
         self.use_index = self.get_builder_config('use_index', 'html')
+        #
+        # PJC: New configuration to allow mapping of external links to
+        # relative Hub links.
+        link_mappings = None
+        try:
+            link_mappings = self.get_builder_config('link_mappings', 'html')
+        except AttributeError:
+            pass
+        self.link_mappings = link_mappings
 
     def get_target_uri(self, docname: str, typ: str | None = None) -> str:
         if docname == 'index':
@@ -146,6 +155,9 @@ def handle_page(self, pagename: str, ctx: dict[str, Any], templatename: str = 'p
             # e.g. < changed to &lt; then that also needs to be escaped because it is
             # also getting decoded.
             ctx['body'] = html_assists.escape_encoded_pre_text(ctx['body'])
+            # PJC: Go through the body, looking for any <a> tags to see if they
+            # need to be re-mapped to a local Hub path.
+            ctx['body'] = html_assists.rewrite_hub_links(ctx['body'], self.link_mappings)
 
         ensuredir(path.dirname(outfilename))
         self.dump_context(ctx, outfilename)
diff --git a/sphinxcontrib/serializinghtml/html_assists.py b/sphinxcontrib/serializinghtml/html_assists.py
index fb015da..1a1940e 100644
--- a/sphinxcontrib/serializinghtml/html_assists.py
+++ b/sphinxcontrib/serializinghtml/html_assists.py
@@ -108,3 +108,20 @@ def escape_encoded_pre_text(html: str) -> str:
     if edited:
         html = str(soup)
     return html
+
+def rewrite_hub_links(html: str, link_mappings: dict) -> str:
+    """Remap <a> hrefs starting with a link_mappings key; a falsy mapping is a no-op."""
+    if not link_mappings:
+        # The builder passes None when the link_mappings config lookup fails.
+        return html
+    edited = False
+    soup = BeautifulSoup(html, "html.parser")
+    # href=True skips <a> tags without an href, which would otherwise raise KeyError.
+    for link in soup.find_all('a', href=True):
+        for key, target in link_mappings.items():
+            if link['href'].startswith(key):
+                # Swap only the leading match; ".html" is removed wherever it occurs.
+                link['href'] = link['href'].replace(key, target, 1).replace(".html", "")
+                edited = True
+                break  # first matching key wins; never re-map an already rewritten href
+    return str(soup) if edited else html