diff --git a/pyproject.toml b/pyproject.toml
index 7d6f07d..f8ab0b1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,7 +21,6 @@ dependencies = [
"vl-convert-python",
"selenium",
"cairosvg",
- "maturin"
]
classifiers = [
"Programming Language :: Rust",
@@ -37,5 +36,5 @@ package-data = {"website_diff" = ["*.css", "*.js"]}
[tool.maturin]
features = ["pyo3/extension-module"]
-module-name = "website_diff.htmldiff"
+module-name = "website_diff._htmldiff_rs"
diff --git a/src/lib.rs b/src/lib.rs
index d9ae83b..21f22c3 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -15,7 +15,7 @@ fn _htmldiff(a: &str, b: &str) -> String {
}
#[pymodule]
-#[pyo3(name="htmldiff")]
+#[pyo3(name="_htmldiff_rs")]
fn htmldiff(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(_htmldiff, m)?)?;
Ok(())
diff --git a/website_diff/_htmldiff_py.py b/website_diff/_htmldiff_py.py
new file mode 100644
index 0000000..f4bf42a
--- /dev/null
+++ b/website_diff/_htmldiff_py.py
@@ -0,0 +1,186 @@
+"""Pure Python HTML diff — fallback for when the Rust extension is unavailable.
+
+Implements the same HTML tokenizer and Wu-Manber-Myers O(NP) diff algorithm
+that the Rust extension provides, producing byte-identical output.
+"""
+
+import enum
+
+__all__ = ["_htmldiff"]
+
+
+# ---------------------------------------------------------------------------
+# HTML tokenizer (equivalent to src/html.rs)
+# ---------------------------------------------------------------------------
+
+class _Mode(enum.Enum):
+ """Scanner state used by ``_split_html`` while walking the input."""
+ # Inside a run of ordinary text; also the state entered right after a
+ # tag's closing ``>``.
+ CHAR = 0
+ # Between a ``<`` and the next ``>``; everything in between is kept as
+ # part of the tag token.
+ TAG = 1
+ # Inside a run of whitespace characters (as judged by ``str.isspace``).
+ WHITESPACE = 2
+
+
+def _split_html(s: str) -> list[str]:
+ words: list[str] = []
+ start = 0
+ mode = _Mode.CHAR
+
+ for i, c in enumerate(s):
+ if mode == _Mode.CHAR:
+ if c == "<":
+ if start != i:
+ words.append(s[start:i])
+ start = i
+ mode = _Mode.TAG
+ elif c.isspace():
+ if start != i:
+ words.append(s[start:i])
+ start = i
+ mode = _Mode.WHITESPACE
+ elif mode == _Mode.TAG:
+ if c == ">":
+ words.append(s[start : i + 1])
+ start = i + 1
+ mode = _Mode.CHAR
+ elif mode == _Mode.WHITESPACE:
+ if c == "<":
+ if start != i:
+ words.append(s[start:i])
+ start = i
+ mode = _Mode.TAG
+ elif not c.isspace():
+ if start != i:
+ words.append(s[start:i])
+ start = i
+ mode = _Mode.CHAR
+
+ if start < len(s):
+ words.append(s[start:])
+ return words
+
+
+# ---------------------------------------------------------------------------
+# Wu-Manber-Myers O(NP) diff (equivalent to src/wu.rs)
+# ---------------------------------------------------------------------------
+
+class _Edit:
+ __slots__ = ()
+
+
+class _Common(_Edit):
+ __slots__ = ("old", "new")
+
+ def __init__(self, old: int, new: int):
+ self.old = old
+ self.new = new
+
+
+class _Add(_Edit):
+ __slots__ = ("new",)
+
+ def __init__(self, new: int):
+ self.new = new
+
+
+class _Delete(_Edit):
+ __slots__ = ("old",)
+
+ def __init__(self, old: int):
+ self.old = old
+
+
+def _diff(a: list[str], b: list[str]) -> list[_Edit]:
+ """Compute an edit script that turns token list *a* into token list *b*.
+
+ Returns the steps in order as ``_Common``, ``_Add`` and ``_Delete``
+ objects. ``old`` indices always refer to the caller's *a* and ``new``
+ indices to the caller's *b*, even though the two lists are swapped
+ internally when *a* is the longer one.
+ """
+ # The search below runs with the shorter list as ``a``; ``reverse``
+ # remembers the swap so the edits can be reported for the caller's order.
+ reverse = len(a) > len(b)
+ if reverse:
+ a, b = b, a
+
+ m = len(a)
+ n = len(b)
+ # A diagonal is k = y - x; the end point (m, n) lies on diagonal ``delta``.
+ delta = n - m
+ # Added to a diagonal number to turn it into a non-negative list index.
+ offset = m + 1
+ sz = m + n + 3
+
+ # ids[k + offset]: index into ``points`` of the last point recorded on
+ # diagonal k, or -1 if none was recorded yet.
+ ids: list[int] = [-1] * sz
+ # Every point reached by ``snake``; ``prev`` is the index of the point it
+ # was reached from (-1 for the start of the chain).
+ points: list[tuple[int, int, int]] = [] # (x, y, prev)
+
+ def snake(k: int, fp1: int, fp2: int) -> int:
+ # Start on diagonal k from the larger of the two candidate y values,
+ # slide along equal tokens, record the end point and return its y.
+ fp = max(fp1, fp2)
+ y = fp
+ x = fp - k
+ while x < m and y < n and a[x] == b[y]:
+ x += 1
+ y += 1
+ ko = k + offset
+ # The predecessor is the neighbouring diagonal that supplied the start
+ # value: k - 1 for fp1 (also on ties), k + 1 for fp2.
+ prev = ids[ko - 1] if fp1 >= fp2 else ids[ko + 1]
+ ids[ko] = len(points)
+ points.append((x, y, prev))
+ return y
+
+ # fp[k + offset]: furthest y reached so far on diagonal k (-1 = not reached).
+ fp = [-1] * sz
+ p = -1
+ delta_offset = delta + offset
+ # Each round widens the band of diagonals by one on both sides: diagonals
+ # below ``delta`` are swept upwards, those above it downwards, and
+ # ``delta`` itself last. The search ends once diagonal ``delta`` reaches
+ # y >= n, i.e. the end of ``b``.
+ while True:
+ p += 1
+ for k in range(-p, delta):
+ ko = k + offset
+ fp[ko] = snake(k, fp[ko - 1] + 1, fp[ko + 1])
+ for k in range(delta + p, delta, -1):
+ ko = k + offset
+ fp[ko] = snake(k, fp[ko - 1] + 1, fp[ko + 1])
+ fp[delta_offset] = snake(
+ delta, fp[delta_offset - 1] + 1, fp[delta_offset + 1]
+ )
+ if fp[delta_offset] >= n:
+ break
+
+ # Follow the predecessor links back from the final point; ``route`` is
+ # therefore collected end-to-start and replayed reversed below.
+ route: list[tuple[int, int]] = []
+ prev = ids[delta_offset]
+ while prev != -1:
+ px, py, prev = points[prev]
+ route.append((px, py))
+
+ # Replay the route from (0, 0). ``py + cx > px + cy`` is ``py - px >
+ # cy - cx``: the target point sits on a higher diagonal, so y must advance
+ # (a token of the second list); the opposite case advances x; on the same
+ # diagonal both advance together over a common token.
+ ses: list[_Edit] = []
+ cx, cy = 0, 0
+ for px, py in reversed(route):
+ while cx < px or cy < py:
+ if py + cx > px + cy:
+ # y indexes the second list, which is the caller's *a* when swapped.
+ ses.append(_Delete(cy) if reverse else _Add(cy))
+ cy += 1
+ elif py + cx < px + cy:
+ # x indexes the first list, which is the caller's *b* when swapped.
+ ses.append(_Add(cx) if reverse else _Delete(cx))
+ cx += 1
+ else:
+ ses.append(
+ _Common(cy, cx) if reverse else _Common(cx, cy)
+ )
+ cx += 1
+ cy += 1
+ return ses
+
+
+# ---------------------------------------------------------------------------
+# HTML diff builder (equivalent to src/builder.rs)
+# ---------------------------------------------------------------------------
+
+def _htmldiff(old_html: str, new_html: str) -> str:
+ """Return *new_html* with ```` / ```` tags marking changes."""
+ old_words = _split_html(old_html)
+ new_words = _split_html(new_html)
+ edits = _diff(old_words, new_words)
+
+ parts: list[str] = []
+ for edit in edits:
+ if isinstance(edit, _Common):
+ parts.append(old_words[edit.old])
+ elif isinstance(edit, _Add):
+ word = new_words[edit.new]
+ if word.startswith("<") and not word.startswith("
", word, ""])
+ elif isinstance(edit, _Delete):
+ word = old_words[edit.old]
+ if word.startswith("<") and not word.startswith("
", word, ""])
+ return "".join(parts)
diff --git a/website_diff/htmldiff.py b/website_diff/htmldiff.py
new file mode 100644
index 0000000..667fa08
--- /dev/null
+++ b/website_diff/htmldiff.py
@@ -0,0 +1,8 @@
+"""HTML diff — uses the Rust extension if available, otherwise pure Python."""
+
+# Prefer the compiled ``_htmldiff_rs`` extension module. An ImportError means
+# it cannot be imported (e.g. it was never built), so the pure-Python
+# implementation is bound to the same name instead.
+try:
+ from website_diff._htmldiff_rs import _htmldiff
+except ImportError:
+ from website_diff._htmldiff_py import _htmldiff
+
+# Re-export whichever implementation was imported above.
+__all__ = ["_htmldiff"]