From 98aad97c0a340a28e52d2e547942f950087c1f20 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 26 Apr 2026 11:22:29 +0100 Subject: [PATCH 1/3] Add color to ASTs --- tests/test_web_driver.py | 7 +- web/driver.py | 197 ++++++++++++++++++++++++++++----------- web/index.html | 14 ++- 3 files changed, 159 insertions(+), 59 deletions(-) diff --git a/tests/test_web_driver.py b/tests/test_web_driver.py index 688cfdd..691e718 100644 --- a/tests/test_web_driver.py +++ b/tests/test_web_driver.py @@ -21,8 +21,11 @@ def test_view_tokens_includes_token_names(self) -> None: def test_view_ast_returns_module_dump(self) -> None: rendered = driver.view_ast("x = 1\n", optimize=False) - self.assertIn("Module(", rendered) - self.assertIn("Assign(", rendered) + self.assertTrue(rendered["html"]) + self.assertIn(">Module(", rendered["text"]) + self.assertIn(">Assign(", rendered["text"]) + # Every row maps back to source line 1 via lineno propagation. + self.assertTrue(all(ln == 1 for ln in rendered["lines"] if ln is not None)) def test_view_pseudo_smoke(self) -> None: rendered = driver.view_pseudo("def f(x):\n return x\n\nprint(f(42))\n") diff --git a/web/driver.py b/web/driver.py index 183489c..4e4f6f0 100644 --- a/web/driver.py +++ b/web/driver.py @@ -2,8 +2,10 @@ import ast import dis +import html import io import json +import re import sys import tokenize import traceback @@ -42,67 +44,150 @@ def view_tokens(code: str) -> dict[str, Any]: return _as_view(rows) -def _has_ast_children(node: ast.AST) -> bool: - if isinstance(node, ast.Name): - return False - SENTINEL = object() - for name in node._fields: - value = getattr(node, name, SENTINEL) - if isinstance(value, (list, ast.AST)): - return True - return False - - -def _ast_attr_repr(node: ast.AST, attr: str) -> str: - value = getattr(node, attr, ...) 
- if isinstance(value, (ast.Load, ast.Store, ast.Del)): - return value.__class__.__name__ - return repr(value) - - -def _dump_ast(tree: ast.AST) -> Iterator[tuple[str, int | None]]: - SENTINEL = object() - indent = " " - - def walk( - node: Any, level: int = 0, last_line: int = 0, prepend: str = "" - ) -> Iterator[tuple[str, int]]: - prefix = f"{indent * level}{prepend}" - if isinstance(node, ast.AST): - fields = node._fields - start = getattr(node, "lineno", last_line) or last_line - if not _has_ast_children(node): - args = ", ".join(f"{n}={_ast_attr_repr(node, n)}" for n in fields) - yield f"{prefix}{node.__class__.__name__}({args})", start - else: - yield f"{prefix}{node.__class__.__name__}()", start - for name in fields: - value = getattr(node, name, SENTINEL) - if value is SENTINEL: - continue - yield from walk(value, level + 1, start, f"{name}=") - elif isinstance(node, list): - if len(node) == 1 and not _has_ast_children(node[0]): - inner = list(walk(node[0], level, last_line, prepend + "[")) - if len(inner) == 1: - text, line = inner[0] - yield text + "]", line - return - yield from inner - else: - yield f"{prefix}[]", last_line - for value in node: - yield from walk(value, level + 1, last_line) - else: - yield f"{prefix}{node!r}", last_line +_ANSI_RE = re.compile(r"\x1b\[([0-9;]*)m") +_LINENO_RE = re.compile(r"\blineno=(\d+)") +_ATTR_ROW_RE = re.compile( + r"^\s*(?:lineno|col_offset|end_lineno|end_col_offset)=\d+" +) +_ANSI_CLASS = { + "36": "ast-node", + "34": "ast-field", + "90": "ast-attribute", + "32": "ast-string", + "33": "ast-number", + "1;34": "ast-keyword", +} - for text, line in walk(tree): - yield text, (line if line and line > 0 else None) + +def _ansi_to_html(s: str) -> str: + out: list[str] = [] + pos = 0 + open_span = False + for m in _ANSI_RE.finditer(s): + out.append(html.escape(s[pos : m.start()])) + if open_span: + out.append("") + open_span = False + code = m.group(1) + cls = _ANSI_CLASS.get(code) if code and code != "0" else None + if 
cls: + out.append(f'') + open_span = True + pos = m.end() + out.append(html.escape(s[pos:])) + if open_span: + out.append("") + return "".join(out) + + +def _attach_linenos(plain_lines: list[str]) -> list[int | None]: + # `lineno=N` appears as a leaf field row inside the node, not on the + # opener row. So: extract own lineno per row, then propagate bottom-up + # (openers inherit from a child) and top-down (attribute rows inherit + # from the enclosing opener). + n = len(plain_lines) + result: list[int | None] = [None] * n + indents = [len(line) - len(line.lstrip(" ")) for line in plain_lines] + + for i, line in enumerate(plain_lines): + m = _LINENO_RE.search(line) + if m: + result[i] = int(m.group(1)) + + for i in range(n - 1, -1, -1): + if result[i] is not None: + continue + my_indent = indents[i] + for j in range(i + 1, n): + if indents[j] <= my_indent: + break + if indents[j] == my_indent + 4 and result[j] is not None: + result[i] = result[j] + break + + for i in range(n): + if result[i] is not None: + continue + for j in range(i - 1, -1, -1): + if indents[j] < indents[i] and result[j] is not None: + result[i] = result[j] + break + + return result + + +_END_COL_RE = re.compile(r"^\s*end_col_offset=\d+([)\]]*)(,?)\s*$") + + +def _strip_attribute_rows( + plain_lines: list[str], + html_lines: list[str], + lineno_map: list[int | None], +) -> tuple[list[str], list[str], list[int | None]]: + # Drop pure-attribute rows. Only end_col_offset rows carry closing + # punctuation; when several nodes' attribute groups run back-to-back, + # each end_col_offset row contributes its own ")"/"]" chars. Collect + # them all and graft onto the previous kept row, replacing that row's + # trailing field-separator comma. 
+ n = len(plain_lines) + keep = [True] * n + plain_lines = list(plain_lines) + html_lines = list(html_lines) + + i = 0 + while i < n: + if not _ATTR_ROW_RE.match(plain_lines[i]): + i += 1 + continue + start = i + struct = "" + trailing_comma = False + while i < n and _ATTR_ROW_RE.match(plain_lines[i]): + keep[i] = False + m = _END_COL_RE.match(plain_lines[i]) + if m: + struct += m.group(1) + trailing_comma = bool(m.group(2)) + i += 1 + tail = struct + ("," if trailing_comma else "") + prev = start - 1 + if prev >= 0 and tail: + plain_lines[prev] = _replace_trailing_comma(plain_lines[prev], tail) + html_lines[prev] = _replace_trailing_comma(html_lines[prev], tail) + + new_plain = [l for i, l in enumerate(plain_lines) if keep[i]] + new_html = [l for i, l in enumerate(html_lines) if keep[i]] + new_lineno = [l for i, l in enumerate(lineno_map) if keep[i]] + return new_plain, new_html, new_lineno + + +def _replace_trailing_comma(line: str, tail: str) -> str: + rstripped = line.rstrip() + if rstripped.endswith(","): + rstripped = rstripped[:-1] + return rstripped + tail def view_ast(code: str, *, optimize: bool = False) -> dict[str, Any]: tree = ast.parse(code, optimize=1) if optimize else ast.parse(code) - return _as_view(list(_dump_ast(tree))) + colored = ast.dump( + tree, + indent=4, + color=True, + include_attributes=True, + show_empty=True, + ) + plain_lines = _ANSI_RE.sub("", colored).split("\n") + html_lines = [_ansi_to_html(line) for line in colored.split("\n")] + lineno_map = _attach_linenos(plain_lines) + _, html_lines, lineno_map = _strip_attribute_rows( + plain_lines, html_lines, lineno_map + ) + return { + "text": "\n".join(html_lines), + "lines": lineno_map, + "html": True, + } class _PseudoArgResolver(dis.ArgResolver): diff --git a/web/index.html b/web/index.html index a23b275..67e2380 100644 --- a/web/index.html +++ b/web/index.html @@ -132,6 +132,12 @@ .panel > .content .line.highlight { background: rgba(255, 215, 0, 0.35); } + .ast-node { color: 
#5fc1e0; } + .ast-field { color: #6cb6ff; } + .ast-attribute { color: #8a93a0; } + .ast-string { color: #b5e890; } + .ast-number { color: #f0c674; } + .ast-keyword { color: #b9a0ff; font-weight: bold; } .ace-codoscope-highlight { position: absolute; background: rgba(255, 215, 0, 0.35); @@ -233,13 +239,19 @@

Code Object


                 content.innerHTML = "";
                 const text = typeof data === "string" ? data : data.text;
                 const lines = typeof data === "string" ? [] : (data.lines || []);
+                const isHtml = typeof data === "object" && data !== null && data.html === true;
                 const parts = text.split("\n");
                 const wrap = document.createElement("div");
                 wrap.className = "lines";
                 for (let i = 0; i < parts.length; i++) {
                     const div = document.createElement("div");
                     div.className = "line";
-                    div.textContent = parts[i] === "" ? " " : parts[i];
+                    const part = parts[i] === "" ? " " : parts[i];
+                    if (isHtml) {
+                        div.innerHTML = part;
+                    } else {
+                        div.textContent = part;
+                    }
                     const srcLine = lines[i];
                     if (typeof srcLine === "number" && srcLine > 0) {
                         div.dataset.srcLine = String(srcLine);

From 33e2fb3e63fcc323cfc07282c4f7295bb2b7befa Mon Sep 17 00:00:00 2001
From: Stan Ulbrych 
Date: Sun, 26 Apr 2026 11:42:26 +0100
Subject: [PATCH 2/3] Only pass color=True to ast.dump on Python 3.15+

---
 web/driver.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/web/driver.py b/web/driver.py
index 4e4f6f0..8382e58 100644
--- a/web/driver.py
+++ b/web/driver.py
@@ -155,9 +155,9 @@ def _strip_attribute_rows(
             plain_lines[prev] = _replace_trailing_comma(plain_lines[prev], tail)
             html_lines[prev] = _replace_trailing_comma(html_lines[prev], tail)
 
-    new_plain = [l for i, l in enumerate(plain_lines) if keep[i]]
-    new_html = [l for i, l in enumerate(html_lines) if keep[i]]
-    new_lineno = [l for i, l in enumerate(lineno_map) if keep[i]]
+    new_plain = [line for i, line in enumerate(plain_lines) if keep[i]]
+    new_html = [line for i, line in enumerate(html_lines) if keep[i]]
+    new_lineno = [ln for i, ln in enumerate(lineno_map) if keep[i]]
     return new_plain, new_html, new_lineno
 
 
@@ -170,13 +170,12 @@ def _replace_trailing_comma(line: str, tail: str) -> str:
 
 def view_ast(code: str, *, optimize: bool = False) -> dict[str, Any]:
     tree = ast.parse(code, optimize=1) if optimize else ast.parse(code)
-    colored = ast.dump(
-        tree,
-        indent=4,
-        color=True,
-        include_attributes=True,
-        show_empty=True,
+    dump_kwargs: dict[str, Any] = dict(
+        indent=4, include_attributes=True, show_empty=True
     )
+    if sys.version_info >= (3, 15):
+        dump_kwargs["color"] = True
+    colored = ast.dump(tree, **dump_kwargs)
     plain_lines = _ANSI_RE.sub("", colored).split("\n")
     html_lines = [_ansi_to_html(line) for line in colored.split("\n")]
     lineno_map = _attach_linenos(plain_lines)

From 6d93f68840ce88bf4e1809eec5bec82b8ebf866d Mon Sep 17 00:00:00 2001
From: Stan Ulbrych 
Date: Sun, 26 Apr 2026 11:45:30 +0100
Subject: [PATCH 3/3] Apply pre-commit formatting and drop explanatory comments

---
 web/driver.py | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/web/driver.py b/web/driver.py
index 8382e58..3750c00 100644
--- a/web/driver.py
+++ b/web/driver.py
@@ -46,9 +46,7 @@ def view_tokens(code: str) -> dict[str, Any]:
 
 _ANSI_RE = re.compile(r"\x1b\[([0-9;]*)m")
 _LINENO_RE = re.compile(r"\blineno=(\d+)")
-_ATTR_ROW_RE = re.compile(
-    r"^\s*(?:lineno|col_offset|end_lineno|end_col_offset)=\d+"
-)
+_ATTR_ROW_RE = re.compile(r"^\s*(?:lineno|col_offset|end_lineno|end_col_offset)=\d+")
 _ANSI_CLASS = {
     "36": "ast-node",
     "34": "ast-field",
@@ -81,10 +79,6 @@ def _ansi_to_html(s: str) -> str:
 
 
 def _attach_linenos(plain_lines: list[str]) -> list[int | None]:
-    # `lineno=N` appears as a leaf field row inside the node, not on the
-    # opener row. So: extract own lineno per row, then propagate bottom-up
-    # (openers inherit from a child) and top-down (attribute rows inherit
-    # from the enclosing opener).
     n = len(plain_lines)
     result: list[int | None] = [None] * n
     indents = [len(line) - len(line.lstrip(" ")) for line in plain_lines]
@@ -124,11 +118,6 @@ def _strip_attribute_rows(
     html_lines: list[str],
     lineno_map: list[int | None],
 ) -> tuple[list[str], list[str], list[int | None]]:
-    # Drop pure-attribute rows. Only end_col_offset rows carry closing
-    # punctuation; when several nodes' attribute groups run back-to-back,
-    # each end_col_offset row contributes its own ")"/"]" chars. Collect
-    # them all and graft onto the previous kept row, replacing that row's
-    # trailing field-separator comma.
     n = len(plain_lines)
     keep = [True] * n
     plain_lines = list(plain_lines)