From 735dcf7af118fbd8af4a2e6c572db74132ef77e0 Mon Sep 17 00:00:00 2001 From: Jean Date: Wed, 17 Jun 2026 14:45:14 +0200 Subject: [PATCH] fix(audit): catch tokenize.TokenError, not the nonexistent TokenizeError MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - tokenize has no TokenizeError — the three except clauses (claims, suppressions, dup) raised AttributeError on a tokenization failure instead of catching it - Surfaced by pyrefly; add a regression test feeding unterminated source - Bump plugin.json to 1.23.3 --- .claude-plugin/plugin.json | 2 +- src/forge/audit/claims.py | 2 +- src/forge/audit/dup.py | 2 +- src/forge/audit/suppressions.py | 2 +- tests/audit/test_dup.py | 12 ++++++++++++ 5 files changed, 16 insertions(+), 4 deletions(-) diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index b14009c..086eb55 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "forge", - "version": "1.23.2", + "version": "1.23.3", "description": "Automate Python CI/CD and code-quality standards — deterministic CLIs + a drop-in pre-commit hook, runnable with or without an AI agent. This optional Claude Code plugin orchestrates them; the gate is the CLI, never the model.", "author": { "name": "Jean Simonnet", diff --git a/src/forge/audit/claims.py b/src/forge/audit/claims.py index 8ff3dec..535e8c7 100644 --- a/src/forge/audit/claims.py +++ b/src/forge/audit/claims.py @@ -318,7 +318,7 @@ def _comment_findings( evidence=(comment_body[:COMMENT_PREVIEW],), ), ) - except tokenize.TokenizeError as exc: + except tokenize.TokenError as exc: logger.debug("tokenize failed in %s: %s", rel, exc) return findings diff --git a/src/forge/audit/dup.py b/src/forge/audit/dup.py index 67bc19e..20e8e04 100644 --- a/src/forge/audit/dup.py +++ b/src/forge/audit/dup.py @@ -177,7 +177,7 @@ def _tokenize_body(source: str) -> list[str]: tokens.append(tok.string if keyword.iskeyword(tok.string) else "ID") else: tokens.append(tok.string) - except tokenize.TokenizeError: + except tokenize.TokenError: logger.debug("tokenize failed on a snippet — skipping") return [] return tokens diff --git a/src/forge/audit/suppressions.py b/src/forge/audit/suppressions.py index 13205ff..263468b 100644 --- a/src/forge/audit/suppressions.py +++ b/src/forge/audit/suppressions.py @@ -283,7 +283,7 @@ def _iter_comments(text: str) -> list[tuple[int, str]]: seen_lines.add(line_no) if 1 <= line_no <= len(source_lines): pairs.append((line_no, source_lines[line_no - 1])) - except tokenize.TokenizeError as exc: + except tokenize.TokenError as exc: logger.debug("tokenize failed: %s", exc) return pairs diff --git a/tests/audit/test_dup.py b/tests/audit/test_dup.py index 8ceab1e..b0e5418 100644 --- a/tests/audit/test_dup.py +++ b/tests/audit/test_dup.py @@ -107,6 +107,18 @@ def test_tokenize_body_collapses_strings_and_numbers() -> None: assert "42" not in tokens +def test_tokenize_body_returns_empty_on_token_error() -> None: + """Unterminated source is swallowed and yields ``[]``, not an exception. + + Regression: the handler caught ``tokenize.TokenizeError`` — a name that + does not exist in the stdlib (the real exception is + ``tokenize.TokenError``) — so a tokenization failure raised + ``AttributeError`` instead of being caught. The unclosed parenthesis + below makes ``tokenize`` raise ``TokenError`` at EOF. + """ + assert _tokenize_body("x = (1 +") == [] + + def test_shingles_returns_empty_when_shorter_than_k() -> None: """A token sequence below ``k`` length yields no shingles.""" assert _shingles(["a", "b"], k=5) == frozenset()