From 1069f60466e94a163ef1eba3379a1623ae6d7e71 Mon Sep 17 00:00:00 2001 From: Samuel Weldemariam Date: Wed, 6 May 2026 12:51:18 -0500 Subject: [PATCH 1/8] hrw4u: fail fast when the antlr generator is missing Adds a check-antlr Make target that errors with an install hint (brew/dnf/apt/bootstrap.sh) before attempting parser generation, instead of surfacing a cryptic rule failure deep in gen-fwd on a fresh checkout. --- tools/hrw4u/Makefile | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tools/hrw4u/Makefile b/tools/hrw4u/Makefile index 7b929356af3..966a21c49ba 100644 --- a/tools/hrw4u/Makefile +++ b/tools/hrw4u/Makefile @@ -104,10 +104,21 @@ INIT_HRW4U=$(PKG_DIR_HRW4U)/__init__.py INIT_U4WRH=$(PKG_DIR_U4WRH)/__init__.py INIT_LSP=$(PKG_DIR_LSP)/__init__.py -.PHONY: all gen gen-fwd gen-inv copy-src test clean build package env setup-deps activate update coverage coverage-open +.PHONY: all gen gen-fwd gen-inv copy-src check-antlr test clean build package env setup-deps activate update coverage coverage-open all: gen +# Fail fast with a helpful message if the ANTLR generator is not on PATH. +# Install is intentionally left to the user / bootstrap.sh — installers vary +# by OS (brew on macOS, dnf/apt on Linux, CI images pin their own). +check-antlr: + @command -v $(ANTLR) >/dev/null 2>&1 || { \ + echo "Error: '$(ANTLR)' not found on PATH."; \ + echo "Install it first (e.g. 'brew install antlr' on macOS),"; \ + echo "or run ./bootstrap.sh which also sets up Python dependencies."; \ + exit 1; \ + } + # Orchestrate generation then copy sources and drop __main__.py in each package gen: gen-fwd gen-inv copy-src $(MAIN_HRW4U) $(MAIN_U4WRH) $(MAIN_LSP) $(INIT_HRW4U) $(INIT_U4WRH) $(INIT_LSP) @@ -135,7 +146,7 @@ $(INIT_LSP): | $(PKG_DIR_LSP) touch $@ # Generate forward parser/lexer into build/hrw4u and build/hrw4u-lsp -gen-fwd: $(ANTLR_FILES_FWD) +gen-fwd: check-antlr $(ANTLR_FILES_FWD) $(ANTLR_FILES_FWD): $(GRAMMAR_FWD) @mkdir -p $(PKG_DIR_HRW4U) From 90a618ad155f063e089d43b02b8b6092248cfeb4 Mon Sep 17 00:00:00 2001 From: Samuel Weldemariam Date: Wed, 6 May 2026 12:53:37 -0500 Subject: [PATCH 2/8] hrw4u: extract error rendering into pluggable formatters Introduces an ErrorFormatter ABC with PlainText, JSON, and Markdown implementations. ErrorCollector now delegates rendering instead of building the output string inline, with PlainText as the default so existing callers see byte-for-byte identical output. The JSON schema is versioned (v1) and emitted on a single line so bulk-mode runs produce NDJSON on stderr. --- tools/hrw4u/Makefile | 1 + tools/hrw4u/src/errors.py | 49 +++---- tools/hrw4u/src/formatters.py | 241 +++++++++++++++++++++++++++++++ tools/hrw4u/tests/test_errors.py | 159 +++++++++++++++++++- 4 files changed, 418 insertions(+), 32 deletions(-) create mode 100644 tools/hrw4u/src/formatters.py diff --git a/tools/hrw4u/Makefile b/tools/hrw4u/Makefile index 966a21c49ba..c5c98733758 100644 --- a/tools/hrw4u/Makefile +++ b/tools/hrw4u/Makefile @@ -36,6 +36,7 @@ SCRIPT_KG=scripts/hrw4u-kg SHARED_FILES=src/common.py \ src/debugging.py \ src/errors.py \ + src/formatters.py \ src/states.py \ src/tables.py \ src/types.py \ diff --git a/tools/hrw4u/src/errors.py b/tools/hrw4u/src/errors.py index d4001b0b922..95444fab9d9 100644 --- a/tools/hrw4u/src/errors.py +++ b/tools/hrw4u/src/errors.py @@ -19,10 +19,13 @@ import re from dataclasses import dataclass -from typing import Final +from typing import Final, TYPE_CHECKING from antlr4.error.ErrorListener import ErrorListener +if TYPE_CHECKING: + from hrw4u.formatters import ErrorFormatter + _TOKEN_NAMES: Final[dict[str, str]] = { 'QUALIFIED_IDENT': "qualified name (e.g. 'Namespace::Name')", 'IDENT': 'identifier', @@ -111,6 +114,7 @@ def __init__(self, filename: str, line: int, column: int, message: str, source_l self.filename = filename self.line = line self.column = column + self.message = message self.source_line = source_line @@ -166,11 +170,12 @@ def from_ctx(cls, filename: str, ctx: object, message: str) -> Warning: class ErrorCollector: """Collects multiple syntax errors and warnings for comprehensive reporting.""" - def __init__(self, max_errors: int = 5) -> None: + def __init__(self, max_errors: int = 5, formatter: "ErrorFormatter | None" = None) -> None: self.errors: list[Hrw4uSyntaxError] = [] self.max_errors = max_errors self.warnings: list[Warning] = [] self._sandbox_message: str | None = None + self._formatter = formatter def add_error(self, error: Hrw4uSyntaxError) -> None: self.errors.append(error) @@ -194,35 +199,17 @@ def has_warnings(self) -> bool: return bool(self.warnings) def get_error_summary(self) -> str: - if not self.errors and not self.warnings: - return "No errors found." - - lines: list[str] = [] - - if self.errors: - count = len(self.errors) - lines.append(f"Found {count} error{'s' if count > 1 else ''}:") - - for error in self.errors: - lines.append(str(error)) - if hasattr(error, '__notes__') and error.__notes__: - lines.extend(error.__notes__) - - if self.warnings: - if self.errors: - lines.append("") - count = len(self.warnings) - lines.append(f"{count} warning{'s' if count > 1 else ''}:") - lines.extend(w.format() for w in self.warnings) - - if self.at_limit: - lines.append(f"(stopped after {self.max_errors} errors)") - - if self._sandbox_message: - lines.append("") - lines.append(self._sandbox_message) - - return "\n".join(lines) + formatter = self._formatter + if formatter is None: + from hrw4u.formatters import PlainTextFormatter + formatter = PlainTextFormatter() + return formatter.format_errors( + self.errors, + self.warnings, + self._sandbox_message, + self.at_limit, + self.max_errors, + ) class CollectingErrorListener(ErrorListener): diff --git a/tools/hrw4u/src/formatters.py b/tools/hrw4u/src/formatters.py new file mode 100644 index 00000000000..9abbe47cf85 --- /dev/null +++ b/tools/hrw4u/src/formatters.py @@ -0,0 +1,241 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Error/warning output formatters for hrw4u and u4wrh. + +Columns are 0-based across every format (matching the internal representation +used by the ANTLR-driven listeners). The JSON schema is versioned so downstream +consumers (UIs, CI tools) can guard against future changes. +""" + +from __future__ import annotations + +import json +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from hrw4u.errors import Hrw4uSyntaxError, Warning + +JSON_SCHEMA_VERSION = 1 + + +class ErrorFormatter(ABC): + """Renders a collected batch of errors and warnings into a single string.""" + + @abstractmethod + def format_errors( + self, + errors: list["Hrw4uSyntaxError"], + warnings: list["Warning"], + sandbox_message: str | None, + at_limit: bool, + max_errors: int, + ) -> str: + ... + + +class PlainTextFormatter(ErrorFormatter): + """Current CLI output: human-readable diagnostics with caret pointers.""" + + def format_errors( + self, + errors: list["Hrw4uSyntaxError"], + warnings: list["Warning"], + sandbox_message: str | None, + at_limit: bool, + max_errors: int, + ) -> str: + if not errors and not warnings: + return "No errors found." + + lines: list[str] = [] + + if errors: + count = len(errors) + lines.append(f"Found {count} error{'s' if count > 1 else ''}:") + + for error in errors: + lines.append(str(error)) + notes = getattr(error, '__notes__', None) + if notes: + lines.extend(notes) + + if warnings: + if errors: + lines.append("") + count = len(warnings) + lines.append(f"{count} warning{'s' if count > 1 else ''}:") + lines.extend(w.format() for w in warnings) + + if at_limit: + lines.append(f"(stopped after {max_errors} errors)") + + if sandbox_message: + lines.append("") + lines.append(sandbox_message) + + return "\n".join(lines) + + +class JSONFormatter(ErrorFormatter): + """Machine-readable output. Emits a single compact JSON object per call. + + Suitable for NDJSON pipelines: in bulk mode each input file produces exactly + one object on one line of stderr. + """ + + def format_errors( + self, + errors: list["Hrw4uSyntaxError"], + warnings: list["Warning"], + sandbox_message: str | None, + at_limit: bool, + max_errors: int, + ) -> str: + payload = { + "version": JSON_SCHEMA_VERSION, + "errors": [_diag_to_dict(e, "error") for e in errors], + "warnings": [_diag_to_dict(w, "warning") for w in warnings], + "summary": { + "error_count": len(errors), + "warning_count": len(warnings), + "truncated": at_limit, + "max_errors": max_errors, + }, + "sandbox_message": sandbox_message, + } + return json.dumps(payload, separators=(",", ":"), ensure_ascii=False) + + +class MarkdownFormatter(ErrorFormatter): + """Markdown report suitable for PR comments, chat, and docs.""" + + def format_errors( + self, + errors: list["Hrw4uSyntaxError"], + warnings: list["Warning"], + sandbox_message: str | None, + at_limit: bool, + max_errors: int, + ) -> str: + if not errors and not warnings: + return "_No errors found._" + + parts: list[str] = [] + parts.append(_markdown_heading(len(errors), len(warnings))) + + for error in errors: + parts.append( + _markdown_diagnostic( + severity="Error", + filename=error.filename, + line=error.line, + column=error.column, + message=_extract_plain_message(error), + source_line=error.source_line, + notes=list(getattr(error, '__notes__', None) or []), + )) + + for warning in warnings: + parts.append( + _markdown_diagnostic( + severity="Warning", + filename=warning.filename, + line=warning.line, + column=warning.column, + message=warning.message, + source_line=warning.source_line, + notes=[], + )) + + if at_limit: + parts.append(f"> _Stopped after {max_errors} errors._") + + if sandbox_message: + parts.append(f"> **Sandbox:** {sandbox_message}") + + return "\n\n".join(parts) + + +def _diag_to_dict(diag: "Hrw4uSyntaxError | Warning", severity: str) -> dict: + notes = list(getattr(diag, '__notes__', None) or []) + message = _extract_plain_message(diag) + return { + "filename": diag.filename, + "line": diag.line, + "column": diag.column, + "severity": severity, + "message": message, + "source_line": diag.source_line, + "notes": notes, + } + + +def _extract_plain_message(diag: "Hrw4uSyntaxError | Warning") -> str: + """Return just the message text, without the file:line:col: prefix or caret art. + + ``Hrw4uSyntaxError`` pre-formats a full diagnostic into ``args[0]``; Warnings + carry the raw message on ``.message``. + """ + message = getattr(diag, 'message', None) + if message is not None: + return message + raw = str(diag.args[0]) if diag.args else "" + header = raw.split("\n", 1)[0] + prefix = f"{diag.filename}:{diag.line}:{diag.column}: error: " + if header.startswith(prefix): + return header[len(prefix):] + return header + + +def _markdown_heading(error_count: int, warning_count: int) -> str: + bits: list[str] = [] + if error_count: + bits.append(f"{error_count} error{'s' if error_count != 1 else ''}") + if warning_count: + bits.append(f"{warning_count} warning{'s' if warning_count != 1 else ''}") + return f"## hrw4u: {', '.join(bits)}" if bits else "## hrw4u" + + +def _markdown_diagnostic( + *, + severity: str, + filename: str, + line: int, + column: int, + message: str, + source_line: str, + notes: list[str], +) -> str: + lines = [f"### {severity} — `{filename}:{line}:{column}`", message] + + if source_line: + pointer = f"{' ' * column}^" + code_block = f"```\n{line:4d} | {source_line}\n | {pointer}\n```" + lines.append(code_block) + + for note in notes: + lines.append(f"> {note.strip()}") + + return "\n\n".join(lines) + + +FORMATTERS: dict[str, type[ErrorFormatter]] = { + "plain": PlainTextFormatter, + "json": JSONFormatter, + "markdown": MarkdownFormatter, +} diff --git a/tools/hrw4u/tests/test_errors.py b/tools/hrw4u/tests/test_errors.py index c11e2cba974..de16f04a810 100644 --- a/tools/hrw4u/tests/test_errors.py +++ b/tools/hrw4u/tests/test_errors.py @@ -16,8 +16,10 @@ # limitations under the License. from hrw4u.errors import ErrorCollector, Hrw4uSyntaxError, SymbolResolutionError, \ - ThrowingErrorListener, hrw4u_error, CollectingErrorListener + ThrowingErrorListener, hrw4u_error, CollectingErrorListener, Warning +from hrw4u.formatters import FORMATTERS, JSON_SCHEMA_VERSION, JSONFormatter, MarkdownFormatter, PlainTextFormatter from hrw4u.validation import Validator, ValidatorChain +import json import pytest @@ -323,5 +325,160 @@ def test_quote_if_needed(self): assert Validator.quote_if_needed("has space") == '"has space"' +class TestPlainTextFormatterParity: + """The plain formatter must preserve current CLI output byte-for-byte.""" + + def test_registry_has_expected_formats(self): + assert set(FORMATTERS.keys()) == {"plain", "json", "markdown"} + + def test_empty_returns_no_errors_found(self): + ec = ErrorCollector(formatter=PlainTextFormatter()) + assert ec.get_error_summary() == "No errors found." + + def test_single_error_matches_legacy_shape(self): + ec = ErrorCollector(formatter=PlainTextFormatter()) + err = Hrw4uSyntaxError("f.hrw4u", 1, 4, "oops", "foo bar") + ec.add_error(err) + out = ec.get_error_summary() + assert out.startswith("Found 1 error:\n") + assert "f.hrw4u:1:4: error: oops" in out + assert " 1 | foo bar" in out + + def test_at_limit_marker(self): + ec = ErrorCollector(max_errors=2, formatter=PlainTextFormatter()) + err = Hrw4uSyntaxError("f.hrw4u", 1, 0, "x", "") + ec.add_error(err) + ec.add_error(err) + assert "(stopped after 2 errors)" in ec.get_error_summary() + + def test_sandbox_message_appended(self): + ec = ErrorCollector(formatter=PlainTextFormatter()) + err = Hrw4uSyntaxError("f.hrw4u", 1, 0, "x", "") + ec.add_error(err) + ec.set_sandbox_message("sandbox blocked thing") + assert ec.get_error_summary().endswith("sandbox blocked thing") + + def test_default_formatter_is_plain(self): + """ErrorCollector() with no formatter must produce the legacy output.""" + legacy = ErrorCollector() + custom = ErrorCollector(formatter=PlainTextFormatter()) + err = Hrw4uSyntaxError("f.hrw4u", 2, 3, "m", "src") + err.add_note("hint") + legacy.add_error(err) + custom.add_error(err) + assert legacy.get_error_summary() == custom.get_error_summary() + + +class TestJSONFormatter: + """JSON output is the stable contract for downstream UIs (edgeconf, etc.).""" + + def _collect(self) -> ErrorCollector: + ec = ErrorCollector(formatter=JSONFormatter()) + err = Hrw4uSyntaxError("f.hrw4u", 3, 4, "unexpected '('", "if foo ( {") + err.add_note("hint: try X") + ec.add_error(err) + ec.add_warning(Warning(filename="f.hrw4u", line=7, column=0, message="deprecated", source_line="old;")) + return ec + + def test_output_is_valid_json(self): + payload = json.loads(self._collect().get_error_summary()) + assert payload["version"] == JSON_SCHEMA_VERSION + + def test_error_fields_are_preserved(self): + payload = json.loads(self._collect().get_error_summary()) + err = payload["errors"][0] + assert err["filename"] == "f.hrw4u" + assert err["line"] == 3 + assert err["column"] == 4 + assert err["severity"] == "error" + assert err["message"] == "unexpected '('" + assert err["source_line"] == "if foo ( {" + assert err["notes"] == ["hint: try X"] + + def test_warning_severity_and_message(self): + payload = json.loads(self._collect().get_error_summary()) + w = payload["warnings"][0] + assert w["severity"] == "warning" + assert w["message"] == "deprecated" + assert w["notes"] == [] + + def test_summary_counts_and_truncation(self): + payload = json.loads(self._collect().get_error_summary()) + assert payload["summary"]["error_count"] == 1 + assert payload["summary"]["warning_count"] == 1 + assert payload["summary"]["truncated"] is False + assert payload["summary"]["max_errors"] == 5 + + def test_truncated_flag_flips_at_limit(self): + ec = ErrorCollector(max_errors=2, formatter=JSONFormatter()) + err = Hrw4uSyntaxError("f.hrw4u", 1, 0, "x", "") + ec.add_error(err) + ec.add_error(err) + payload = json.loads(ec.get_error_summary()) + assert payload["summary"]["truncated"] is True + assert payload["summary"]["max_errors"] == 2 + + def test_sandbox_message_is_top_level(self): + ec = self._collect() + ec.set_sandbox_message("sandbox blocked x") + payload = json.loads(ec.get_error_summary()) + assert payload["sandbox_message"] == "sandbox blocked x" + + def test_empty_collector_still_emits_valid_schema(self): + ec = ErrorCollector(formatter=JSONFormatter()) + payload = json.loads(ec.get_error_summary()) + assert payload["errors"] == [] + assert payload["warnings"] == [] + assert payload["sandbox_message"] is None + + def test_single_line_output_for_ndjson(self): + out = self._collect().get_error_summary() + assert "\n" not in out, "JSON output must be single-line for NDJSON streaming" + + +class TestMarkdownFormatter: + """Markdown output is pure markdown — no ANSI, no colors.""" + + def _collect(self) -> ErrorCollector: + ec = ErrorCollector(formatter=MarkdownFormatter()) + err = Hrw4uSyntaxError("f.hrw4u", 3, 4, "unexpected '('", "if foo ( {") + err.add_note("hint: try X") + ec.add_error(err) + return ec + + def test_has_top_level_heading(self): + assert self._collect().get_error_summary().startswith("## hrw4u:") + + def test_error_heading_includes_location(self): + assert "### Error — `f.hrw4u:3:4`" in self._collect().get_error_summary() + + def test_contains_fenced_code_block_with_caret(self): + md = self._collect().get_error_summary() + assert "```" in md + assert " 3 | if foo ( {" in md + assert "^" in md + + def test_notes_render_as_blockquotes(self): + assert "> hint: try X" in self._collect().get_error_summary() + + def test_empty_collector_friendly_message(self): + ec = ErrorCollector(formatter=MarkdownFormatter()) + assert ec.get_error_summary() == "_No errors found._" + + def test_no_source_line_skips_code_block(self): + ec = ErrorCollector(formatter=MarkdownFormatter()) + ec.add_error(Hrw4uSyntaxError("f.hrw4u", 0, 0, "file not found", "")) + md = ec.get_error_summary() + assert "```" not in md + assert "file not found" in md + + def test_at_limit_marker(self): + ec = ErrorCollector(max_errors=2, formatter=MarkdownFormatter()) + err = Hrw4uSyntaxError("f.hrw4u", 1, 0, "x", "src") + ec.add_error(err) + ec.add_error(err) + assert "Stopped after 2 errors" in ec.get_error_summary() + + if __name__ == "__main__": pytest.main([__file__, "-v"]) From 70975dad2ea62a81fd5f3b4ea99a556e7c6e1cbb Mon Sep 17 00:00:00 2001 From: Samuel Weldemariam Date: Wed, 6 May 2026 12:54:04 -0500 Subject: [PATCH 3/8] hrw4u: add --error-format flag for json and markdown output Wires the formatter abstraction into the CLI with a new --error-format {plain,json,markdown} option (default: plain, so existing consumers are unaffected). Non-syntax errors (file I/O, argument errors) now flow through the same formatter path via new emit_fatal_message / emit_fatal_error helpers, so downstream tools see one stable schema regardless of where the error came from. --- tools/hrw4u/src/common.py | 101 ++++++++++++++++++++++++---------- tools/hrw4u/tests/test_cli.py | 62 +++++++++++++++++++++ 2 files changed, 135 insertions(+), 28 deletions(-) diff --git a/tools/hrw4u/src/common.py b/tools/hrw4u/src/common.py index 680a4f94429..9db40feb064 100644 --- a/tools/hrw4u/src/common.py +++ b/tools/hrw4u/src/common.py @@ -27,6 +27,7 @@ from antlr4 import InputStream, CommonTokenStream from hrw4u.errors import Hrw4uSyntaxError, ThrowingErrorListener, ErrorCollector, CollectingErrorListener +from hrw4u.formatters import FORMATTERS from hrw4u.types import MagicStrings @@ -112,6 +113,43 @@ def fatal(message: str) -> NoReturn: sys.exit(1) +def _build_formatter(error_format: str): + """Instantiate the configured error formatter, falling back to plain.""" + return FORMATTERS.get(error_format, FORMATTERS["plain"])() + + +def emit_fatal_message(error_format: str, message: str, filename: str = SystemDefaults.DEFAULT_FILENAME) -> NoReturn: + """Emit a non-syntax error (I/O, argument) via the chosen formatter and exit. + + Plain mode preserves the legacy bare-string output. Structured formats wrap + the message as a synthetic diagnostic so downstream consumers always see the + same schema regardless of where the error originated. + """ + if error_format == 'plain': + print(message, file=sys.stderr) + else: + err = Hrw4uSyntaxError(filename, 0, 0, message, "") + collector = ErrorCollector(formatter=_build_formatter(error_format)) + collector.add_error(err) + print(collector.get_error_summary(), file=sys.stderr) + sys.exit(1) + + +def emit_fatal_error(error_format: str, error: Hrw4uSyntaxError) -> NoReturn: + """Emit a single Hrw4uSyntaxError via the chosen formatter and exit. + + Plain mode keeps the legacy ``str(error)`` output (no ``Found 1 error:`` + prefix) so existing CLI consumers see byte-identical output. + """ + if error_format == 'plain': + print(str(error), file=sys.stderr) + else: + collector = ErrorCollector(formatter=_build_formatter(error_format)) + collector.add_error(error) + print(collector.get_error_summary(), file=sys.stderr) + sys.exit(1) + + def create_base_parser(description: str) -> tuple[argparse.ArgumentParser, argparse._MutuallyExclusiveGroup]: """Create base argument parser with common options.""" parser = argparse.ArgumentParser(description=description, formatter_class=argparse.RawDescriptionHelpFormatter) @@ -147,13 +185,14 @@ def create_parse_tree( parser_class: type[ParserProtocol], error_prefix: str, collect_errors: bool = True, - max_errors: int = 5) -> tuple[Any, ParserProtocol, ErrorCollector | None]: + max_errors: int = 5, + error_format: str = "plain") -> tuple[Any, ParserProtocol, ErrorCollector | None]: """Create ANTLR parse tree from input content with optional error collection.""" input_stream = InputStream(content) error_collector = None if collect_errors: - error_collector = ErrorCollector(max_errors=max_errors) + error_collector = ErrorCollector(max_errors=max_errors, formatter=_build_formatter(error_format)) error_listener = CollectingErrorListener(filename=filename, error_collector=error_collector) else: error_listener = ThrowingErrorListener(filename=filename) @@ -181,7 +220,7 @@ def create_parse_tree( error_collector.add_error(e) return None, parser_obj, error_collector else: - fatal(str(e)) + emit_fatal_error(error_format, e) except Exception as e: if collect_errors: if error_collector: @@ -189,7 +228,7 @@ def create_parse_tree( error_collector.add_error(syntax_error) return None, parser_obj, error_collector else: - fatal(f"{filename}:0:0 - {error_prefix} error: {e}") + emit_fatal_message(error_format, f"{filename}:0:0 - {error_prefix} error: {e}", filename=filename) def generate_output( @@ -233,7 +272,9 @@ def generate_output( syntax_error.add_note(note) error_collector.add_error(syntax_error) else: - fatal(str(e)) + visitor_err = e if isinstance(e, Hrw4uSyntaxError) else Hrw4uSyntaxError( + filename, 0, 0, f"Visitor error: {e}", "") + emit_fatal_error(getattr(args, 'error_format', 'plain'), visitor_err) if error_collector and (error_collector.has_errors() or error_collector.has_warnings()): print(error_collector.get_error_summary(), file=sys.stderr) @@ -289,6 +330,16 @@ def run_main( default=5, dest="max_errors", help="Maximum number of errors to report before stopping (default: 5; ignored with --stop-on-error)") + parser.add_argument( + "--error-format", + choices=sorted(FORMATTERS.keys()), + default="plain", + dest="error_format", + help=( + "Format used for error and warning output on stderr (default: plain). " + "'json' emits one compact JSON object per input (NDJSON-friendly in bulk mode); " + "'markdown' emits a rendered report suitable for PR comments and chat. " + "Columns are always 0-based.")) if add_args is not None: add_args(parser, output_group) @@ -309,20 +360,19 @@ def run_main( try: content = pre_process(content, filename, args) except Hrw4uSyntaxError as e: - print(str(e), file=sys.stderr) - sys.exit(1) + emit_fatal_error(args.error_format, e) tree, parser_obj, error_collector = create_parse_tree( - content, filename, lexer_class, parser_class, error_prefix, not args.stop_on_error, args.max_errors) + content, filename, lexer_class, parser_class, error_prefix, not args.stop_on_error, args.max_errors, + args.error_format) generate_output(tree, parser_obj, visitor_class, filename, args, error_collector, extra_kwargs) return if any(':' in f for f in args.files): for pair in args.files: if ':' not in pair: - print( - f"Error: Mixed formats not allowed. All files must use 'input:output' format for bulk compilation.", - file=sys.stderr) - sys.exit(1) + emit_fatal_message( + args.error_format, + "Error: Mixed formats not allowed. All files must use 'input:output' format for bulk compilation.") input_path, output_path = pair.split(':', 1) @@ -331,20 +381,18 @@ def run_main( content = input_file.read() filename = input_path except FileNotFoundError: - print(f"Error: Input file '{input_path}' not found", file=sys.stderr) - sys.exit(1) + emit_fatal_message(args.error_format, f"Error: Input file '{input_path}' not found", filename=input_path) except Exception as e: - print(f"Error reading '{input_path}': {e}", file=sys.stderr) - sys.exit(1) + emit_fatal_message(args.error_format, f"Error reading '{input_path}': {e}", filename=input_path) if pre_process is not None: try: content = pre_process(content, filename, args) except Hrw4uSyntaxError as e: - print(str(e), file=sys.stderr) - sys.exit(1) + emit_fatal_error(args.error_format, e) tree, parser_obj, error_collector = create_parse_tree( - content, filename, lexer_class, parser_class, error_prefix, not args.stop_on_error, args.max_errors) + content, filename, lexer_class, parser_class, error_prefix, not args.stop_on_error, args.max_errors, + args.error_format) try: with open(output_path, 'w', encoding='utf-8') as output_file: @@ -355,8 +403,7 @@ def run_main( finally: sys.stdout = original_stdout except Exception as e: - print(f"Error writing to '{output_path}': {e}", file=sys.stderr) - sys.exit(1) + emit_fatal_message(args.error_format, f"Error writing to '{output_path}': {e}", filename=output_path) else: for i, input_path in enumerate(args.files): if i > 0: @@ -367,19 +414,17 @@ def run_main( content = input_file.read() filename = input_path except FileNotFoundError: - print(f"Error: Input file '{input_path}' not found", file=sys.stderr) - sys.exit(1) + emit_fatal_message(args.error_format, f"Error: Input file '{input_path}' not found", filename=input_path) except Exception as e: - print(f"Error reading '{input_path}': {e}", file=sys.stderr) - sys.exit(1) + emit_fatal_message(args.error_format, f"Error reading '{input_path}': {e}", filename=input_path) if pre_process is not None: try: content = pre_process(content, filename, args) except Hrw4uSyntaxError as e: - print(str(e), file=sys.stderr) - sys.exit(1) + emit_fatal_error(args.error_format, e) tree, parser_obj, error_collector = create_parse_tree( - content, filename, lexer_class, parser_class, error_prefix, not args.stop_on_error, args.max_errors) + content, filename, lexer_class, parser_class, error_prefix, not args.stop_on_error, args.max_errors, + args.error_format) generate_output(tree, parser_obj, visitor_class, filename, args, error_collector, extra_kwargs) diff --git a/tools/hrw4u/tests/test_cli.py b/tools/hrw4u/tests/test_cli.py index 30afb30a740..f2b1431fe0e 100644 --- a/tools/hrw4u/tests/test_cli.py +++ b/tools/hrw4u/tests/test_cli.py @@ -16,6 +16,7 @@ # limitations under the License. from __future__ import annotations +import json import subprocess import sys import tempfile @@ -182,3 +183,64 @@ def test_u4wrh_bulk_mode(tmp_path: Path) -> None: assert out2.exists() assert "X-Test" in out1.read_text() assert "404" in out2.read_text() + + +def test_cli_error_format_json_on_parse_error(tmp_path: Path) -> None: + """With --error-format json, stderr must be a single JSON object matching the schema.""" + bad = tmp_path / "bad.hrw4u" + bad.write_text("REMAP { this is not valid syntax ( {\n") + + result = run_hrw4u(["--error-format", "json", str(bad)]) + + assert result.returncode != 0 or result.stderr + payload = json.loads(result.stderr.strip().splitlines()[-1]) + assert payload["version"] == 1 + assert payload["summary"]["error_count"] >= 1 + err = payload["errors"][0] + for field in ("filename", "line", "column", "severity", "message", "source_line", "notes"): + assert field in err + + +def test_cli_error_format_json_on_missing_file() -> None: + """File-I/O errors must also be wrapped in the JSON envelope.""" + result = run_hrw4u(["--error-format", "json", "nonexistent_file.hrw4u"]) + + assert result.returncode != 0 + payload = json.loads(result.stderr.strip().splitlines()[-1]) + assert payload["version"] == 1 + assert payload["summary"]["error_count"] == 1 + assert "not found" in payload["errors"][0]["message"] + + +def test_cli_error_format_markdown_on_parse_error(tmp_path: Path) -> None: + """Markdown format must include heading, fenced code block, and location.""" + bad = tmp_path / "bad.hrw4u" + bad.write_text("REMAP { this is not valid syntax ( {\n") + + result = run_hrw4u(["--error-format", "markdown", str(bad)]) + + assert "## hrw4u:" in result.stderr + assert "### Error" in result.stderr + assert "```" in result.stderr + + +def test_cli_default_error_format_is_plain(tmp_path: Path) -> None: + """Omitting --error-format must leave the legacy plain-text output unchanged.""" + bad = tmp_path / "bad.hrw4u" + bad.write_text("REMAP { this is not valid syntax ( {\n") + + result = run_hrw4u([str(bad)]) + + assert "Found" in result.stderr and "error" in result.stderr + assert "## hrw4u" not in result.stderr + assert not result.stderr.strip().startswith("{") + + +def test_cli_help_lists_error_format_flag() -> None: + """--help must advertise the new flag.""" + result = run_hrw4u(["--help"]) + + assert result.returncode == 0 + assert "--error-format" in result.stdout + for choice in ("plain", "json", "markdown"): + assert choice in result.stdout From 7dd083344c2c070b707444ea7eaf51db894421f8 Mon Sep 17 00:00:00 2001 From: Samuel Weldemariam Date: Wed, 6 May 2026 13:02:43 -0500 Subject: [PATCH 4/8] hrw4u: omit 'Found 1 error:' preamble for single-error output When only one error is produced, the 'Found 1 error:' summary line adds noise without adding information. Skip it in that case and keep the 'Found N errors:' header for counts of two or more. --- tools/hrw4u/src/formatters.py | 3 ++- tools/hrw4u/tests/test_cli.py | 2 +- tools/hrw4u/tests/test_errors.py | 14 +++++++++++--- tools/hrw4u/tests/test_units.py | 2 +- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/tools/hrw4u/src/formatters.py b/tools/hrw4u/src/formatters.py index 9abbe47cf85..7fb5417b15c 100644 --- a/tools/hrw4u/src/formatters.py +++ b/tools/hrw4u/src/formatters.py @@ -66,7 +66,8 @@ def format_errors( if errors: count = len(errors) - lines.append(f"Found {count} error{'s' if count > 1 else ''}:") + if count > 1: + lines.append(f"Found {count} errors:") for error in errors: lines.append(str(error)) diff --git a/tools/hrw4u/tests/test_cli.py b/tools/hrw4u/tests/test_cli.py index f2b1431fe0e..886728de052 100644 --- a/tools/hrw4u/tests/test_cli.py +++ b/tools/hrw4u/tests/test_cli.py @@ -231,7 +231,7 @@ def test_cli_default_error_format_is_plain(tmp_path: Path) -> None: result = run_hrw4u([str(bad)]) - assert "Found" in result.stderr and "error" in result.stderr + assert ": error:" in result.stderr assert "## hrw4u" not in result.stderr assert not result.stderr.strip().startswith("{") diff --git a/tools/hrw4u/tests/test_errors.py b/tools/hrw4u/tests/test_errors.py index de16f04a810..d3c096078cc 100644 --- a/tools/hrw4u/tests/test_errors.py +++ b/tools/hrw4u/tests/test_errors.py @@ -335,15 +335,23 @@ def test_empty_returns_no_errors_found(self): ec = ErrorCollector(formatter=PlainTextFormatter()) assert ec.get_error_summary() == "No errors found." - def test_single_error_matches_legacy_shape(self): + def test_single_error_omits_found_preamble(self): + """A single error should not be prefixed with 'Found 1 error:'.""" ec = ErrorCollector(formatter=PlainTextFormatter()) err = Hrw4uSyntaxError("f.hrw4u", 1, 4, "oops", "foo bar") ec.add_error(err) out = ec.get_error_summary() - assert out.startswith("Found 1 error:\n") - assert "f.hrw4u:1:4: error: oops" in out + assert not out.startswith("Found") + assert out.startswith("f.hrw4u:1:4: error: oops") assert " 1 | foo bar" in out + def test_multiple_errors_include_found_preamble(self): + """Two or more errors keep the 'Found N errors:' summary line.""" + ec = ErrorCollector(formatter=PlainTextFormatter()) + ec.add_error(Hrw4uSyntaxError("f.hrw4u", 1, 0, "a", "")) + ec.add_error(Hrw4uSyntaxError("f.hrw4u", 2, 0, "b", "")) + assert ec.get_error_summary().startswith("Found 2 errors:\n") + def test_at_limit_marker(self): ec = ErrorCollector(max_errors=2, formatter=PlainTextFormatter()) err = Hrw4uSyntaxError("f.hrw4u", 1, 0, "x", "") diff --git a/tools/hrw4u/tests/test_units.py b/tools/hrw4u/tests/test_units.py index 162681ce49e..1058ff1ab50 100644 --- a/tools/hrw4u/tests/test_units.py +++ b/tools/hrw4u/tests/test_units.py @@ -95,7 +95,7 @@ def test_error_collector_basic(self): error_summary = self.error_collector.get_error_summary() assert "Test error" in error_summary - assert "Found 1 error:" in error_summary + assert "Found" not in error_summary def test_error_collector_multiple_errors(self): error1 = Hrw4uSyntaxError("test1.hrw4u", 1, 0, "Error 1", "line 1") From 6c38b52a58b24a5667beaa144e8640ae2e23c3b7 Mon Sep 17 00:00:00 2001 From: Samuel Weldemariam Date: Fri, 8 May 2026 14:16:59 -0500 Subject: [PATCH 5/8] hrw4u: reflow trailing-comma signatures so yapf can collapse them A trailing comma on the last parameter forced yapf to keep one argument per line, which clashes with the project's 132-column horizontal style. Drop the trailing commas in the new formatter signatures, call sites, and dict literals so yapf collapses them. --- tools/hrw4u/src/common.py | 3 +- tools/hrw4u/src/errors.py | 8 +---- tools/hrw4u/src/formatters.py | 58 +++++++++-------------------------- 3 files changed, 16 insertions(+), 53 deletions(-) diff --git a/tools/hrw4u/src/common.py b/tools/hrw4u/src/common.py index 9db40feb064..8defcbc3739 100644 --- a/tools/hrw4u/src/common.py +++ b/tools/hrw4u/src/common.py @@ -362,8 +362,7 @@ def run_main( except Hrw4uSyntaxError as e: emit_fatal_error(args.error_format, e) tree, parser_obj, error_collector = create_parse_tree( - content, filename, lexer_class, parser_class, error_prefix, not args.stop_on_error, args.max_errors, - args.error_format) + content, filename, lexer_class, parser_class, error_prefix, not args.stop_on_error, args.max_errors, args.error_format) generate_output(tree, parser_obj, visitor_class, filename, args, error_collector, extra_kwargs) return diff --git a/tools/hrw4u/src/errors.py b/tools/hrw4u/src/errors.py index 95444fab9d9..4af26ff3691 100644 --- a/tools/hrw4u/src/errors.py +++ b/tools/hrw4u/src/errors.py @@ -203,13 +203,7 @@ def get_error_summary(self) -> str: if formatter is None: from hrw4u.formatters import PlainTextFormatter formatter = PlainTextFormatter() - return formatter.format_errors( - self.errors, - self.warnings, - self._sandbox_message, - self.at_limit, - self.max_errors, - ) + return formatter.format_errors(self.errors, self.warnings, self._sandbox_message, self.at_limit, self.max_errors) class CollectingErrorListener(ErrorListener): diff --git a/tools/hrw4u/src/formatters.py b/tools/hrw4u/src/formatters.py index 7fb5417b15c..72c324b0a02 100644 --- a/tools/hrw4u/src/formatters.py +++ b/tools/hrw4u/src/formatters.py @@ -38,13 +38,8 @@ class ErrorFormatter(ABC): @abstractmethod def format_errors( - self, - errors: list["Hrw4uSyntaxError"], - warnings: list["Warning"], - sandbox_message: str | None, - at_limit: bool, - max_errors: int, - ) -> str: + self, errors: list["Hrw4uSyntaxError"], warnings: list["Warning"], sandbox_message: str | None, at_limit: bool, + max_errors: int) -> str: ... @@ -52,13 +47,8 @@ class PlainTextFormatter(ErrorFormatter): """Current CLI output: human-readable diagnostics with caret pointers.""" def format_errors( - self, - errors: list["Hrw4uSyntaxError"], - warnings: list["Warning"], - sandbox_message: str | None, - at_limit: bool, - max_errors: int, - ) -> str: + self, errors: list["Hrw4uSyntaxError"], warnings: list["Warning"], sandbox_message: str | None, at_limit: bool, + max_errors: int) -> str: if not errors and not warnings: return "No errors found." @@ -100,13 +90,8 @@ class JSONFormatter(ErrorFormatter): """ def format_errors( - self, - errors: list["Hrw4uSyntaxError"], - warnings: list["Warning"], - sandbox_message: str | None, - at_limit: bool, - max_errors: int, - ) -> str: + self, errors: list["Hrw4uSyntaxError"], warnings: list["Warning"], sandbox_message: str | None, at_limit: bool, + max_errors: int) -> str: payload = { "version": JSON_SCHEMA_VERSION, "errors": [_diag_to_dict(e, "error") for e in errors], @@ -115,9 +100,9 @@ def format_errors( "error_count": len(errors), "warning_count": len(warnings), "truncated": at_limit, - "max_errors": max_errors, + "max_errors": max_errors }, - "sandbox_message": sandbox_message, + "sandbox_message": sandbox_message } return json.dumps(payload, separators=(",", ":"), ensure_ascii=False) @@ -126,13 +111,8 @@ class MarkdownFormatter(ErrorFormatter): """Markdown report suitable for PR comments, chat, and docs.""" def format_errors( - self, - errors: list["Hrw4uSyntaxError"], - warnings: list["Warning"], - sandbox_message: str | None, - at_limit: bool, - max_errors: int, - ) -> str: + self, errors: list["Hrw4uSyntaxError"], warnings: list["Warning"], sandbox_message: str | None, at_limit: bool, + max_errors: int) -> str: if not errors and not warnings: return "_No errors found._" @@ -148,8 +128,7 @@ def format_errors( column=error.column, message=_extract_plain_message(error), source_line=error.source_line, - notes=list(getattr(error, '__notes__', None) or []), - )) + notes=list(getattr(error, '__notes__', None) or []))) for warning in warnings: parts.append( @@ -160,8 +139,7 @@ def format_errors( column=warning.column, message=warning.message, source_line=warning.source_line, - notes=[], - )) + notes=[])) if at_limit: parts.append(f"> _Stopped after {max_errors} errors._") @@ -182,7 +160,7 @@ def _diag_to_dict(diag: "Hrw4uSyntaxError | Warning", severity: str) -> dict: "severity": severity, "message": message, "source_line": diag.source_line, - "notes": notes, + "notes": notes } @@ -213,15 +191,7 @@ def _markdown_heading(error_count: int, warning_count: int) -> str: def _markdown_diagnostic( - *, - severity: str, - filename: str, - line: int, - column: int, - message: str, - source_line: str, - notes: list[str], -) -> str: + *, severity: str, filename: str, line: int, column: int, message: str, source_line: str, notes: list[str]) -> str: lines = [f"### {severity} — `{filename}:{line}:{column}`", message] if source_line: From 5ba939f08e19d5602c6332d5e7cf4f35d8c68f16 Mon Sep 17 00:00:00 2001 From: Samuel Weldemariam Date: Mon, 11 May 2026 09:35:50 -0500 Subject: [PATCH 6/8] hrw4u: extend antlr-on-PATH check to inverse generation gen-fwd already gated on check-antlr, but make gen-inv (and any target depending only on it) still failed with the cryptic original error when the generator was missing. --- tools/hrw4u/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/hrw4u/Makefile b/tools/hrw4u/Makefile index c5c98733758..8775258cc16 100644 --- a/tools/hrw4u/Makefile +++ b/tools/hrw4u/Makefile @@ -156,7 +156,7 @@ $(ANTLR_FILES_FWD): $(GRAMMAR_FWD) # LSP no longer generates its own ANTLR files - it imports from hrw4u # Generate inverse parser/lexer into build/u4wrh -gen-inv: $(ANTLR_FILES_INV) +gen-inv: check-antlr $(ANTLR_FILES_INV) $(ANTLR_FILES_INV): $(GRAMMAR_INV) @mkdir -p $(PKG_DIR_U4WRH) From 24552c81d8f1e3b2a2e7630e2db3c883d40f363b Mon Sep 17 00:00:00 2001 From: Samuel Weldemariam Date: Mon, 11 May 2026 09:36:00 -0500 Subject: [PATCH 7/8] hrw4u: annotate _build_formatter return type Restores the typed-signature convention used elsewhere in this module so type-checkers and IDEs can follow the formatter plumbing. --- tools/hrw4u/src/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/hrw4u/src/common.py b/tools/hrw4u/src/common.py index 8defcbc3739..2347b08e3a8 100644 --- a/tools/hrw4u/src/common.py +++ b/tools/hrw4u/src/common.py @@ -27,7 +27,7 @@ from antlr4 import InputStream, CommonTokenStream from hrw4u.errors import Hrw4uSyntaxError, ThrowingErrorListener, ErrorCollector, CollectingErrorListener -from hrw4u.formatters import FORMATTERS +from hrw4u.formatters import FORMATTERS, ErrorFormatter from hrw4u.types import MagicStrings @@ -113,7 +113,7 @@ def fatal(message: str) -> NoReturn: sys.exit(1) -def _build_formatter(error_format: str): +def _build_formatter(error_format: str) -> ErrorFormatter: """Instantiate the configured error formatter, falling back to plain.""" return FORMATTERS.get(error_format, FORMATTERS["plain"])() From 4308a7a2348889077d38b2c9fd1266fd7a7e350f Mon Sep 17 00:00:00 2001 From: Samuel Weldemariam Date: Mon, 11 May 2026 09:36:11 -0500 Subject: [PATCH 8/8] hrw4u: drop duplicated location prefix on parse-time fatal errors The stop-on-error path embedded "{filename}:0:0 - " into the message itself, so JSON/Markdown formatters double-encoded the location (which already lives in the filename/line/column fields). Pass the plain message and let the formatter own the location, matching the collecting-errors branch above. --- tools/hrw4u/src/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/hrw4u/src/common.py b/tools/hrw4u/src/common.py index 2347b08e3a8..15f1885f4bd 100644 --- a/tools/hrw4u/src/common.py +++ b/tools/hrw4u/src/common.py @@ -228,7 +228,7 @@ def create_parse_tree( error_collector.add_error(syntax_error) return None, parser_obj, error_collector else: - emit_fatal_message(error_format, f"{filename}:0:0 - {error_prefix} error: {e}", filename=filename) + emit_fatal_message(error_format, f"{error_prefix} error: {e}", filename=filename) def generate_output(