From 92c1ad1673d679d288e9422a755f641b38754e06 Mon Sep 17 00:00:00 2001 From: marky224 Date: Sat, 2 May 2026 14:04:40 -0500 Subject: [PATCH 1/2] Normalize HAR httpVersion to match the live runner's protocol format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Browser HARs disagree on the httpVersion string format: - Chromium writes 'http/2.0' (lowercase, with .0) - Firefox writes 'HTTP/2.0' (uppercase, with .0) - Some tools use the ALPN identifier 'h2' httpx's response.http_version is always 'HTTP/1.1' or 'HTTP/2' (uppercase, no .0 on h2). The Report's response.protocol field was therefore visually inconsistent depending on whether the surface that produced it was parse_har (passed through verbatim) or the live runner (httpx-formatted). Add _normalize_http_version that maps the common HAR variants to the runner's form and apply it at the parse_har call site. Unknown values pass through unchanged — better to display a surprising value than to misrepresent it. None / empty defaults to HTTP/1.1 to preserve the prior fallback. 11 tests cover the matrix (parametrized): http/2.0, HTTP/2.0, http/2, h2, http/1.1, HTTP/1.1, http/1.0, h3, http/3.0, plus unknown-passthrough and missing-defaults-to-1.1. Cosmetic-only — no Report shape change, schema_version unchanged, no engine logic touched. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 1 + src/api_medic/core/parser.py | 25 +++++++++++++++++- tests/unit/test_parser.py | 49 ++++++++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9b84b0..17b629e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ All notable changes to api-medic are documented here. The format follows [Keep a - Report action buttons (`Re-run`, `Export markdown`) are now wired up — they previously rendered but did nothing on click. 
`Share report` is intentionally hidden in v1: it requires persistence, which is out of scope. - Lambda `/api/analyze` now returns 400 with a useful `detail` for malformed HAR entries (missing `request.method`, missing `request.url`, non-integer `response.status`, out-of-range status codes) instead of a silent 500. The HAR parser also validates required request fields up front rather than letting `KeyError` escape. This was hitting any browser-extension capture path that produced a partial entry — the panel previously surfaced "Analyze failed: 500 Internal Server Error" with no actionable hint. - HAR parser error messages for `request.method` / `request.url` now distinguish *missing key*, *wrong type*, and *empty string* (previously all three collapsed into one "missing" message), and prefix the failure with `HAR entry[0] (<url>):` when the entry's URL is parseable. With multi-entry HARs the URL is what tells the user which captured request actually failed; the label degrades to `HAR entry[0]:` when the URL itself is the bad field. +- HAR parser now normalizes the response `httpVersion` field to the same form `httpx` produces in the live runner — `http/2.0`, `HTTP/2.0`, and `h2` all become `HTTP/2`; `http/1.1` becomes `HTTP/1.1`. Browser HARs disagree on casing and version notation (Chromium writes `http/2.0` lowercase, Firefox writes `HTTP/2.0` uppercase), which previously produced visually inconsistent Protocol values in the rendered Report depending on which surface produced it. Unknown values pass through unchanged. 
## [1.1.0] - 2026-05-01 diff --git a/src/api_medic/core/parser.py b/src/api_medic/core/parser.py index 91515ee..5b8ddaf 100644 --- a/src/api_medic/core/parser.py +++ b/src/api_medic/core/parser.py @@ -19,6 +19,29 @@ from .captured import CapturedRequest, CapturedResponse from .models import TimingBreakdown +# HAR `httpVersion` strings vary by browser: Chromium writes 'http/2.0' lowercase +# with .0, Firefox writes 'HTTP/2.0' uppercase with .0, and some tools use the +# ALPN identifier 'h2'. httpx's response.http_version is always 'HTTP/1.1' or +# 'HTTP/2' (uppercase, no .0 on h2). Normalising on parse keeps the rendered +# Report's Protocol field visually consistent regardless of which surface +# produced it. Unknown values pass through unchanged (key-missing in this map). +_HTTP_VERSION_NORMALIZATIONS = { + "http/1.0": "HTTP/1.0", + "http/1.1": "HTTP/1.1", + "http/2": "HTTP/2", + "http/2.0": "HTTP/2", + "h2": "HTTP/2", + "http/3": "HTTP/3", + "http/3.0": "HTTP/3", + "h3": "HTTP/3", +} + + +def _normalize_http_version(raw: Any) -> str: + if not isinstance(raw, str) or not raw: + return "HTTP/1.1" + return _HTTP_VERSION_NORMALIZATIONS.get(raw.strip().lower(), raw) + def parse_har(raw: str | dict[str, Any]) -> CapturedRequest: """Parse a HAR 1.2 archive's first entry into a CapturedRequest. 
@@ -94,7 +117,7 @@ def parse_har(raw: str | dict[str, Any]) -> CapturedRequest: status_text=str(status_text_raw) if isinstance(status_text_raw, str) else "", headers=resp_headers, body=resp_body, - protocol=str(response_obj.get("httpVersion", "HTTP/1.1")), + protocol=_normalize_http_version(response_obj.get("httpVersion")), ) timing = _timing_from_har(entry.get("timings") or {}) diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py index e16a80d..91e94f2 100644 --- a/tests/unit/test_parser.py +++ b/tests/unit/test_parser.py @@ -284,6 +284,55 @@ def test_chrome_devtools_401_entry_yields_auth_missing(self): ids = [f.id for f in report.findings] assert "auth.missing" in ids + @pytest.mark.parametrize( + "raw_http_version,expected", + [ + ("http/2.0", "HTTP/2"), # Chromium HAR format + ("HTTP/2.0", "HTTP/2"), # Firefox HAR format + ("http/2", "HTTP/2"), + ("h2", "HTTP/2"), # ALPN identifier + ("http/1.1", "HTTP/1.1"), + ("HTTP/1.1", "HTTP/1.1"), + ("http/1.0", "HTTP/1.0"), + ("h3", "HTTP/3"), + ("http/3.0", "HTTP/3"), + ], + ) + def test_normalizes_http_version_to_runner_format(self, raw_http_version, expected): + # Browser HARs disagree on httpVersion casing/formatting (Chromium + # 'http/2.0', Firefox 'HTTP/2.0', some tools use ALPN 'h2'). httpx's + # response.http_version is always 'HTTP/1.1' or 'HTTP/2'. Normalising + # on parse keeps Reports visually consistent across surfaces. + har = _minimal_har() + har["log"]["entries"][0]["response"] = { + "status": 200, + "headers": [], + "httpVersion": raw_http_version, + } + cap = parse_har(har) + assert cap.response is not None + assert cap.response.protocol == expected + + def test_unknown_http_version_passes_through(self): + # Don't misrepresent values we don't recognise — a surprising display + # is better than a wrong one. 
+ har = _minimal_har() + har["log"]["entries"][0]["response"] = { + "status": 200, + "headers": [], + "httpVersion": "QUIC-v3-experimental", + } + cap = parse_har(har) + assert cap.response is not None + assert cap.response.protocol == "QUIC-v3-experimental" + + def test_missing_http_version_defaults_to_http_1_1(self): + har = _minimal_har() + har["log"]["entries"][0]["response"] = {"status": 200, "headers": []} + cap = parse_har(har) + assert cap.response is not None + assert cap.response.protocol == "HTTP/1.1" + class TestParseCurl: def test_post_with_data_and_headers(self): From 4b9f926cab8ee6d3f242c520c94d1333121086cc Mon Sep 17 00:00:00 2001 From: marky224 Date: Sat, 2 May 2026 14:08:46 -0500 Subject: [PATCH 2/2] Tighten _normalize_http_version: handle whitespace-only and strip pass-through Self-review of #3 turned up an edge: whitespace-only input (' ') fell through the truthy check, then the lookup missed (strip+lower made it ''), and the function returned the raw whitespace string. That would have rendered as literal whitespace in the Protocol cell of the Report. Now the strip happens first; whitespace-only input takes the same HTTP/1.1 default as empty/None. Pass-through for unknown values also returns the stripped form so leading/trailing whitespace doesn't leak through that path either. Three additional tests cover whitespace-only input, trailing whitespace on a recognised value, and trailing whitespace on the unknown-value pass-through. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/api_medic/core/parser.py | 7 +++++-- tests/unit/test_parser.py | 37 ++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/api_medic/core/parser.py b/src/api_medic/core/parser.py index 5b8ddaf..1ca803b 100644 --- a/src/api_medic/core/parser.py +++ b/src/api_medic/core/parser.py @@ -38,9 +38,12 @@ def _normalize_http_version(raw: Any) -> str: - if not isinstance(raw, str) or not raw: + if not isinstance(raw, str): return "HTTP/1.1" - return _HTTP_VERSION_NORMALIZATIONS.get(raw.strip().lower(), raw) + stripped = raw.strip() + if not stripped: + return "HTTP/1.1" + return _HTTP_VERSION_NORMALIZATIONS.get(stripped.lower(), stripped) def parse_har(raw: str | dict[str, Any]) -> CapturedRequest: diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py index 91e94f2..f97ea64 100644 --- a/tests/unit/test_parser.py +++ b/tests/unit/test_parser.py @@ -333,6 +333,43 @@ def test_missing_http_version_defaults_to_http_1_1(self): assert cap.response is not None assert cap.response.protocol == "HTTP/1.1" + def test_whitespace_only_http_version_defaults_to_http_1_1(self): + # Whitespace-only is treated like empty rather than passed through — + # otherwise the Report would render leaky whitespace in the Protocol cell. 
+ har = _minimal_har() + har["log"]["entries"][0]["response"] = { + "status": 200, + "headers": [], + "httpVersion": " ", + } + cap = parse_har(har) + assert cap.response is not None + assert cap.response.protocol == "HTTP/1.1" + + def test_normalizes_recognized_http_version_with_trailing_whitespace(self): + har = _minimal_har() + har["log"]["entries"][0]["response"] = { + "status": 200, + "headers": [], + "httpVersion": " http/2.0 ", + } + cap = parse_har(har) + assert cap.response is not None + assert cap.response.protocol == "HTTP/2" + + def test_unknown_http_version_pass_through_strips_whitespace(self): + # Unknown values pass through, but stripped — leaking surrounding + # whitespace into the Protocol display would be cosmetically broken. + har = _minimal_har() + har["log"]["entries"][0]["response"] = { + "status": 200, + "headers": [], + "httpVersion": " QUIC-experimental ", + } + cap = parse_har(har) + assert cap.response is not None + assert cap.response.protocol == "QUIC-experimental" + class TestParseCurl: def test_post_with_data_and_headers(self):