diff --git a/CHANGELOG.md b/CHANGELOG.md index a9b84b0..17b629e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ All notable changes to api-medic are documented here. The format follows [Keep a - Report action buttons (`Re-run`, `Export markdown`) are now wired up — they previously rendered but did nothing on click. `Share report` is intentionally hidden in v1: it requires persistence, which is out of scope. - Lambda `/api/analyze` now returns 400 with a useful `detail` for malformed HAR entries (missing `request.method`, missing `request.url`, non-integer `response.status`, out-of-range status codes) instead of a silent 500. The HAR parser also validates required request fields up front rather than letting `KeyError` escape. This was hitting any browser-extension capture path that produced a partial entry — the panel previously surfaced "Analyze failed: 500 Internal Server Error" with no actionable hint. - HAR parser error messages for `request.method` / `request.url` now distinguish *missing key*, *wrong type*, and *empty string* (previously all three collapsed into one "missing" message), and prefix the failure with `HAR entry[0] (<url>):` when the entry's URL is parseable. With multi-entry HARs the URL is what tells the user which captured request actually failed; the label degrades to `HAR entry[0]:` when the URL itself is the bad field. +- HAR parser now normalizes the response `httpVersion` field to the same form `httpx` produces in the live runner — `http/2.0`, `HTTP/2.0`, and `h2` all become `HTTP/2`; `http/1.1` becomes `HTTP/1.1`. Browser HARs disagree on casing and version notation (Chromium writes `http/2.0` lowercase, Firefox writes `HTTP/2.0` uppercase), which previously produced visually inconsistent Protocol values in the rendered Report depending on which surface produced it. Unrecognized values pass through with only surrounding whitespace stripped; a missing, empty, or whitespace-only value defaults to `HTTP/1.1`. 
# HAR `httpVersion` strings vary by browser: Chromium writes 'http/2.0'
# (lowercase, with .0), Firefox writes 'HTTP/2.0' (uppercase, with .0), and
# some tools use the ALPN identifiers 'h2' / 'h3'. The live runner shows
# httpx's `response.http_version`, which is uppercase with no '.0' on HTTP/2
# ('HTTP/1.0', 'HTTP/1.1' or 'HTTP/2'). Normalising on parse keeps the
# rendered Report's Protocol field visually consistent regardless of which
# surface produced it.
#
# Keys are lowercase because the lookup is case-insensitive. Values that are
# not listed here pass through with only surrounding whitespace removed.
_HTTP_VERSION_NORMALIZATIONS = {
    "http/1.0": "HTTP/1.0",
    "http/1.1": "HTTP/1.1",
    "http/2": "HTTP/2",
    "http/2.0": "HTTP/2",
    "h2": "HTTP/2",
    "http/3": "HTTP/3",
    "http/3.0": "HTTP/3",
    "h3": "HTTP/3",
}

# Reported when the HAR carries no usable `httpVersion` (absent, null, blank).
_DEFAULT_HTTP_VERSION = "HTTP/1.1"


def _normalize_http_version(raw: Any) -> str:
    """Return a HAR ``httpVersion`` value in the runner's display form.

    Args:
        raw: The ``httpVersion`` value taken from a HAR response object, or
            ``None`` when the key is absent.

    Returns:
        ``"HTTP/1.1"`` when ``raw`` is ``None``, empty, or whitespace-only.
        The canonical spelling when the value (compared case-insensitively,
        ignoring surrounding whitespace) is a key of
        ``_HTTP_VERSION_NORMALIZATIONS``. Otherwise the value itself with
        surrounding whitespace stripped.
    """
    if raw is None:
        return _DEFAULT_HTTP_VERSION

    # HAR defines this field as a string, but a malformed archive may carry
    # something else (e.g. the JSON number 2). Show what was actually
    # recorded instead of claiming HTTP/1.1: a surprising Protocol value is
    # better than a fabricated one.
    text = raw if isinstance(raw, str) else str(raw)

    text = text.strip()
    if not text:
        return _DEFAULT_HTTP_VERSION

    return _HTTP_VERSION_NORMALIZATIONS.get(text.lower(), text)
@@ -94,7 +120,7 @@ def parse_har(raw: str | dict[str, Any]) -> CapturedRequest: status_text=str(status_text_raw) if isinstance(status_text_raw, str) else "", headers=resp_headers, body=resp_body, - protocol=str(response_obj.get("httpVersion", "HTTP/1.1")), + protocol=_normalize_http_version(response_obj.get("httpVersion")), ) timing = _timing_from_har(entry.get("timings") or {}) diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py index e16a80d..f97ea64 100644 --- a/tests/unit/test_parser.py +++ b/tests/unit/test_parser.py @@ -284,6 +284,92 @@ def test_chrome_devtools_401_entry_yields_auth_missing(self): ids = [f.id for f in report.findings] assert "auth.missing" in ids + @pytest.mark.parametrize( + "raw_http_version,expected", + [ + ("http/2.0", "HTTP/2"), # Chromium HAR format + ("HTTP/2.0", "HTTP/2"), # Firefox HAR format + ("http/2", "HTTP/2"), + ("h2", "HTTP/2"), # ALPN identifier + ("http/1.1", "HTTP/1.1"), + ("HTTP/1.1", "HTTP/1.1"), + ("http/1.0", "HTTP/1.0"), + ("h3", "HTTP/3"), + ("http/3.0", "HTTP/3"), + ], + ) + def test_normalizes_http_version_to_runner_format(self, raw_http_version, expected): + # Browser HARs disagree on httpVersion casing/formatting (Chromium + # 'http/2.0', Firefox 'HTTP/2.0', some tools use ALPN 'h2'). httpx's + # response.http_version is always 'HTTP/1.1' or 'HTTP/2'. Normalising + # on parse keeps Reports visually consistent across surfaces. + har = _minimal_har() + har["log"]["entries"][0]["response"] = { + "status": 200, + "headers": [], + "httpVersion": raw_http_version, + } + cap = parse_har(har) + assert cap.response is not None + assert cap.response.protocol == expected + + def test_unknown_http_version_passes_through(self): + # Don't misrepresent values we don't recognise — a surprising display + # is better than a wrong one. 
+ har = _minimal_har() + har["log"]["entries"][0]["response"] = { + "status": 200, + "headers": [], + "httpVersion": "QUIC-v3-experimental", + } + cap = parse_har(har) + assert cap.response is not None + assert cap.response.protocol == "QUIC-v3-experimental" + + def test_missing_http_version_defaults_to_http_1_1(self): + har = _minimal_har() + har["log"]["entries"][0]["response"] = {"status": 200, "headers": []} + cap = parse_har(har) + assert cap.response is not None + assert cap.response.protocol == "HTTP/1.1" + + def test_whitespace_only_http_version_defaults_to_http_1_1(self): + # Whitespace-only is treated like empty rather than passed through — + # otherwise the Report would render leaky whitespace in the Protocol cell. + har = _minimal_har() + har["log"]["entries"][0]["response"] = { + "status": 200, + "headers": [], + "httpVersion": " ", + } + cap = parse_har(har) + assert cap.response is not None + assert cap.response.protocol == "HTTP/1.1" + + def test_normalizes_recognized_http_version_with_trailing_whitespace(self): + har = _minimal_har() + har["log"]["entries"][0]["response"] = { + "status": 200, + "headers": [], + "httpVersion": " http/2.0 ", + } + cap = parse_har(har) + assert cap.response is not None + assert cap.response.protocol == "HTTP/2" + + def test_unknown_http_version_pass_through_strips_whitespace(self): + # Unknown values pass through, but stripped — leaking surrounding + # whitespace into the Protocol display would be cosmetically broken. + har = _minimal_har() + har["log"]["entries"][0]["response"] = { + "status": 200, + "headers": [], + "httpVersion": " QUIC-experimental ", + } + cap = parse_har(har) + assert cap.response is not None + assert cap.response.protocol == "QUIC-experimental" + class TestParseCurl: def test_post_with_data_and_headers(self):