Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ All notable changes to api-medic are documented here. The format follows [Keep a
- Report action buttons (`Re-run`, `Export markdown`) are now wired up — they previously rendered but did nothing on click. `Share report` is intentionally hidden in v1: it requires persistence, which is out of scope.
- Lambda `/api/analyze` now returns 400 with a useful `detail` for malformed HAR entries (missing `request.method`, missing `request.url`, non-integer `response.status`, out-of-range status codes) instead of a silent 500. The HAR parser also validates required request fields up front rather than letting `KeyError` escape. This was hitting any browser-extension capture path that produced a partial entry — the panel previously surfaced "Analyze failed: 500 Internal Server Error" with no actionable hint.
- HAR parser error messages for `request.method` / `request.url` now distinguish *missing key*, *wrong type*, and *empty string* (previously all three collapsed into one "missing" message), and prefix the failure with `HAR entry[0] (<url>):` when the entry's URL is parseable. With multi-entry HARs the URL is what tells the user which captured request actually failed; the label degrades to `HAR entry[0]:` when the URL itself is the bad field.
- HAR parser now normalizes the response `httpVersion` field to the same form `httpx` produces in the live runner — `http/2.0`, `HTTP/2.0`, and `h2` all become `HTTP/2`; `http/1.1` becomes `HTTP/1.1`. Browser HARs disagree on casing and version notation (Chromium writes `http/2.0` lowercase, Firefox writes `HTTP/2.0` uppercase), which previously produced visually inconsistent Protocol values in the rendered Report depending on which surface produced it. Unknown values pass through unchanged.

## [1.1.0] - 2026-05-01

Expand Down
28 changes: 27 additions & 1 deletion src/api_medic/core/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,32 @@
from .captured import CapturedRequest, CapturedResponse
from .models import TimingBreakdown

# HAR `httpVersion` strings vary by browser: Chromium writes 'http/2.0' lowercase
# with .0, Firefox writes 'HTTP/2.0' uppercase with .0, and some tools use the
# ALPN identifier 'h2'. httpx's response.http_version is always 'HTTP/1.1' or
# 'HTTP/2' (uppercase, no .0 on h2). Normalising on parse keeps the rendered
# Report's Protocol field visually consistent regardless of which surface
# produced it. Values with no entry in this map pass through as given, apart
# from having surrounding whitespace stripped.
# Lower-cased HAR `httpVersion` spelling -> canonical display form. Grouped by
# canonical value so every accepted alias of a protocol sits on one line.
_HTTP_VERSION_NORMALIZATIONS = {
    alias: canonical
    for canonical, aliases in (
        ("HTTP/1.0", ("http/1.0",)),
        ("HTTP/1.1", ("http/1.1",)),
        ("HTTP/2", ("http/2", "http/2.0", "h2")),
        ("HTTP/3", ("http/3", "http/3.0", "h3")),
    )
    for alias in aliases
}


def _normalize_http_version(raw: Any) -> str:
    """Return the canonical protocol label for a HAR ``httpVersion`` value.

    Args:
        raw: The value read from the HAR response object; may be any type
            (or ``None`` when the key is absent).

    Returns:
        ``"HTTP/1.1"`` when ``raw`` is not a string or is empty/whitespace-only;
        the mapped canonical form when the stripped, lower-cased value is a
        known alias; otherwise the stripped value itself.
    """
    # Non-strings collapse to "" so they share the default path with blanks.
    candidate = raw.strip() if isinstance(raw, str) else ""
    if candidate == "":
        return "HTTP/1.1"
    try:
        # Lookup is case-insensitive; the table's keys are all lower-case.
        return _HTTP_VERSION_NORMALIZATIONS[candidate.lower()]
    except KeyError:
        # Unrecognised spelling: keep the caller's casing, minus whitespace.
        return candidate


def parse_har(raw: str | dict[str, Any]) -> CapturedRequest:
"""Parse a HAR 1.2 archive's first entry into a CapturedRequest.
Expand Down Expand Up @@ -94,7 +120,7 @@ def parse_har(raw: str | dict[str, Any]) -> CapturedRequest:
status_text=str(status_text_raw) if isinstance(status_text_raw, str) else "",
headers=resp_headers,
body=resp_body,
protocol=str(response_obj.get("httpVersion", "HTTP/1.1")),
protocol=_normalize_http_version(response_obj.get("httpVersion")),
)

timing = _timing_from_har(entry.get("timings") or {})
Expand Down
86 changes: 86 additions & 0 deletions tests/unit/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,92 @@ def test_chrome_devtools_401_entry_yields_auth_missing(self):
ids = [f.id for f in report.findings]
assert "auth.missing" in ids

@pytest.mark.parametrize(
    "raw_http_version,expected",
    [
        # Chromium HAR format
        ("http/2.0", "HTTP/2"),
        # Firefox HAR format
        ("HTTP/2.0", "HTTP/2"),
        ("http/2", "HTTP/2"),
        # ALPN identifier
        ("h2", "HTTP/2"),
        ("http/1.1", "HTTP/1.1"),
        ("HTTP/1.1", "HTTP/1.1"),
        ("http/1.0", "HTTP/1.0"),
        ("h3", "HTTP/3"),
        ("http/3.0", "HTTP/3"),
    ],
)
def test_normalizes_http_version_to_runner_format(self, raw_http_version, expected):
    # HAR producers spell httpVersion differently (Chromium 'http/2.0',
    # Firefox 'HTTP/2.0', some tools the ALPN id 'h2'), whereas httpx's
    # response.http_version is always 'HTTP/1.1' or 'HTTP/2'. The parser
    # should fold every known spelling into that runner-style form.
    har = _minimal_har()
    first_entry = har["log"]["entries"][0]
    first_entry["response"] = {
        "status": 200,
        "headers": [],
        "httpVersion": raw_http_version,
    }

    parsed = parse_har(har)

    assert parsed.response is not None
    assert parsed.response.protocol == expected

def test_unknown_http_version_passes_through(self):
    # An unrecognised value must be shown as-is: an odd-looking Protocol
    # is preferable to one that was silently rewritten into something wrong.
    har = _minimal_har()
    first_entry = har["log"]["entries"][0]
    first_entry["response"] = {
        "status": 200,
        "headers": [],
        "httpVersion": "QUIC-v3-experimental",
    }

    parsed = parse_har(har)

    assert parsed.response is not None
    assert parsed.response.protocol == "QUIC-v3-experimental"

def test_missing_http_version_defaults_to_http_1_1(self):
    # A response object with no httpVersion key falls back to the default.
    har = _minimal_har()
    first_entry = har["log"]["entries"][0]
    first_entry["response"] = {"status": 200, "headers": []}

    parsed = parse_har(har)

    assert parsed.response is not None
    assert parsed.response.protocol == "HTTP/1.1"

def test_whitespace_only_http_version_defaults_to_http_1_1(self):
    # A blank-but-not-empty value is handled like an empty one; passing it
    # through would leak stray whitespace into the Report's Protocol cell.
    har = _minimal_har()
    first_entry = har["log"]["entries"][0]
    first_entry["response"] = {
        "status": 200,
        "headers": [],
        "httpVersion": " ",
    }

    parsed = parse_har(har)

    assert parsed.response is not None
    assert parsed.response.protocol == "HTTP/1.1"

def test_normalizes_recognized_http_version_with_trailing_whitespace(self):
    # Surrounding whitespace must not stop a known spelling from being
    # recognised and mapped to its canonical form.
    har = _minimal_har()
    first_entry = har["log"]["entries"][0]
    first_entry["response"] = {
        "status": 200,
        "headers": [],
        "httpVersion": " http/2.0 ",
    }

    parsed = parse_har(har)

    assert parsed.response is not None
    assert parsed.response.protocol == "HTTP/2"

def test_unknown_http_version_pass_through_strips_whitespace(self):
    # Unrecognised values are still passed through, but trimmed first, so
    # padding never reaches the Protocol display.
    har = _minimal_har()
    first_entry = har["log"]["entries"][0]
    first_entry["response"] = {
        "status": 200,
        "headers": [],
        "httpVersion": " QUIC-experimental ",
    }

    parsed = parse_har(har)

    assert parsed.response is not None
    assert parsed.response.protocol == "QUIC-experimental"


class TestParseCurl:
def test_post_with_data_and_headers(self):
Expand Down
Loading