From 7a7302ed72709e23a22d5fcfbc6030797450f342 Mon Sep 17 00:00:00 2001 From: marky224 Date: Sat, 2 May 2026 13:27:42 -0500 Subject: [PATCH] Sharpen HAR parser error messages for method and url MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit parse_har previously collapsed three failure modes into one message ("missing"): the key was absent, the value was wrong type, or the value was an empty string. They point at different upstream bugs (non-conformant HAR tool, corrupt capture, schema violation), so they now produce three distinct messages each for both required fields: HAR entry[0] (): request.method is missing. HAR entry[0] (): request.method must be a string, got int. HAR entry[0] (): request.method is empty. The same split applies to request.url. Real-world HARs commonly contain dozens of entries; v1 only parses entries[0], so the URL parenthetical is what tells the user which captured request actually failed in their archive. When the URL itself is the bad field the label degrades to "HAR entry[0]:" without a parenthetical. Tests cover all six failure paths plus the label-with-URL and label-fallback cases. Lambda handler test still passes — its assertion only checks for the substring "method" in the 400 detail. CHANGELOG [Unreleased] / Fixed updated. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 1 + src/api_medic/core/parser.py | 33 ++++++++++++++++++----- tests/unit/test_parser.py | 52 +++++++++++++++++++++++++++++++++--- 3 files changed, 77 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 58c5c0d..a9b84b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ All notable changes to api-medic are documented here. The format follows [Keep a - Report action buttons (`Re-run`, `Export markdown`) are now wired up — they previously rendered but did nothing on click. `Share report` is intentionally hidden in v1: it requires persistence, which is out of scope. - Lambda `/api/analyze` now returns 400 with a useful `detail` for malformed HAR entries (missing `request.method`, missing `request.url`, non-integer `response.status`, out-of-range status codes) instead of a silent 500. The HAR parser also validates required request fields up front rather than letting `KeyError` escape. This was hitting any browser-extension capture path that produced a partial entry — the panel previously surfaced "Analyze failed: 500 Internal Server Error" with no actionable hint. +- HAR parser error messages for `request.method` / `request.url` now distinguish *missing key*, *wrong type*, and *empty string* (previously all three collapsed into one "missing" message), and prefix the failure with `HAR entry[0] ():` when the entry's URL is parseable. With multi-entry HARs the URL is what tells the user which captured request actually failed; the label degrades to `HAR entry[0]:` when the URL itself is the bad field. ## [1.1.0] - 2026-05-01 diff --git a/src/api_medic/core/parser.py b/src/api_medic/core/parser.py index a8bdd26..91515ee 100644 --- a/src/api_medic/core/parser.py +++ b/src/api_medic/core/parser.py @@ -40,12 +40,33 @@ def parse_har(raw: str | dict[str, Any]) -> CapturedRequest: request = entry["request"] if not isinstance(request, dict): raise ValueError("HAR entry's 'request' must be an object.") - method = request.get("method") - if not isinstance(method, str) or not method: - raise ValueError("HAR entry's request is missing 'method'.") - url = request.get("url") - if not isinstance(url, str) or not url: - raise ValueError("HAR entry's request is missing 'url'.") + + # Best-effort URL extraction so field-error messages can identify which + # captured request failed. Real-world HARs have many entries; v1 only + # parses entries[0] but the URL is what tells the user *which* request + # that was. Degrades to "HAR entry[0]" when url itself is the bad field. + maybe_url = request.get("url") + label = ( + f"HAR entry[0] ({maybe_url})" + if isinstance(maybe_url, str) and maybe_url + else "HAR entry[0]" + ) + + if "method" not in request: + raise ValueError(f"{label}: request.method is missing.") + method = request["method"] + if not isinstance(method, str): + raise ValueError(f"{label}: request.method must be a string, got {type(method).__name__}.") + if not method: + raise ValueError(f"{label}: request.method is empty.") + + if "url" not in request: + raise ValueError("HAR entry[0]: request.url is missing.") + url = request["url"] + if not isinstance(url, str): + raise ValueError(f"HAR entry[0]: request.url must be a string, got {type(url).__name__}.") + if not url: + raise ValueError("HAR entry[0]: request.url is empty.") request_headers = _har_headers(request.get("headers")) body_text = (request.get("postData") or {}).get("text", "") diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py index 7942f3c..e16a80d 100644 --- a/tests/unit/test_parser.py +++ b/tests/unit/test_parser.py @@ -127,13 +127,19 @@ def test_missing_request_method_raises_value_error(self): ], }, } - with pytest.raises(ValueError, match="missing 'method'"): + with pytest.raises(ValueError, match=r"request\.method is missing"): parse_har(har) def test_empty_request_method_raises_value_error(self): har = _minimal_har() har["log"]["entries"][0]["request"]["method"] = "" - with pytest.raises(ValueError, match="missing 'method'"): + with pytest.raises(ValueError, match=r"request\.method is empty"): + parse_har(har) + + def test_wrong_type_request_method_raises_value_error(self): + har = _minimal_har() + har["log"]["entries"][0]["request"]["method"] = 42 + with pytest.raises(ValueError, match=r"request\.method must be a string, got int"): parse_har(har) def test_missing_request_url_raises_value_error(self): @@ -148,7 +154,47 @@ def test_missing_request_url_raises_value_error(self): ], }, } - with pytest.raises(ValueError, match="missing 'url'"): + with pytest.raises(ValueError, match=r"request\.url is missing"): + parse_har(har) + + def test_empty_request_url_raises_value_error(self): + har = _minimal_har() + har["log"]["entries"][0]["request"]["url"] = "" + with pytest.raises(ValueError, match=r"request\.url is empty"): + parse_har(har) + + def test_wrong_type_request_url_raises_value_error(self): + har = _minimal_har() + har["log"]["entries"][0]["request"]["url"] = ["not", "a", "string"] + with pytest.raises(ValueError, match=r"request\.url must be a string, got list"): + parse_har(har) + + def test_error_message_includes_entry_url_when_method_invalid(self): + # When the URL is parseable but the method is not, the user wants to + # know *which* of the (potentially many) HAR entries failed. v1 only + # parses entries[0], so the URL is the load-bearing identifier. + har = _minimal_har() + har["log"]["entries"][0]["request"]["url"] = "https://api.example.com/foo" + har["log"]["entries"][0]["request"]["method"] = "" + with pytest.raises( + ValueError, + match=r"HAR entry\[0\] \(https://api\.example\.com/foo\): " + r"request\.method is empty", + ): + parse_har(har) + + def test_error_label_falls_back_when_url_also_missing(self): + # Method *and* url both missing: label can't echo a URL, so the + # message degrades to "HAR entry[0]:" without a URL parenthetical. + har = { + "log": { + "version": "1.2", + "entries": [ + {"request": {"headers": []}, "response": {"status": 200}}, + ], + }, + } + with pytest.raises(ValueError, match=r"^HAR entry\[0\]: request\.method is missing"): parse_har(har) def test_non_dict_request_raises_value_error(self):