From 7a7302ed72709e23a22d5fcfbc6030797450f342 Mon Sep 17 00:00:00 2001
From: marky224 <markymark0224@gmail.com>
Date: Sat, 2 May 2026 13:27:42 -0500
Subject: [PATCH] Sharpen HAR parser error messages for method and url
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

parse_har previously collapsed three failure modes into one message
("missing"): the key was absent, the value was wrong type, or the
value was an empty string. They point at different upstream bugs
(non-conformant HAR tool, corrupt capture, schema violation), so
they now produce three distinct messages each for both required
fields:

  HAR entry[0] (<url>): request.method is missing.
  HAR entry[0] (<url>): request.method must be a string, got int.
  HAR entry[0] (<url>): request.method is empty.

The same split applies to request.url. Real-world HARs commonly
contain dozens of entries; v1 only parses entries[0], so the URL
parenthetical is what tells the user which captured request actually
failed in their archive. When the URL itself is the bad field the
label degrades to "HAR entry[0]:" without a parenthetical.

Tests cover all six failure paths plus the label-with-URL and
label-fallback cases. Lambda handler test still passes — its
assertion only checks for the substring "method" in the 400 detail.

CHANGELOG [Unreleased] / Fixed updated.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                 |  1 +
 src/api_medic/core/parser.py | 33 ++++++++++++++++++-----
 tests/unit/test_parser.py    | 52 +++++++++++++++++++++++++++++++++---
 3 files changed, 77 insertions(+), 9 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 58c5c0d..a9b84b0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@ All notable changes to api-medic are documented here. The format follows [Keep a
 
 - Report action buttons (`Re-run`, `Export markdown`) are now wired up — they previously rendered but did nothing on click. `Share report` is intentionally hidden in v1: it requires persistence, which is out of scope.
 - Lambda `/api/analyze` now returns 400 with a useful `detail` for malformed HAR entries (missing `request.method`, missing `request.url`, non-integer `response.status`, out-of-range status codes) instead of a silent 500. The HAR parser also validates required request fields up front rather than letting `KeyError` escape. This was hitting any browser-extension capture path that produced a partial entry — the panel previously surfaced "Analyze failed: 500 Internal Server Error" with no actionable hint.
+- HAR parser error messages for `request.method` / `request.url` now distinguish *missing key*, *wrong type*, and *empty string* (previously all three collapsed into one "missing" message), and prefix the failure with `HAR entry[0] (<url>):` when the entry's URL is parseable. With multi-entry HARs the URL is what tells the user which captured request actually failed; the label degrades to `HAR entry[0]:` when the URL itself is the bad field.
 
 ## [1.1.0] - 2026-05-01
 
diff --git a/src/api_medic/core/parser.py b/src/api_medic/core/parser.py
index a8bdd26..91515ee 100644
--- a/src/api_medic/core/parser.py
+++ b/src/api_medic/core/parser.py
@@ -40,12 +40,33 @@ def parse_har(raw: str | dict[str, Any]) -> CapturedRequest:
     request = entry["request"]
     if not isinstance(request, dict):
         raise ValueError("HAR entry's 'request' must be an object.")
-    method = request.get("method")
-    if not isinstance(method, str) or not method:
-        raise ValueError("HAR entry's request is missing 'method'.")
-    url = request.get("url")
-    if not isinstance(url, str) or not url:
-        raise ValueError("HAR entry's request is missing 'url'.")
+
+    # Best-effort URL extraction so field-error messages can identify which
+    # captured request failed. Real-world HARs have many entries; v1 only
+    # parses entries[0] but the URL is what tells the user *which* request
+    # that was. Degrades to "HAR entry[0]" when url itself is the bad field.
+    maybe_url = request.get("url")
+    label = (
+        f"HAR entry[0] ({maybe_url})"
+        if isinstance(maybe_url, str) and maybe_url
+        else "HAR entry[0]"
+    )
+
+    if "method" not in request:
+        raise ValueError(f"{label}: request.method is missing.")
+    method = request["method"]
+    if not isinstance(method, str):
+        raise ValueError(f"{label}: request.method must be a string, got {type(method).__name__}.")
+    if not method:
+        raise ValueError(f"{label}: request.method is empty.")
+
+    if "url" not in request:
+        raise ValueError("HAR entry[0]: request.url is missing.")
+    url = request["url"]
+    if not isinstance(url, str):
+        raise ValueError(f"HAR entry[0]: request.url must be a string, got {type(url).__name__}.")
+    if not url:
+        raise ValueError("HAR entry[0]: request.url is empty.")
 
     request_headers = _har_headers(request.get("headers"))
     body_text = (request.get("postData") or {}).get("text", "")
diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py
index 7942f3c..e16a80d 100644
--- a/tests/unit/test_parser.py
+++ b/tests/unit/test_parser.py
@@ -127,13 +127,19 @@ def test_missing_request_method_raises_value_error(self):
                 ],
             },
         }
-        with pytest.raises(ValueError, match="missing 'method'"):
+        with pytest.raises(ValueError, match=r"request\.method is missing"):
             parse_har(har)
 
     def test_empty_request_method_raises_value_error(self):
         har = _minimal_har()
         har["log"]["entries"][0]["request"]["method"] = ""
-        with pytest.raises(ValueError, match="missing 'method'"):
+        with pytest.raises(ValueError, match=r"request\.method is empty"):
+            parse_har(har)
+
+    def test_wrong_type_request_method_raises_value_error(self):
+        har = _minimal_har()
+        har["log"]["entries"][0]["request"]["method"] = 42
+        with pytest.raises(ValueError, match=r"request\.method must be a string, got int"):
             parse_har(har)
 
     def test_missing_request_url_raises_value_error(self):
@@ -148,7 +154,47 @@ def test_missing_request_url_raises_value_error(self):
                 ],
             },
         }
-        with pytest.raises(ValueError, match="missing 'url'"):
+        with pytest.raises(ValueError, match=r"request\.url is missing"):
+            parse_har(har)
+
+    def test_empty_request_url_raises_value_error(self):
+        har = _minimal_har()
+        har["log"]["entries"][0]["request"]["url"] = ""
+        with pytest.raises(ValueError, match=r"request\.url is empty"):
+            parse_har(har)
+
+    def test_wrong_type_request_url_raises_value_error(self):
+        har = _minimal_har()
+        har["log"]["entries"][0]["request"]["url"] = ["not", "a", "string"]
+        with pytest.raises(ValueError, match=r"request\.url must be a string, got list"):
+            parse_har(har)
+
+    def test_error_message_includes_entry_url_when_method_invalid(self):
+        # When the URL is parseable but the method is not, the user wants to
+        # know *which* of the (potentially many) HAR entries failed. v1 only
+        # parses entries[0], so the URL is the load-bearing identifier.
+        har = _minimal_har()
+        har["log"]["entries"][0]["request"]["url"] = "https://api.example.com/foo"
+        har["log"]["entries"][0]["request"]["method"] = ""
+        with pytest.raises(
+            ValueError,
+            match=r"HAR entry\[0\] \(https://api\.example\.com/foo\): "
+            r"request\.method is empty",
+        ):
+            parse_har(har)
+
+    def test_error_label_falls_back_when_url_also_missing(self):
+        # Method *and* url both missing: label can't echo a URL, so the
+        # message degrades to "HAR entry[0]:" without a URL parenthetical.
+        har = {
+            "log": {
+                "version": "1.2",
+                "entries": [
+                    {"request": {"headers": []}, "response": {"status": 200}},
+                ],
+            },
+        }
+        with pytest.raises(ValueError, match=r"^HAR entry\[0\]: request\.method is missing"):
             parse_har(har)
 
     def test_non_dict_request_raises_value_error(self):