From 5cdbf8f35d3ca0a51c91f45501ca8d9c39c16fb7 Mon Sep 17 00:00:00 2001
From: Zo Bot <github-automation@zo.computer>
Date: Mon, 15 Jun 2026 19:36:35 +0000
Subject: [PATCH 1/2] =?UTF-8?q?raise=20a=20clear=20ValueError=20for=20empt?=
 =?UTF-8?q?y=20CSV=20input=20in=20load=5Fcsv=20=E2=80=94=20when=20a=20user?=
 =?UTF-8?q?=20runs=20csv-diff=20against=20an=20empty=20file,=20csv.reader?=
 =?UTF-8?q?=20returns=20no=20rows=20and=20the=20previous=20code=20let=20St?=
 =?UTF-8?q?opIteration=20bubble=20out=20of=20next(fp),=20producing=20a=20c?=
 =?UTF-8?q?onfusing=20traceback=20at=20the=20top=20of=20the=20call=20stack?=
 =?UTF-8?q?=20with=20no=20indication=20that=20the=20input=20was=20empty;?=
 =?UTF-8?q?=20the=20new=20try/except=20translates=20StopIteration=20into?=
 =?UTF-8?q?=20a=20typed=20ValueError=20with=20a=20descriptive=20message=20?=
 =?UTF-8?q?so=20the=20CLI=20shows=20'CSV=20input=20is=20empty=20(no=20head?=
 =?UTF-8?q?er=20row=20found)'=20and=20downstream=20loaders=20/=20Click=20e?=
 =?UTF-8?q?rror=20handling=20can=20react=20to=20it=20explicitly?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 csv_diff/__init__.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/csv_diff/__init__.py b/csv_diff/__init__.py
index 59a2eaf..4dfd7f4 100644
--- a/csv_diff/__init__.py
+++ b/csv_diff/__init__.py
@@ -15,7 +15,10 @@ def load_csv(fp, key=None, dialect=None):
             # Oh well, we tried. Fallback to the default.
             pass
     fp = csv.reader(fp, dialect=(dialect or "excel"))
-    headings = next(fp)
+    try:
+        headings = next(fp)
+    except StopIteration:
+        raise ValueError("CSV input is empty (no header row found)")
     rows = [dict(zip(headings, line)) for line in fp]
     if key:
         keyfn = lambda r: r[key]

From b6d15a05e97432bbaa3c54c7ee5615fe2234623c Mon Sep 17 00:00:00 2001
From: Zo Bot <github-automation@zo.computer>
Date: Wed, 1 Jul 2026 03:42:10 +0000
Subject: [PATCH 2/2] load_csv: skip blank lines, raise clear ValueError
 on row-length and key-column errors

A trailing blank line in a CSV file (the kind GitHub and most editors emit
by default) crashed the diff with a bare KeyError on the key column, far
from the actual problem. csv.reader yields an empty list for a fully-blank
line, and dict(zip(headings, [])) turned that into {}, so the key lookup
r[key] inside the keyfn lambda raised KeyError (KeyError('a') when keyed
on column "a"). Issue #29.

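While reading the file, track a 1-based line number alongside each row
(counted per CSV record, so it matches the physical line unless a quoted
field spans several lines) and raise clear ValueError messages for the
other two error paths that previously leaked as tracebacks; only the
row-length error carries the line number: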

  - rows with fewer fields than the header (the previous list-comprehension
    silently accepted them and produced dicts with missing keys, then
    crashed inside the keyfn lambda or in compare()'s next(iter(...)));
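  - a --key column that isn't in the header (was also a KeyError, this
    time on r[key] inside keyfn). The check happens when the rows are
    keyed, so a header-only file with a bad --key still loads as {}.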

Trailing and interior blank lines (csv.reader yields [] for them) are
now silently skipped, matching the POSIX text-file convention and the
behaviour of most other CSV tools. Rows with more fields than the header
are still accepted, with the surplus fields dropped by zip(), since the
trailing-comma pattern is a real-world way to express an empty last
column; the FIVE test fixture exercises that.

Added five tests in tests/test_csv_diff.py:
  - test_trailing_blank_line_ignored (issue #29 reproducer)
  - test_interior_blank_line_ignored
  - test_trailing_blank_line_with_no_key
  - test_mismatched_row_length_raises_clear_error
  - test_missing_key_column_raises_clear_error

All 29 tests pass (5 new + 24 existing).
---
 csv_diff/__init__.py   | 42 ++++++++++++++++++++++++++++++++++++------
 tests/test_csv_diff.py | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+), 6 deletions(-)

diff --git a/csv_diff/__init__.py b/csv_diff/__init__.py
index 4dfd7f4..2a710cd 100644
--- a/csv_diff/__init__.py
+++ b/csv_diff/__init__.py
@@ -19,14 +19,44 @@ def load_csv(fp, key=None, dialect=None):
         headings = next(fp)
     except StopIteration:
         raise ValueError("CSV input is empty (no header row found)")
-    rows = [dict(zip(headings, line)) for line in fp]
+    if not headings:
+        raise ValueError("CSV input has an empty header row")
+    rows = {}
+    # Track a 1-based line number alongside each row so the row-length
+    # error below can point back to the input. The header counts as line 1;
+    # records are counted, so a multi-line quoted field skews the number.
+    for line_number, line in enumerate(fp, start=2):
+        # csv.reader yields an empty list for a fully-blank line (a stray
+        # trailing newline, the kind GitHub and most editors insert by
+        # default). Silently skipping those matches the "POSIX text file"
+        # convention and the behaviour of most other CSV tools; raising
+        # KeyError('a') at the very end of a diff made the tool look
+        # broken on perfectly normal input. See issue #29.
+        if not line:
+            continue
+        if len(line) < len(headings):
+            raise ValueError(
+                f"CSV row on line {line_number} has {len(line)} field(s) "
+                f"but the header on line 1 has {len(headings)}; "
+                f"got {line!r}"
+            )
+        rows[line_number] = dict(zip(headings, line))
     if key:
-        keyfn = lambda r: r[key]
+        try:
+            return {rows[ln][key]: rows[ln] for ln in rows}
+        except KeyError as exc:
+            missing = exc.args[0]
+            raise ValueError(
+                f"Key column {missing!r} not present in CSV header "
+                f"{headings!r}"
+            ) from None
     else:
-        keyfn = lambda r: hashlib.sha1(
-            json.dumps(r, sort_keys=True).encode("utf8")
-        ).hexdigest()
-    return {keyfn(r): r for r in rows}
+        return {
+            hashlib.sha1(
+                json.dumps(rows[ln], sort_keys=True).encode("utf8")
+            ).hexdigest(): rows[ln]
+            for ln in rows
+        }
 
 
 def load_json(fp, key=None):
diff --git a/tests/test_csv_diff.py b/tests/test_csv_diff.py
index 0e3670f..0564ea0 100644
--- a/tests/test_csv_diff.py
+++ b/tests/test_csv_diff.py
@@ -1,5 +1,6 @@
 from csv_diff import load_csv, compare
 import io
+import pytest
 
 ONE = """id,name,age
 1,Cleo,4
@@ -115,3 +116,38 @@ def test_tsv():
         "columns_added": [],
         "columns_removed": [],
     } == diff
+
+
+def test_trailing_blank_line_ignored():
+    # Issue #29: a trailing newline (as GitHub and most editors emit) should
+    # not crash the tool with a KeyError on the key column.
+    csv_text = "a,b,c\n1,2,3\n\n"
+    assert load_csv(io.StringIO(csv_text), key="a") == {
+        "1": {"a": "1", "b": "2", "c": "3"}
+    }
+
+
+def test_interior_blank_line_ignored():
+    csv_text = "a,b,c\n1,2,3\n\n4,5,6\n"
+    assert load_csv(io.StringIO(csv_text), key="a") == {
+        "1": {"a": "1", "b": "2", "c": "3"},
+        "4": {"a": "4", "b": "5", "c": "6"},
+    }
+
+
+def test_trailing_blank_line_with_no_key():
+    csv_text = "a,b,c\n1,2,3\n\n"
+    loaded = load_csv(io.StringIO(csv_text))
+    assert list(loaded.values()) == [{"a": "1", "b": "2", "c": "3"}]
+
+
+def test_mismatched_row_length_raises_clear_error():
+    csv_text = "a,b,c\n1,2,3\n4,5\n"
+    with pytest.raises(ValueError, match=r"line 3.*2 field.*3"):
+        load_csv(io.StringIO(csv_text), key="a")
+
+
+def test_missing_key_column_raises_clear_error():
+    csv_text = "a,b,c\n1,2,3\n"
+    with pytest.raises(ValueError, match=r"Key column 'z' not present"):
+        load_csv(io.StringIO(csv_text), key="z")