From 40af3b395cebed928b0eb68fe1fdb70bee1f9288 Mon Sep 17 00:00:00 2001
From: Paul <paul.mirroros@hotmail.com>
Date: Sun, 7 Dec 2025 21:10:10 +0530
Subject: [PATCH 1/3] Fix UnicodeDecodeError when reading packed-refs with
 non-UTF8 characters

Fixes #2064

The packed-refs file can contain ref names that are not valid UTF-8
(e.g., Latin-1 encoded tag names created by older Git versions or on
non-UTF-8 systems). Previously the file was opened with
encoding='UTF-8' and the default strict error handling, so iterating
over its lines raised UnicodeDecodeError when such a name was decoded.

Changes:
- Add errors='surrogateescape' to the open() call in _iter_packed_refs()
- This allows reading files with arbitrary byte sequences while still
  treating valid UTF-8 as text
- Add test that verifies non-UTF8 packed-refs can be read successfully

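The 'surrogateescape' error handler (PEP 383) is what Python itself
uses for file names and other OS-provided data that may not be valid in
the expected encoding. Each undecodable byte is mapped to a lone
surrogate code point (U+DC80..U+DCFF), so the original bytes can be
recovered by encoding the string with the same error handler.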
---
 git/refs/symbolic.py |  2 +-
 test/test_refs.py    | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/git/refs/symbolic.py b/git/refs/symbolic.py
index 99af4f57c..e88bf93df 100644
--- a/git/refs/symbolic.py
+++ b/git/refs/symbolic.py
@@ -123,7 +123,7 @@ def _iter_packed_refs(cls, repo: "Repo") -> Iterator[Tuple[str, str]]:
             The packed refs file will be kept open as long as we iterate.
         """
         try:
-            with open(cls._get_packed_refs_path(repo), "rt", encoding="UTF-8") as fp:
+            with open(cls._get_packed_refs_path(repo), "rt", encoding="UTF-8", errors="surrogateescape") as fp:
                 for line in fp:
                     line = line.strip()
                     if not line:
diff --git a/test/test_refs.py b/test/test_refs.py
index 329515807..3bbac0fed 100644
--- a/test/test_refs.py
+++ b/test/test_refs.py
@@ -629,6 +629,45 @@ def test_tag_message(self, rw_repo):
         )
         assert tag_ref.tag.message == "test2"
 
+    @with_rw_repo("0.1.6")
+    def test_packed_refs_with_non_utf8_encoding(self, rw_repo):
+        """Test that packed-refs files with non-UTF8 encoded ref names can be read.
+        
+        This addresses issue #2064 where GitPython would fail with UnicodeDecodeError
+        when reading packed-refs files containing non-UTF8 characters (e.g., Latin-1
+        encoded tag names).
+        """
+        # Create a tag with ASCII name first
+        TagReference.create(rw_repo, "normal-tag")
+        
+        # Pack refs
+        rw_repo.git.pack_refs(all=True)
+        
+        # Manually insert a non-UTF8 ref into the packed-refs file
+        # Using Latin-1 characters that are invalid UTF-8
+        packed_refs_path = osp.join(rw_repo.common_dir, "packed-refs")
+        
+        with open(packed_refs_path, "rb") as f:
+            content = f.read()
+        
+        # Add a fake ref with Latin-1 encoded name (ñ = 0xF1 in Latin-1, invalid UTF-8)
+        # Using a valid SHA from the repo
+        head_sha = rw_repo.head.commit.hexsha
+        non_utf8_line = f"\n{head_sha} refs/tags/caf\xf1\n".encode("latin-1")
+        
+        with open(packed_refs_path, "wb") as f:
+            f.write(content + non_utf8_line)
+        
+        # This should NOT raise UnicodeDecodeError with the fix
+        # It should successfully read all tags including the non-UTF8 one
+        tags = list(rw_repo.tags)
+        assert len(tags) >= 1
+        
+        # Verify we can iterate packed refs without error
+        from git.refs import SymbolicReference
+        packed_refs = list(SymbolicReference._iter_packed_refs(rw_repo))
+        assert len(packed_refs) >= 2  # At least normal-tag and the non-UTF8 tag
+
     def test_dereference_recursive(self):
         # For now, just test the HEAD.
         assert SymbolicReference.dereference_recursive(self.rorepo, "HEAD")

From 963604b4e238c7b8de62c8d9067fd814467e74f1 Mon Sep 17 00:00:00 2001
From: Paul Desai <paul.mirroros@hotmail.com>
Date: Mon, 8 Dec 2025 13:58:10 +0530
Subject: [PATCH 2/3] Fix ruff lint: remove whitespace from blank lines

---
 test/test_refs.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/test/test_refs.py b/test/test_refs.py
index 3bbac0fed..c5378ee5f 100644
--- a/test/test_refs.py
+++ b/test/test_refs.py
@@ -632,39 +632,40 @@ def test_tag_message(self, rw_repo):
     @with_rw_repo("0.1.6")
     def test_packed_refs_with_non_utf8_encoding(self, rw_repo):
         """Test that packed-refs files with non-UTF8 encoded ref names can be read.
-        
+
         This addresses issue #2064 where GitPython would fail with UnicodeDecodeError
         when reading packed-refs files containing non-UTF8 characters (e.g., Latin-1
         encoded tag names).
         """
         # Create a tag with ASCII name first
         TagReference.create(rw_repo, "normal-tag")
-        
+
         # Pack refs
         rw_repo.git.pack_refs(all=True)
-        
+
         # Manually insert a non-UTF8 ref into the packed-refs file
         # Using Latin-1 characters that are invalid UTF-8
         packed_refs_path = osp.join(rw_repo.common_dir, "packed-refs")
-        
+
         with open(packed_refs_path, "rb") as f:
             content = f.read()
-        
+
         # Add a fake ref with Latin-1 encoded name (ñ = 0xF1 in Latin-1, invalid UTF-8)
         # Using a valid SHA from the repo
         head_sha = rw_repo.head.commit.hexsha
         non_utf8_line = f"\n{head_sha} refs/tags/caf\xf1\n".encode("latin-1")
-        
+
         with open(packed_refs_path, "wb") as f:
             f.write(content + non_utf8_line)
-        
+
         # This should NOT raise UnicodeDecodeError with the fix
         # It should successfully read all tags including the non-UTF8 one
         tags = list(rw_repo.tags)
         assert len(tags) >= 1
-        
+
         # Verify we can iterate packed refs without error
         from git.refs import SymbolicReference
+
         packed_refs = list(SymbolicReference._iter_packed_refs(rw_repo))
         assert len(packed_refs) >= 2  # At least normal-tag and the non-UTF8 tag
 

From 5e5e1c1b1c6a8869defec4e828945761ff0fc7a8 Mon Sep 17 00:00:00 2001
From: Paul Desai <paul.mirroros@hotmail.com>
Date: Mon, 8 Dec 2025 14:25:24 +0530
Subject: [PATCH 3/3] Fix codespell: rename test ref to avoid 'caf' typo
 detection

---
 test/test_refs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_refs.py b/test/test_refs.py
index c5378ee5f..ff1923fbc 100644
--- a/test/test_refs.py
+++ b/test/test_refs.py
@@ -653,7 +653,7 @@ def test_packed_refs_with_non_utf8_encoding(self, rw_repo):
         # Add a fake ref with Latin-1 encoded name (ñ = 0xF1 in Latin-1, invalid UTF-8)
         # Using a valid SHA from the repo
         head_sha = rw_repo.head.commit.hexsha
-        non_utf8_line = f"\n{head_sha} refs/tags/caf\xf1\n".encode("latin-1")
+        non_utf8_line = f"\n{head_sha} refs/tags/test-\xf1ame\n".encode("latin-1")
 
         with open(packed_refs_path, "wb") as f:
             f.write(content + non_utf8_line)