Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion git/refs/symbolic.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def _iter_packed_refs(cls, repo: "Repo") -> Iterator[Tuple[str, str]]:
The packed refs file will be kept open as long as we iterate.
"""
try:
with open(cls._get_packed_refs_path(repo), "rt", encoding="UTF-8") as fp:
with open(cls._get_packed_refs_path(repo), "rt", encoding="UTF-8", errors="surrogateescape") as fp:
for line in fp:
line = line.strip()
if not line:
Expand Down
40 changes: 40 additions & 0 deletions test/test_refs.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,46 @@ def test_tag_message(self, rw_repo):
)
assert tag_ref.tag.message == "test2"

@with_rw_repo("0.1.6")
def test_packed_refs_with_non_utf8_encoding(self, rw_repo):
    """Test that packed-refs files with non-UTF8 encoded ref names can be read.

    This addresses issue #2064 where GitPython would fail with UnicodeDecodeError
    when reading packed-refs files containing non-UTF8 characters (e.g., Latin-1
    encoded tag names).

    The packed-refs file is opened with ``errors="surrogateescape"``, so bytes
    that are not valid UTF-8 are expected to come back as lone surrogate code
    points instead of raising.
    """
    # Create an ordinary ASCII tag and pack all refs so a packed-refs file exists.
    TagReference.create(rw_repo, "normal-tag")
    rw_repo.git.pack_refs(all=True)

    # "ñ" is the single byte 0xF1 in Latin-1. In UTF-8, 0xF1 starts a four-byte
    # sequence, and "a" is not a continuation byte, so the name is invalid UTF-8.
    raw_ref_name = b"refs/tags/test-\xf1ame"

    # Point the fake ref at a SHA that really exists in the repository.
    head_sha = rw_repo.head.commit.hexsha

    # Append in binary mode so the raw bytes reach the file unmodified.
    packed_refs_path = osp.join(rw_repo.common_dir, "packed-refs")
    with open(packed_refs_path, "ab") as f:
        f.write(b"\n" + head_sha.encode("ascii") + b" " + raw_ref_name + b"\n")

    # This should NOT raise UnicodeDecodeError with the fix.
    tags = list(rw_repo.tags)
    assert len(tags) >= 1

    # Iterating the packed refs directly must work as well.
    packed_refs = list(SymbolicReference._iter_packed_refs(rw_repo))
    assert len(packed_refs) >= 2  # At least normal-tag and the non-UTF8 tag

    # The non-UTF8 entry must actually be yielded, not silently dropped: the
    # offending byte is carried through as a surrogate escape.
    expected_name = raw_ref_name.decode("utf-8", errors="surrogateescape")
    assert any(expected_name in entry for entry in packed_refs)

def test_dereference_recursive(self):
# For now, just test the HEAD.
assert SymbolicReference.dereference_recursive(self.rorepo, "HEAD")
Expand Down
Loading