Skip to content

Commit 0750aff

Browse files
authored
Handle repo patch with non-UTF8 sequences (#3918)
A Git "text" diff is not actually text in the usual sense -- it may contain arbitrary bytes, including sequences that are not valid UTF-8. But since we send patches as files (multipart/form-data), not as JSON strings, we don't need to decode them at all. In addition, the missing --binary flag was added to the diff of tracked files. Without this flag, modified binary files were effectively excluded from the patch and replaced with a "Binary files a and b differ" message. Fixes: #3880
1 parent ea0f7fe commit 0750aff

1 file changed

Lines changed: 11 additions & 9 deletions

File tree

  • src/dstack/_internal/core/models/repos

src/dstack/_internal/core/models/repos/remote.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ class RemoteRepoInfo(
5858
class RemoteRunRepoData(RemoteRepoInfo):
5959
repo_branch: Optional[str] = None
6060
repo_hash: Optional[str] = None
61-
repo_diff: Annotated[Optional[str], Field(exclude=True)] = None
61+
repo_diff: Annotated[Optional[bytes], Field(exclude=True)] = None
6262
repo_config_name: Optional[str] = None
6363
repo_config_email: Optional[str] = None
6464

@@ -183,13 +183,15 @@ def __init__(
183183
def has_code_to_write(self) -> bool:
184184
# repo_diff is:
185185
# * None for RemoteRepo.from_url()
186-
# * an empty string for RemoteRepo.from_dir() if there are no changes ("clean" state)
187-
# * a non-empty string for RemoteRepo.from_dir() if there are changes ("dirty" state)
186+
# * empty bytes for RemoteRepo.from_dir() if there are no changes ("clean" state)
187+
# and no untracked files
188+
# * non-empty bytes for RemoteRepo.from_dir() if there are changes ("dirty" state)
189+
# and/or untracked files
188190
return bool(self.run_repo_data.repo_diff)
189191

190192
def write_code_file(self, fp: BinaryIO) -> str:
191193
if self.run_repo_data.repo_diff is not None:
192-
fp.write(self.run_repo_data.repo_diff.encode())
194+
fp.write(self.run_repo_data.repo_diff)
193195
return get_sha256(fp)
194196

195197
def get_repo_info(self) -> RemoteRepoInfo:
@@ -238,7 +240,7 @@ def __init__(self, warning_time: float, delay: float = 5):
238240
self.delay = delay
239241
self.warned = False
240242
self.start_time = time.monotonic()
241-
self.buffer = io.StringIO()
243+
self.buffer = io.BytesIO()
242244

243245
def timeout(self):
244246
now = time.monotonic()
@@ -256,9 +258,9 @@ def timeout(self):
256258
)
257259

258260
def write(self, v: bytes):
259-
self.buffer.write(v.decode())
261+
self.buffer.write(v)
260262

261-
def get(self) -> str:
263+
def get(self) -> bytes:
262264
if self.warned:
263265
print()
264266
return self.buffer.getvalue()
@@ -366,10 +368,10 @@ def _interactive_git_proc(
366368
continue
367369

368370

369-
def _repo_diff_verbose(repo: git.Repo, repo_hash: str, warning_time: float = 5) -> str:
371+
def _repo_diff_verbose(repo: git.Repo, repo_hash: str, warning_time: float = 5) -> bytes:
370372
collector = _DiffCollector(warning_time)
371373
try:
372-
_interactive_git_proc(repo.git.diff(repo_hash, as_process=True), collector)
374+
_interactive_git_proc(repo.git.diff(repo_hash, binary=True, as_process=True), collector)
373375
for filename in repo.untracked_files:
374376
_interactive_git_proc(
375377
repo.git.diff("/dev/null", filename, no_index=True, binary=True, as_process=True),

0 commit comments

Comments
 (0)