Skip to content

Commit df5b8dd

Browse files
author
Kazuki Suzuki Przyborowski
committed
Update pyarchivefile.py
1 parent e2e5907 commit df5b8dd

1 file changed

Lines changed: 105 additions & 5 deletions

File tree

pyarchivefile.py

Lines changed: 105 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4930,7 +4930,7 @@ def AppendFilesWithContent(infiles, fp, dirlistfromtxt=False, filevalues=[], ext
49304930
curcompression = "none"
49314931
if not followlink and ftype in data_types:
49324932
with open(fname, "rb") as fpc:
4933-
shutil.copyfileobj(fpc, fcontents)
4933+
copy_opaque(fpc, fcontents, bufsize=1 << 20) # 1 MiB chunks, opaque copy
49344934
typechecktest = CheckCompressionType(fcontents, filestart=0, closefp=False)
49354935
fcontents.seek(0, 0)
49364936
fcencoding = GetFileEncoding(fcontents, 0, False)
@@ -4977,7 +4977,7 @@ def AppendFilesWithContent(infiles, fp, dirlistfromtxt=False, filevalues=[], ext
49774977
return False
49784978
flstatinfo = os.stat(flinkname)
49794979
with open(flinkname, "rb") as fpc:
4980-
shutil.copyfileobj(fpc, fcontents)
4980+
copy_opaque(fpc, fcontents, bufsize=1 << 20) # 1 MiB chunks, opaque copy
49814981
typechecktest = CheckCompressionType(fcontents, filestart=0, closefp=False)
49824982
fcontents.seek(0, 0)
49834983
fcencoding = GetFileEncoding(fcontents, 0, False)
@@ -6277,6 +6277,106 @@ def open_adapter(obj_or_path, mode="rb", use_mmap=False, mmap_size=None):
62776277

62786278
# Assumes you already have: compressionsupport, outextlistwd, MkTempFile, etc.
62796279

6280+
def ensure_filelike(infile, mode="rb", use_mmap=False):
    """
    Accept either a path or an existing file-like object and wrap it.

    Args:
        infile: A filesystem path, or any object exposing ``read`` or
            ``write`` (such an object is treated as already open and is
            handed to ``open_adapter`` unchanged).
        mode: Mode used to open ``infile`` when it is a path; also forwarded
            to ``open_adapter``.
        use_mmap: Forwarded to ``open_adapter``.

    Returns:
        The result of ``open_adapter`` for the file object (presumably a
        FileLikeAdapter, optionally mmap-backed -- confirm against
        ``open_adapter``), or ``False`` when ``infile`` is a path that
        cannot be opened.
    """
    if hasattr(infile, "read") or hasattr(infile, "write"):
        # Already a file-like; the caller keeps ownership of the handle.
        return open_adapter(infile, mode=mode, use_mmap=use_mmap)

    try:
        fp = open(infile, mode)
    except IOError:  # what open() raises on Py2; alias of OSError on Py3
        return False

    # Wrap in FileLikeAdapter for consistent interface
    try:
        return open_adapter(fp, mode=mode, use_mmap=use_mmap)
    except Exception:
        # We opened this handle ourselves, so do not leak it if wrapping fails.
        fp.close()
        raise
6296+
6297+
def fast_copy(infp, outfp, bufsize=1 << 20):
    """
    Copy everything readable from ``infp`` into ``outfp`` in fixed-size chunks.

    When ``infp`` exposes a callable ``readinto`` a single reusable buffer is
    filled and the filled prefix is written out as a memoryview slice;
    otherwise the copy falls back to ``infp.read(bufsize)``.

    Args:
        infp: Readable file-like object (needs ``readinto`` or ``read``).
        outfp: Writable file-like object (needs ``write``).
        bufsize: Chunk size in bytes for each read.

    Returns:
        Total number of bytes (or characters, for text streams) copied.
    """
    total = 0
    # The method lookup is loop-invariant, so resolve it once up front.
    readinto = getattr(infp, "readinto", None)

    if callable(readinto):
        mv = memoryview(bytearray(bufsize))
        while True:
            count = readinto(mv)
            if not count:  # 0 at EOF; None from a non-blocking source
                break
            outfp.write(mv[:count])
            total += count
        return total

    # Fallback if readinto is missing
    while True:
        data = infp.read(bufsize)
        if not data:
            break
        outfp.write(data)
        total += len(data)
    return total
6313+
6314+
def copy_file_to_mmap_dest(src_path, outfp, chunk_size=8 << 20):
    """
    Copy the file at ``src_path`` into ``outfp`` by memory-mapping the source.

    The source is mapped read-only and written out in ``chunk_size`` slices.
    If the mapping itself cannot be created (for example an empty file raises
    ValueError), the copy falls back to ``shutil.copyfileobj``.

    Only a failure to create the mapping triggers the fallback. Errors raised
    by ``outfp.write`` propagate to the caller: falling back after a partial
    write would restart from offset 0 and duplicate data already written.

    Args:
        src_path: Path of the file to read.
        outfp: Writable file-like object (needs ``write``).
        chunk_size: Number of bytes written per ``outfp.write`` call; must be
            positive.

    Raises:
        ValueError: If ``chunk_size`` is not positive (when the mmap path is
            taken).
    """
    with open(src_path, "rb") as fp:
        try:
            mm = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)
        except (ValueError, mmap.error, OSError):
            # fall back; nothing has been read from fp yet, so this copies
            # the whole file exactly once
            shutil.copyfileobj(fp, outfp, length=chunk_size)
            return

        try:
            # range() rejects a zero step, which avoids spinning forever.
            for pos in range(0, len(mm), chunk_size):
                outfp.write(mm[pos:pos + chunk_size])
        finally:
            # Always release the mapping, even when a write fails.
            mm.close()
6327+
6328+
def copy_opaque(src, dst, bufsize=1 << 20, grow_step=64 << 20):
    """
    Copy opaque bytes from 'src' (any readable file-like) to 'dst'
    (any writable file-like, e.g. an mmap-backed FileLikeAdapter).

    - Uses ``src.readinto`` with one reusable buffer when available;
      otherwise falls back to ``src.read(bufsize)``.
    - If ``dst.write`` raises IOError, ``dst`` is extended with
      ``dst.truncate()`` to at least ``grow_step`` bytes past the current
      position and the write is retried once; a second failure propagates.
    - Calls ``dst.flush()`` once after the last chunk.

    NOTE(review): after a grow, ``dst`` is not truncated back to the number
    of bytes actually written -- confirm the destination tracks its own
    logical length.

    Args:
        src: Readable file-like object; copying starts at its current
            position.
        dst: Writable file-like object providing ``write`` and ``flush``
            (and ``tell``/``truncate`` if a write can fail with IOError).
        bufsize: Chunk size in bytes for each read.
        grow_step: Minimum number of bytes to extend ``dst`` by on a failed
            write.

    Returns:
        Total bytes copied.
    """
    def _write_growing(chunk, length):
        # write; if the destination is too small, grow and retry once
        try:
            dst.write(chunk)
        except IOError:
            # likely "write past mapped size"; try to grow
            position = dst.tell()
            dst.truncate(position + max(length, grow_step))
            dst.write(chunk)

    total = 0
    # Prefer readinto to avoid extra allocations; resolve it once, since the
    # lookup does not change between iterations.
    readinto = getattr(src, "readinto", None)

    if callable(readinto):
        mv = memoryview(bytearray(bufsize))
        while True:
            count = readinto(mv)
            if not count:
                break
            _write_growing(mv[:count], count)
            total += count
    else:
        while True:
            chunk = src.read(bufsize)
            if not chunk:
                break
            _write_growing(chunk, len(chunk))
            total += len(chunk)

    dst.flush()
    return total
6379+
62806380
def CompressOpenFileAlt(fp, compression="auto", compressionlevel=None,
62816381
compressionuselist=compressionlistalt,
62826382
formatspecs=__file_format_dict__):
@@ -6742,7 +6842,7 @@ def PackArchiveFile(infiles, outfile, dirlistfromtxt=False, fmttype="auto", comp
67426842
curcompression = "none"
67436843
if not followlink and ftype in data_types:
67446844
with open(fname, "rb") as fpc:
6745-
shutil.copyfileobj(fpc, fcontents)
6845+
copy_opaque(fpc, fcontents, bufsize=1 << 20) # 1 MiB chunks, opaque copy
67466846
typechecktest = CheckCompressionType(fcontents, filestart=0, closefp=False)
67476847
fcontents.seek(0, 0)
67486848
fcencoding = GetFileEncoding(fcontents, 0, False)
@@ -6789,7 +6889,7 @@ def PackArchiveFile(infiles, outfile, dirlistfromtxt=False, fmttype="auto", comp
67896889
return False
67906890
flstatinfo = os.stat(flinkname)
67916891
with open(flinkname, "rb") as fpc:
6792-
shutil.copyfileobj(fpc, fcontents)
6892+
copy_opaque(fpc, fcontents, bufsize=1 << 20) # 1 MiB chunks, opaque copy
67936893
typechecktest = CheckCompressionType(fcontents, filestart=0, closefp=False)
67946894
fcontents.seek(0, 0)
67956895
fcencoding = GetFileEncoding(fcontents, 0, False)
@@ -7083,7 +7183,7 @@ def PackArchiveFileFromTarFile(infile, outfile, fmttype="auto", compression="aut
70837183
curcompression = "none"
70847184
if ftype in data_types:
70857185
fpc = tarfp.extractfile(member)
7086-
shutil.copyfileobj(fpc, fcontents)
7186+
copy_opaque(fpc, fcontents, bufsize=1 << 20) # 1 MiB chunks, opaque copy
70877187
fpc.close()
70887188
typechecktest = CheckCompressionType(fcontents, filestart=0, closefp=False)
70897189
fcontents.seek(0, 0)

0 commit comments

Comments
 (0)