Skip to content

Commit 60f68d9

Browse files
author
Joshua Thomas Przyborowski
authored
Add files via upload
1 parent 5340268 commit 60f68d9

1 file changed

Lines changed: 136 additions & 46 deletions

File tree

pycatfile.py

Lines changed: 136 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -323,7 +323,7 @@ def decode_unicode_escape(value):
323323
__program_name__ = "Py" + __file_format_name__
324324
__file_format_lower__ = __file_format_name__.lower()
325325
__file_format_magic__ = "ねこファイル"
326-
# __file_format_magic__ = "네코파일"
326+
#__file_format_magic__ = "네코파일"
327327
__file_format_len__ = len(__file_format_magic__.encode('utf-8'))
328328
__file_format_hex__ = binascii.hexlify(
329329
__file_format_magic__.encode("UTF-8")).decode("UTF-8")
@@ -927,111 +927,192 @@ def __exit__(self, exc_type, exc_value, traceback):
927927
self.close()
928928

929929

930-
class GzipFile(object):
    """
    A file-like wrapper that reads or writes a gzip stream in a single shot.

    - In read mode (r): reads the entire file, checks for the GZIP magic
      bytes, then decompresses everything into memory.
    - In write mode (w/a/x): buffers all data in memory. On close, the buffer
      is compressed (using the requested level) and written out in one go.
    - Tries to mimic gzip.GzipFile usage, but without streaming writes.

    Compression and decompression go through gzip.GzipFile over an
    io.BytesIO buffer rather than gzip.compress()/gzip.decompress(), because
    those two helpers only exist on Python 3.2+ and this class is used as the
    Python 2 fallback.
    """

    # GZIP magic bytes: b'\x1f\x8b'
    GZIP_MAGIC = b'\x1f\x8b'

    def __init__(self, file_path=None, fileobj=None, mode='rb',
                 level=9, encoding=None, errors=None, newline=None,
                 compresslevel=None):
        """
        :param file_path: Path to file (if any)
        :param fileobj: An existing binary file object (if any); it is never
                        closed by this class
        :param mode: File mode, e.g., 'rb', 'wb', 'rt', 'wt', etc.
        :param level: Compression level (1..9); None falls back to 9
        :param encoding: For text mode, the text encoding (default UTF-8)
        :param errors: Error handling for encoding/decoding (default strict)
        :param newline: Placeholder to mimic built-in open() signature
        :param compresslevel: Alias for ``level`` (the gzip.GzipFile keyword
                              name); takes precedence when given
        :raises ValueError: if neither or both of file_path/fileobj are given,
                            if the mode is not a read or write mode, or if the
                            data read does not start with the GZIP magic bytes
        :raises IOError: if file_path cannot be opened (on Python 3 a missing
                         file surfaces as FileNotFoundError, an IOError subclass)
        """
        if file_path is None and fileobj is None:
            raise ValueError("Either file_path or fileobj must be provided")
        if file_path is not None and fileobj is not None:
            raise ValueError("Only one of file_path or fileobj should be provided")

        # Accept the gzip.GzipFile keyword name as well, and tolerate callers
        # that forward an unset (None) compression level.
        if compresslevel is not None:
            level = compresslevel
        if level is None:
            level = 9

        self.file_path = file_path
        self.fileobj = fileobj
        self.mode = mode
        self.level = level
        self.encoding = encoding
        self.errors = errors
        self.newline = newline
        self.closed = False

        # Decompressed data (if reading)
        self._decompressed_data = b''
        self._position = 0

        # Raw (uncompressed) data awaiting compression (if writing).
        # A bytearray keeps repeated appends cheap.
        self._write_buffer = bytearray()

        # Track text vs. binary, and read vs. write
        self._text_mode = 't' in mode
        self._writing = any(flag in mode for flag in ('w', 'a', 'x'))

        # The underlying file is always binary: drop 't' and make sure 'b' is
        # present so that plain 'r'/'w' do not open the file in text mode.
        internal_mode = mode.replace('t', '')
        if 'b' not in internal_mode:
            internal_mode += 'b'

        if self._writing:
            # Writing / appending
            if file_path is not None:
                self.file = open(file_path, internal_mode)
            else:
                self.file = fileobj
        elif 'r' in mode:
            # Reading. open() itself reports a missing file, which avoids an
            # exists()-then-open race and works on Python 2 and 3 alike.
            if file_path is not None:
                self.file = open(file_path, internal_mode)
                try:
                    self._load_file()
                except Exception:
                    # Do not leak the handle we opened if the data is bad.
                    self.file.close()
                    raise
            else:
                # fileobj provided
                self.file = fileobj
                self._load_file()
        else:
            raise ValueError("Mode should be 'rb'/'rt' or 'wb'/'wt'")

    def __enter__(self):
        """Return self so the object can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file when leaving a ``with`` block."""
        self.close()

    @staticmethod
    def _gzip_decompress(data):
        """Return the decompressed bytes of the gzip stream ``data``."""
        import io
        reader = gzip.GzipFile(fileobj=io.BytesIO(data), mode='rb')
        try:
            return reader.read()
        finally:
            reader.close()

    @staticmethod
    def _gzip_compress(data, level):
        """Return ``data`` compressed as one gzip member at ``level``."""
        import io
        sink = io.BytesIO()
        writer = gzip.GzipFile(fileobj=sink, mode='wb', compresslevel=level)
        try:
            writer.write(data)
        finally:
            # Closing the writer emits the gzip trailer into ``sink``.
            writer.close()
        return sink.getvalue()

    def _load_file(self):
        """
        Reads the entire compressed file into memory and decompresses it.
        Checks for the GZIP magic bytes first.

        :raises ValueError: if the data does not start with the magic bytes
        """
        self.file.seek(0)
        compressed_data = self.file.read()

        if not compressed_data.startswith(self.GZIP_MAGIC):
            raise ValueError("Invalid GZIP file header (magic bytes missing)")

        # Decompress everything
        payload = self._gzip_decompress(compressed_data)

        # If in text mode, decode from bytes -> str
        if self._text_mode:
            enc = self.encoding or 'UTF-8'
            err = self.errors or 'strict'
            payload = payload.decode(enc, err)

        self._decompressed_data = payload

    def write(self, data):
        """
        Append data to the in-memory buffer. The actual compression
        happens on close().

        :param data: bytes in binary mode, text in text mode
        :return: the length of ``data`` as passed in
        :raises IOError: if the file was opened for reading
        :raises ValueError: if the file is already closed
        """
        if not self._writing:
            raise IOError("File not open for writing")
        if self.closed:
            # Anything buffered after close() would be silently lost.
            raise ValueError("I/O operation on closed file")

        written = len(data)
        if self._text_mode:
            # Convert str (Py3) or unicode (Py2) to bytes
            data = data.encode(self.encoding or 'UTF-8', self.errors or 'strict')

        self._write_buffer += data
        return written

    def read(self, size=-1):
        """
        Read from the decompressed data buffer.

        :param size: maximum number of items to return; None or a negative
                     value reads everything up to the end
        :return: bytes in binary mode, text in text mode
        :raises IOError: if the file was opened for writing
        """
        if self._writing:
            raise IOError("File not open for reading")

        remaining = len(self._decompressed_data) - self._position
        if size is None or size < 0 or size > remaining:
            size = remaining
        data = self._decompressed_data[self._position:self._position + size]
        # Advance by what was actually returned so tell() never passes the end.
        self._position += len(data)
        return data

    def seek(self, offset, whence=0):
        """
        Adjust the current read position in the decompressed buffer.
        The resulting position is clamped to the bounds of the buffer.

        :param offset: offset relative to the point selected by ``whence``
        :param whence: 0 = start, 1 = current position, 2 = end
        :raises IOError: if the file was opened for writing
        :raises ValueError: if ``whence`` is not 0, 1 or 2
        """
        if self._writing:
            raise IOError("File not open for reading")

        if whence == 0:  # absolute
            new_pos = offset
        elif whence == 1:  # relative
            new_pos = self._position + offset
        elif whence == 2:  # relative to end
            new_pos = len(self._decompressed_data) + offset
        else:
            raise ValueError("Invalid value for whence")

        self._position = max(0, min(new_pos, len(self._decompressed_data)))

    def tell(self):
        """
        Return the current position in the decompressed buffer.
        """
        return self._position

    def flush(self):
        """
        Flush the underlying file. (No partial compression flush is done:
        data is only compressed on close.)
        """
        if hasattr(self.file, 'flush'):
            self.file.flush()

    def fileno(self):
        """
        Return the file descriptor if available.

        :raises OSError: if the underlying object has no fileno()
        """
        if hasattr(self.file, 'fileno'):
            return self.file.fileno()
        raise OSError("The underlying file object does not support fileno()")

    def isatty(self):
        """
        Return whether the underlying file is a TTY.
        """
        if hasattr(self.file, 'isatty'):
            return self.file.isatty()
        return False

    def truncate(self, size=None):
        """
        Truncate the underlying file if possible.

        :raises OSError: if the underlying object has no truncate()
        """
        if hasattr(self.file, 'truncate'):
            return self.file.truncate(size)
        raise OSError("The underlying file object does not support truncate()")

    def close(self):
        """
        If in write mode, compress the entire write buffer at ``level`` and
        write it to the file. A file opened from ``file_path`` is then closed;
        a caller-supplied ``fileobj`` is left open. Calling close() more than
        once is a no-op, so the compressed payload is never written twice.
        """
        if self.closed:
            return
        self.closed = True

        try:
            if self._writing:
                compressed = self._gzip_compress(bytes(self._write_buffer), self.level)
                self.file.write(compressed)
        finally:
            # Only close what this object opened itself.
            if self.file_path is not None:
                self.file.close()
10371118

@@ -3971,7 +4052,10 @@ def UncompressFile(infile, formatspecs=__file_format_dict__, mode="rb"):
39714052
mode = "w"
39724053
try:
39734054
if(compresscheck == "gzip" and compresscheck in compressionsupport):
3974-
filefp = gzip.open(infile, mode)
4055+
if sys.version_info[0] == 2:
4056+
filefp = GzipFile(infile, mode=mode)
4057+
else:
4058+
filefp = gzip.open(infile, mode)
39754059
elif(compresscheck == "bzip2" and compresscheck in compressionsupport):
39764060
filefp = bz2.open(infile, mode)
39774061
elif(compresscheck == "zstd" and compresscheck in compressionsupport):
@@ -4152,7 +4236,10 @@ def CheckCompressionSubType(infile, formatspecs=__file_format_dict__, closefp=Tr
41524236
else:
41534237
try:
41544238
if(compresscheck == "gzip" and compresscheck in compressionsupport):
4155-
catfp = gzip.GzipFile(infile, "rb")
4239+
if sys.version_info[0] == 2:
4240+
catfp = GzipFile(infile, mode="rb")
4241+
else:
4242+
catfp = gzip.GzipFile(infile, "rb")
41564243
elif(compresscheck == "bzip2" and compresscheck in compressionsupport):
41574244
catfp = bz2.BZ2File(infile, "rb")
41584245
elif(compresscheck == "lz4" and compresscheck in compressionsupport):
@@ -4292,7 +4379,10 @@ def CompressOpenFile(outfile, compressionenable=True, compressionlevel=None):
42924379
if(fextname not in outextlistwd or not compressionenable):
42934380
outfp = open(outfile, "wb")
42944381
elif(fextname == ".gz" and "gzip" in compressionsupport):
4295-
outfp = gzip.open(outfile, mode, compressionlevel)
4382+
if sys.version_info[0] == 2:
4383+
outfp = GzipFile(outfile, mode=mode, level=compressionlevel)
4384+
else:
4385+
outfp = gzip.open(outfile, mode, compressionlevel)
42964386
elif(fextname == ".bz2" and "bzip2" in compressionsupport):
42974387
outfp = bz2.open(outfile, mode, compressionlevel)
42984388
elif(fextname == ".zst" and "zstandard" in compressionsupport):

0 commit comments

Comments
 (0)