Skip to content

Commit ed9c007

Browse files
author
Joshua Thomas Przyborowski
authored
Add files via upload
1 parent 60f68d9 commit ed9c007

1 file changed

Lines changed: 65 additions & 43 deletions

File tree

pycatfile.py

Lines changed: 65 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -927,31 +927,54 @@ def __exit__(self, exc_type, exc_value, traceback):
927927
self.close()
928928

929929

930+
def _gzip_compress(data, compresslevel=9):
    """
    Produce a complete gzip stream from ``data`` in a single call.

    zlib emits the gzip header and trailer itself when the window-bits
    argument is 31, so the gzip module is not needed.

    :param data: Bytes to compress
    :param compresslevel: Compression level handed to zlib (1..9)
    :return: GZIP-compressed bytes
    """
    # wbits=31 selects the gzip container rather than a raw or zlib stream.
    compressor = zlib.compressobj(compresslevel, zlib.DEFLATED, 31)
    body = compressor.compress(data)
    trailer = compressor.flush(zlib.Z_FINISH)
    return body + trailer
943+
944+
def _gzip_decompress(data):
    """
    Decompress gzip-framed data (zlib with wbits=31), including multi-member input.

    A gzip file may hold several members concatenated back to back, for
    example after appending a second compressed stream to an existing file.
    A single zlib.decompress() call stops at the end of the first member and
    silently drops everything after it, so each member is decoded in turn and
    the results are joined.

    NUL padding between or after members is skipped. Trailing bytes that do
    not begin with the gzip magic are ignored, exactly as the one-shot call
    ignored them.

    :param data: GZIP-compressed bytes
    :return: Decompressed bytes of all members, concatenated
    :raises zlib.error: if a member is corrupt or truncated (including empty input)
    """
    pieces = []
    remaining = data
    while True:
        member = zlib.decompressobj(31)
        pieces.append(member.decompress(remaining))
        pieces.append(member.flush())

        # unused_data is only populated once the member's end has been reached.
        leftover = member.unused_data
        if not leftover:
            finished = getattr(member, 'eof', None)
            if finished is None:
                # Interpreters without the eof attribute: let the one-shot
                # call raise if the final member is truncated.
                zlib.decompress(remaining, 31)
            elif not finished:
                raise zlib.error(
                    "Error -5 while decompressing data: "
                    "incomplete or truncated stream")
            break

        remaining = leftover.lstrip(b'\x00')
        if not remaining.startswith(b'\x1f\x8b'):
            # Padding or non-gzip trailing bytes: nothing more to decode.
            break

    return b''.join(pieces)
953+
930954
class GzipFile(object):
931955
"""
932-
A file-like wrapper around gzip compression/decompression using
933-
gzip.compress() and gzip.decompress() for a single-shot in-memory approach.
934-
935-
- In read mode (r): Reads the entire file, checks for GZIP magic bytes, then
936-
decompresses into memory.
937-
- In write mode (w/a/x): Buffers all data in memory. On close, compresses
938-
everything with gzip.compress() (using the specified level) and writes it out.
939-
- Tries to mimic gzip.GzipFile usage, but without streaming writes.
956+
A file-like wrapper that uses zlib at wbits=31 to mimic gzip compress/decompress,
957+
for Python versions lacking gzip.compress/gzip.decompress (e.g., Python <3.2 or Py2).
958+
959+
- Read mode: loads entire file, checks for magic bytes, decompresses.
960+
- Write mode: buffers data in memory, writes compressed data on close().
961+
- Includes 'level' to set compression level (1..9).
962+
- Text vs. binary mode: specify 't' in the mode for text, with optional encoding/errors.
940963
"""
941964

942-
# GZIP magic bytes: b'\x1f\x8b'
965+
# GZIP magic
943966
GZIP_MAGIC = b'\x1f\x8b'
944967

945968
def __init__(self, file_path=None, fileobj=None, mode='rb',
946969
level=9, encoding=None, errors=None, newline=None):
947970
"""
948-
:param file_path: Path to file (if any)
949-
:param fileobj: An existing file object (if any)
950-
:param mode: File mode, e.g., 'rb', 'wb', 'rt', 'wt', etc.
971+
:param file_path: Path to file on disk (optional)
972+
:param fileobj: Existing file-like object (optional)
973+
:param mode: e.g. 'rb', 'wb', 'rt', 'wt'
951974
:param level: Compression level (1..9)
952-
:param encoding: For text mode, the text encoding
953-
:param errors: Error handling for encoding/decoding
954-
:param newline: Placeholder to mimic built-in open() signature
975+
:param encoding: Used if 't' in mode for text encoding
976+
:param errors: Error handling for text encode/decode
977+
:param newline: Placeholder for signature compatibility; not implemented
955978
"""
956979
if file_path is None and fileobj is None:
957980
raise ValueError("Either file_path or fileobj must be provided")
@@ -966,21 +989,21 @@ def __init__(self, file_path=None, fileobj=None, mode='rb',
966989
self.errors = errors
967990
self.newline = newline
968991

969-
# Decompressed data (if reading)
992+
# For reading, we store decompressed data in memory
970993
self._decompressed_data = b''
971994
self._position = 0
972995

973-
# Buffer to hold raw (uncompressed) data (if writing)
996+
# For writing, we store uncompressed data in memory until close()
974997
self._write_buffer = b''
975998

976-
# Track text vs. binary
999+
# Text mode if 't' is present in mode
9771000
self._text_mode = 't' in mode
9781001

979-
# Force binary mode internally for file I/O
1002+
# Force binary mode for the actual file I/O
9801003
internal_mode = mode.replace('t', 'b')
9811004

9821005
if any(m in mode for m in ('w', 'a', 'x')):
983-
# Writing / appending
1006+
# Writing or appending
9841007
if file_path:
9851008
self.file = open(file_path, internal_mode)
9861009
else:
@@ -995,62 +1018,62 @@ def __init__(self, file_path=None, fileobj=None, mode='rb',
9951018
else:
9961019
raise FileNotFoundError("No such file: '{}'".format(file_path))
9971020
else:
998-
# fileobj provided
1021+
# fileobj
9991022
self.file = fileobj
10001023
self._load_file()
10011024
else:
10021025
raise ValueError("Mode should be 'rb'/'rt' or 'wb'/'wt'")
10031026

10041027
def _load_file(self):
10051028
"""
1006-
Reads the entire compressed file into memory and decompresses it.
1007-
Checks for the GZIP magic bytes first.
1029+
Read entire compressed file. Check magic. Decompress (single-shot).
10081030
"""
10091031
self.file.seek(0)
10101032
compressed_data = self.file.read()
10111033

1034+
# Verify GZIP magic
10121035
if not compressed_data.startswith(self.GZIP_MAGIC):
1013-
raise ValueError("Invalid GZIP file header (magic bytes missing)")
1036+
raise ValueError("Invalid GZIP header (magic bytes missing)")
10141037

1015-
# Decompress everything
1016-
self._decompressed_data = gzip.decompress(compressed_data)
1038+
# Decompress everything in one shot
1039+
self._decompressed_data = _gzip_decompress(compressed_data)
10171040

1018-
# If in text mode, decode from bytes -> str
1041+
# If text mode, decode to str (Py3) or unicode (Py2)
10191042
if self._text_mode:
10201043
enc = self.encoding or 'UTF-8'
10211044
err = self.errors or 'strict'
10221045
self._decompressed_data = self._decompressed_data.decode(enc, err)
10231046

10241047
def write(self, data):
10251048
"""
1026-
Write data to our in-memory buffer. The actual compression
1027-
happens on close().
1049+
Write data to our in-memory buffer.
1050+
Actual compression to GZIP happens on close().
10281051
"""
10291052
if 'r' in self.mode:
10301053
raise IOError("File not open for writing")
10311054

10321055
if self._text_mode:
1033-
# Convert str (Py3) or unicode (Py2) to bytes
1056+
# Encode str/unicode to bytes
10341057
data = data.encode(self.encoding or 'UTF-8', self.errors or 'strict')
10351058

10361059
self._write_buffer += data
10371060

10381061
def read(self, size=-1):
10391062
"""
1040-
Read from the decompressed data buffer.
1063+
Read from the decompressed data buffer in memory.
10411064
"""
10421065
if 'r' not in self.mode:
10431066
raise IOError("File not open for reading")
10441067

10451068
if size < 0:
10461069
size = len(self._decompressed_data) - self._position
1047-
data = self._decompressed_data[self._position:self._position + size]
1070+
data = self._decompressed_data[self._position : self._position + size]
10481071
self._position += size
10491072
return data
10501073

10511074
def seek(self, offset, whence=0):
10521075
"""
1053-
Adjust the current read position in the decompressed buffer.
1076+
Seek in the decompressed data buffer.
10541077
"""
10551078
if 'r' not in self.mode:
10561079
raise IOError("File not open for reading")
@@ -1059,7 +1082,7 @@ def seek(self, offset, whence=0):
10591082
new_pos = offset
10601083
elif whence == 1: # relative
10611084
new_pos = self._position + offset
1062-
elif whence == 2: # relative to end
1085+
elif whence == 2: # from the end
10631086
new_pos = len(self._decompressed_data) + offset
10641087
else:
10651088
raise ValueError("Invalid value for whence")
@@ -1068,49 +1091,48 @@ def seek(self, offset, whence=0):
10681091

10691092
def tell(self):
10701093
"""
1071-
Return the current position in the decompressed buffer.
1094+
Return current position in decompressed data.
10721095
"""
10731096
return self._position
10741097

10751098
def flush(self):
10761099
"""
1077-
Flush the underlying file. (We don't do partial compression flushes—
1078-
data is only compressed on close.)
1100+
Flush the underlying file. No partial compression flush in this design.
10791101
"""
10801102
if hasattr(self.file, 'flush'):
10811103
self.file.flush()
10821104

10831105
def fileno(self):
10841106
"""
1085-
Return the file descriptor if available.
1107+
Return underlying file descriptor if available.
10861108
"""
10871109
if hasattr(self.file, 'fileno'):
10881110
return self.file.fileno()
10891111
raise OSError("The underlying file object does not support fileno()")
10901112

10911113
def isatty(self):
10921114
"""
1093-
Return whether the underlying file is a TTY.
1115+
Return True if file is a TTY, else False.
10941116
"""
10951117
if hasattr(self.file, 'isatty'):
10961118
return self.file.isatty()
10971119
return False
10981120

10991121
def truncate(self, size=None):
11001122
"""
1101-
Truncate the underlying file if possible.
1123+
Truncate underlying file if possible.
11021124
"""
11031125
if hasattr(self.file, 'truncate'):
11041126
return self.file.truncate(size)
11051127
raise OSError("The underlying file object does not support truncate()")
11061128

11071129
def close(self):
11081130
"""
1109-
If in write mode, compress the entire `_write_buffer` with gzip.compress
1110-
using `level`, then write it to the file.
1131+
If in write mode, compress buffered data using _gzip_compress(level),
1132+
then write it. Close file if we opened it ourselves.
11111133
"""
11121134
if any(m in self.mode for m in ('w', 'a', 'x')):
1113-
compressed = gzip.compress(self._write_buffer, compresslevel=self.level)
1135+
compressed = _gzip_compress(self._write_buffer, compresslevel=self.level)
11141136
self.file.write(compressed)
11151137

11161138
if self.file_path:

0 commit comments

Comments
 (0)