@@ -927,31 +927,54 @@ def __exit__(self, exc_type, exc_value, traceback):
927927 self .close ()
928928
929929
def _gzip_compress(data, compresslevel=9):
    """
    Compress data into a single gzip member using zlib with wbits=31.

    Single-shot (non-streaming): the whole payload is compressed in memory.

    :param data: Bytes to compress
    :param compresslevel: zlib compression level, normally 1..9
    :return: GZIP-compressed bytes (gzip header, deflate stream, gzip trailer)
    """
    # wbits=31 (16 + 15) makes zlib emit gzip framing instead of the default
    # zlib container.
    compobj = zlib.compressobj(compresslevel, zlib.DEFLATED, 31)
    # Z_FINISH terminates the stream so the gzip trailer gets written.
    return compobj.compress(data) + compobj.flush(zlib.Z_FINISH)
943+
def _gzip_decompress(data):
    """
    Decompress gzip data (wbits=31), including multi-member streams.

    A gzip file may consist of several members concatenated together,
    optionally separated by NUL padding. Every member is decompressed and
    the results are joined. Trailing bytes that do not start another gzip
    member are ignored, exactly as a one-shot ``zlib.decompress`` would
    ignore them.

    :param data: GZIP-compressed bytes
    :return: Decompressed bytes
    :raises zlib.error: if a member is corrupt, truncated, or empty
    """
    pieces = []
    remaining = data
    while True:
        # One decompressor per member: a decompressobj stops at the end of
        # the first stream and exposes whatever follows via unused_data.
        member = zlib.decompressobj(31)
        pieces.append(member.decompress(remaining))
        pieces.append(member.flush())
        leftover = member.unused_data

        finished = getattr(member, 'eof', None)
        if finished is None:
            # Interpreters without decompressobj.eof (Python 2): non-empty
            # unused_data proves the member ended; otherwise re-validate
            # with the one-shot API, which raises on a truncated stream.
            if not leftover:
                zlib.decompress(remaining, 31)
        elif not finished:
            raise zlib.error(
                "Error -5 while decompressing data: "
                "incomplete or truncated stream")

        # Skip NUL padding between members, then continue only if another
        # gzip member actually follows.
        remaining = leftover.lstrip(b'\x00')
        if not remaining.startswith(b'\x1f\x8b'):
            break
    return b''.join(pieces)
953+
930954class GzipFile (object ):
931955 """
932- A file-like wrapper around gzip compression/decompression using
933- gzip.compress() and gzip.decompress() for a single-shot in-memory approach.
934-
935- - In read mode (r): Reads the entire file, checks for GZIP magic bytes, then
936- decompresses into memory.
937- - In write mode (w/a/x): Buffers all data in memory. On close, compresses
938- everything with gzip.compress() (using the specified level) and writes it out.
939- - Tries to mimic gzip.GzipFile usage, but without streaming writes.
956+ A file-like wrapper that uses zlib at wbits=31 to mimic gzip compress/decompress,
957+ for Python versions lacking gzip.compress/gzip.decompress (e.g., Python <3.2 or Py2).
958+
959+ - Read mode: loads entire file, checks for magic bytes, decompresses.
960+ - Write mode: buffers data in memory, writes compressed data on close().
961+ - Includes 'level' to set compression level (1..9).
962+ - Text vs. binary mode: specify 't' in the mode for text, with optional encoding/errors.
940963 """
941964
942- # GZIP magic bytes: b'\x1f\x8b'
965+ # GZIP magic
943966 GZIP_MAGIC = b'\x1f \x8b '
944967
945968 def __init__ (self , file_path = None , fileobj = None , mode = 'rb' ,
946969 level = 9 , encoding = None , errors = None , newline = None ):
947970 """
948- :param file_path: Path to file (if any )
949- :param fileobj: An existing file object (if any )
950- :param mode: File mode, e.g., 'rb', 'wb', 'rt', 'wt', etc.
971+ :param file_path: Path to file on disk (optional )
972+ :param fileobj: Existing file-like object (optional )
973+ :param mode: e.g. 'rb', 'wb', 'rt', 'wt'
951974 :param level: Compression level (1..9)
952- :param encoding: For text mode, the text encoding
953- :param errors: Error handling for encoding/decoding
954- :param newline: Placeholder to mimic built-in open() signature
975+ :param encoding: Used if 't' in mode for text encoding
976+ :param errors: Error handling for text encode/decode
977+ :param newline: Placeholder for signature compatibility; not implemented
955978 """
956979 if file_path is None and fileobj is None :
957980 raise ValueError ("Either file_path or fileobj must be provided" )
@@ -966,21 +989,21 @@ def __init__(self, file_path=None, fileobj=None, mode='rb',
966989 self .errors = errors
967990 self .newline = newline
968991
969- # Decompressed data (if reading)
992+ # For reading, we store decompressed data in memory
970993 self ._decompressed_data = b''
971994 self ._position = 0
972995
973- # Buffer to hold raw ( uncompressed) data (if writing )
996+ # For writing, we store uncompressed data in memory until close( )
974997 self ._write_buffer = b''
975998
976- # Track text vs. binary
999+ # Text mode if 't' is present in mode
9771000 self ._text_mode = 't' in mode
9781001
979- # Force binary mode internally for file I/O
1002+ # Force binary mode for the actual file I/O
9801003 internal_mode = mode .replace ('t' , 'b' )
9811004
9821005 if any (m in mode for m in ('w' , 'a' , 'x' )):
983- # Writing / appending
1006+ # Writing or appending
9841007 if file_path :
9851008 self .file = open (file_path , internal_mode )
9861009 else :
@@ -995,62 +1018,62 @@ def __init__(self, file_path=None, fileobj=None, mode='rb',
9951018 else :
9961019 raise FileNotFoundError ("No such file: '{}'" .format (file_path ))
9971020 else :
998- # fileobj provided
1021+ # fileobj
9991022 self .file = fileobj
10001023 self ._load_file ()
10011024 else :
10021025 raise ValueError ("Mode should be 'rb'/'rt' or 'wb'/'wt'" )
10031026
def _load_file(self):
    """
    Read the entire compressed file, verify the gzip magic, and decompress.

    The decompressed payload is stored in ``self._decompressed_data``. In
    text mode it is additionally decoded using ``self.encoding`` (default
    UTF-8) and ``self.errors`` (default 'strict').

    :raises ValueError: if the data does not start with the gzip magic bytes
    """
    # Rewind when possible so the complete file is read. File objects that
    # cannot seek are read from their current position instead of failing.
    try:
        self.file.seek(0)
    except (AttributeError, IOError, OSError):
        pass
    compressed_data = self.file.read()

    # Verify GZIP magic before handing the data to zlib
    if not compressed_data.startswith(self.GZIP_MAGIC):
        raise ValueError("Invalid GZIP header (magic bytes missing)")

    # Decompress everything in one shot
    self._decompressed_data = _gzip_decompress(compressed_data)

    # If text mode, decode to str (Py3) or unicode (Py2)
    if self._text_mode:
        enc = self.encoding or 'UTF-8'
        err = self.errors or 'strict'
        self._decompressed_data = self._decompressed_data.decode(enc, err)
def write(self, data):
    """
    Append data to the in-memory write buffer.

    Nothing is compressed here; the buffered data is compressed to GZIP
    when the file is closed.

    :param data: bytes in binary mode; text (str/unicode) in text mode
    :return: Length of ``data`` as passed in (bytes, or characters in text mode)
    :raises IOError: if the file was opened for reading
    """
    if 'r' in self.mode:
        raise IOError("File not open for writing")

    # Measure before encoding so the caller gets a count in its own units.
    written = len(data)

    if self._text_mode:
        # Encode str/unicode to bytes
        data = data.encode(self.encoding or 'UTF-8', self.errors or 'strict')

    self._write_buffer += data
    return written
10371060
def read(self, size=-1):
    """
    Read from the in-memory decompressed data buffer.

    :param size: Maximum number of items to return; a negative value or
                 None reads everything up to the end of the buffer
    :return: The requested slice of the buffer (empty at end of data)
    :raises IOError: if the file was not opened for reading
    """
    if 'r' not in self.mode:
        raise IOError("File not open for reading")

    start = self._position
    total = len(self._decompressed_data)
    if size is None or size < 0:
        stop = total
    else:
        stop = min(start + size, total)

    data = self._decompressed_data[start:stop]
    # Advance by what was actually returned so the position never runs past
    # the end of the buffer (or moves backwards when already beyond it).
    self._position = start + len(data)
    return data
10501073
10511074 def seek (self , offset , whence = 0 ):
10521075 """
1053- Adjust the current read position in the decompressed buffer.
1076+ Seek in the decompressed data buffer.
10541077 """
10551078 if 'r' not in self .mode :
10561079 raise IOError ("File not open for reading" )
@@ -1059,7 +1082,7 @@ def seek(self, offset, whence=0):
10591082 new_pos = offset
10601083 elif whence == 1 : # relative
10611084 new_pos = self ._position + offset
1062- elif whence == 2 : # relative to end
1085+ elif whence == 2 : # from the end
10631086 new_pos = len (self ._decompressed_data ) + offset
10641087 else :
10651088 raise ValueError ("Invalid value for whence" )
@@ -1068,49 +1091,48 @@ def seek(self, offset, whence=0):
10681091
def tell(self):
    """
    Return the current position in the uncompressed data.

    In read mode this is the read cursor inside the decompressed buffer.
    In write mode it is the number of uncompressed bytes buffered so far.

    :return: Current offset as an integer
    """
    if 'r' in self.mode:
        return self._position
    # Write modes never move the read cursor, so report the buffered size.
    return len(self._write_buffer)
10741097
def flush(self):
    """
    Flush the underlying file, if it supports flushing.

    No partial compression flush happens in this design: buffered data is
    only compressed when the file is closed.
    """
    flusher = getattr(self.file, 'flush', None)
    if flusher is not None:
        flusher()
10821104
def fileno(self):
    """
    Return the underlying file descriptor if available.

    :return: Whatever the underlying file object's ``fileno()`` returns
    :raises OSError: if the underlying file object has no ``fileno`` method
    """
    if not hasattr(self.file, 'fileno'):
        raise OSError("The underlying file object does not support fileno()")
    return self.file.fileno()
10901112
def isatty(self):
    """
    Report whether the underlying file is a TTY.

    :return: The underlying file's ``isatty()`` result, or False when the
             file object has no ``isatty`` method
    """
    if not hasattr(self.file, 'isatty'):
        return False
    return self.file.isatty()
10981120
def truncate(self, size=None):
    """
    Truncate the underlying file if possible.

    :param size: New size passed to the underlying file; when None the
                 underlying ``truncate()`` is called without arguments
    :return: Whatever the underlying file object's ``truncate`` returns
    :raises OSError: if the underlying file object has no ``truncate`` method
    """
    if not hasattr(self.file, 'truncate'):
        raise OSError("The underlying file object does not support truncate()")
    if size is None:
        # Python 2 file objects reject an explicit None, so omit the
        # argument and let the file use its own default.
        return self.file.truncate()
    return self.file.truncate(size)
11061128
11071129 def close (self ):
11081130 """
1109- If in write mode, compress the entire `_write_buffer` with gzip.compress
1110- using `level`, then write it to the file.
1131+ If in write mode, compress buffered data using _gzip_compress(level),
1132+ then write it. Close file if we opened it ourselves .
11111133 """
11121134 if any (m in self .mode for m in ('w' , 'a' , 'x' )):
1113- compressed = gzip . compress (self ._write_buffer , compresslevel = self .level )
1135+ compressed = _gzip_compress (self ._write_buffer , compresslevel = self .level )
11141136 self .file .write (compressed )
11151137
11161138 if self .file_path :
0 commit comments