@@ -323,7 +323,7 @@ def decode_unicode_escape(value):
323323 __program_name__ = "Py" + __file_format_name__
324324 __file_format_lower__ = __file_format_name__ .lower ()
325325 __file_format_magic__ = "ねこファイル"
326- # __file_format_magic__ = "네코파일"
326+ #__file_format_magic__ = "네코파일"
327327 __file_format_len__ = len (__file_format_magic__ .encode ('utf-8' ))
328328 __file_format_hex__ = binascii .hexlify (
329329 __file_format_magic__ .encode ("UTF-8" )).decode ("UTF-8" )
@@ -927,111 +927,192 @@ def __exit__(self, exc_type, exc_value, traceback):
927927 self .close ()
928928
929929
class GzipFile(object):
    """
    Minimal file-like wrapper that reads/writes gzip data entirely in memory.

    - Read mode ('r'): the whole underlying file is read, checked for the
      gzip magic bytes and decompressed into memory; read()/seek()/tell()
      then operate on that decompressed buffer.
    - Write mode ('w'/'a'/'x'): written data is buffered uncompressed; the
      buffer is compressed in one shot and written out by close().

    Compression and decompression go through gzip.GzipFile over an in-memory
    io.BytesIO rather than gzip.compress()/gzip.decompress(), because those
    module-level helpers are not available on Python 2.
    """

    # Two-byte signature that starts every gzip member.
    GZIP_MAGIC = b'\x1f\x8b'

    def __init__(self, file_path=None, fileobj=None, mode='rb',
                 level=9, encoding=None, errors=None, newline=None):
        """
        :param file_path: Path of the gzip file to open (exclusive with fileobj)
        :param fileobj: Already-open binary file object (exclusive with file_path)
        :param mode: 'r', 'w', 'a' or 'x', optionally with 'b' or 't'
        :param level: Compression level; None is treated as 9
        :param encoding: Text encoding used in 't' mode (default UTF-8)
        :param errors: Encode/decode error handling in 't' mode (default strict)
        :param newline: Stored only; present to mirror open()'s signature
        :raises ValueError: if neither or both of file_path/fileobj are given,
            if the mode is not recognised, or if the data read does not start
            with the gzip magic bytes
        :raises IOError: if file_path cannot be opened (FileNotFoundError on
            Python 3 when the file is missing)
        """
        if file_path is None and fileobj is None:
            raise ValueError("Either file_path or fileobj must be provided")
        if file_path is not None and fileobj is not None:
            raise ValueError("Only one of file_path or fileobj should be provided")

        self.file_path = file_path
        self.fileobj = fileobj
        self.mode = mode
        # Callers may forward an unset (None) level; fall back to the default.
        self.level = 9 if level is None else level
        self.encoding = encoding
        self.errors = errors
        self.newline = newline

        # Decompressed payload and cursor (read mode).
        self._decompressed_data = b''
        self._position = 0

        # Uncompressed pending output (write mode); bytearray appends are
        # amortised O(1), unlike repeated bytes concatenation.
        self._write_buffer = bytearray()

        self._text_mode = 't' in mode
        self._closed = False

        # The underlying file always carries compressed bytes, so it must be
        # opened in binary mode even when the caller passed 'w', 'r' or 'rt'.
        internal_mode = mode.replace('t', '')
        if 'b' not in internal_mode:
            internal_mode += 'b'

        if self._is_write_mode():
            if file_path is not None:
                self.file = open(file_path, internal_mode)
            else:
                self.file = fileobj
        elif 'r' in mode:
            if file_path is not None:
                # open() itself reports a missing file, on both Python 2 and 3.
                self.file = open(file_path, internal_mode)
                try:
                    self._load_file()
                except Exception:
                    # Do not leak the handle we opened if the data is invalid.
                    self.file.close()
                    raise
            else:
                self.file = fileobj
                self._load_file()
        else:
            raise ValueError("Mode should be 'rb'/'rt' or 'wb'/'wt'")

    def _is_write_mode(self):
        """Return True when the mode string requests writing ('w', 'a' or 'x')."""
        return any(flag in self.mode for flag in ('w', 'a', 'x'))

    def _compress(self, payload):
        """Return `payload` (bytes) gzip-compressed at self.level."""
        import io

        sink = io.BytesIO()
        writer = gzip.GzipFile(fileobj=sink, mode='wb', compresslevel=self.level)
        try:
            writer.write(payload)
        finally:
            # Closing the GzipFile writes the trailer; it does not close `sink`.
            writer.close()
        return sink.getvalue()

    def _load_file(self):
        """
        Read the whole underlying file from offset 0, verify the gzip magic
        bytes and decompress it into memory (decoding to text in 't' mode).

        :raises ValueError: if the data does not start with the gzip magic bytes
        """
        import io

        self.file.seek(0)
        compressed_data = self.file.read()

        if not compressed_data.startswith(self.GZIP_MAGIC):
            raise ValueError("Invalid GZIP file header (magic bytes missing)")

        reader = gzip.GzipFile(fileobj=io.BytesIO(compressed_data), mode='rb')
        try:
            payload = reader.read()
        finally:
            reader.close()

        if self._text_mode:
            payload = payload.decode(self.encoding or 'UTF-8',
                                     self.errors or 'strict')
        self._decompressed_data = payload

    def write(self, data):
        """
        Append data to the in-memory buffer; compression happens in close().

        :param data: bytes, or text in 't' mode (encoded with self.encoding)
        :return: number of bytes buffered
        :raises IOError: if the file was opened for reading
        :raises ValueError: if the file is already closed
        """
        if 'r' in self.mode:
            raise IOError("File not open for writing")
        if getattr(self, '_closed', False):
            raise ValueError("I/O operation on closed file")

        # Only encode real text: on Python 2 a byte str must pass through
        # untouched, otherwise .encode() would first decode it as ASCII.
        if self._text_mode and not isinstance(data, bytes):
            data = data.encode(self.encoding or 'UTF-8', self.errors or 'strict')

        self._write_buffer += data
        return len(data)

    def read(self, size=-1):
        """
        Return up to `size` items from the decompressed buffer (all remaining
        data when size is None or negative) and advance the position by the
        amount actually returned.

        :raises IOError: if the file was not opened for reading
        """
        if 'r' not in self.mode:
            raise IOError("File not open for reading")

        remaining = len(self._decompressed_data) - self._position
        if size is None or size < 0 or size > remaining:
            size = remaining
        data = self._decompressed_data[self._position:self._position + size]
        # Advance by what was returned so tell() never runs past the end.
        self._position += len(data)
        return data

    def seek(self, offset, whence=0):
        """
        Move the read position within the decompressed buffer, clamped to
        [0, len(data)], and return the new position.

        :param whence: 0 = absolute, 1 = relative to current, 2 = relative to end
        :raises IOError: if the file was not opened for reading
        :raises ValueError: if whence is not 0, 1 or 2
        """
        if 'r' not in self.mode:
            raise IOError("File not open for reading")

        if whence == 0:
            new_pos = offset
        elif whence == 1:
            new_pos = self._position + offset
        elif whence == 2:
            new_pos = len(self._decompressed_data) + offset
        else:
            raise ValueError("Invalid value for whence")

        self._position = max(0, min(new_pos, len(self._decompressed_data)))
        return self._position

    def tell(self):
        """Return the current position in the decompressed buffer."""
        return self._position

    def flush(self):
        """
        Flush the underlying file if it supports it. Buffered data is not
        compressed here; that only happens in close().
        """
        if hasattr(self.file, 'flush'):
            self.file.flush()

    def fileno(self):
        """Return the underlying file descriptor, or raise OSError if unavailable."""
        if hasattr(self.file, 'fileno'):
            return self.file.fileno()
        raise OSError("The underlying file object does not support fileno()")

    def isatty(self):
        """Return the underlying file's isatty() result, or False if it has none."""
        if hasattr(self.file, 'isatty'):
            return self.file.isatty()
        return False

    def truncate(self, size=None):
        """Truncate the underlying file, or raise OSError if it cannot."""
        if hasattr(self.file, 'truncate'):
            return self.file.truncate(size)
        raise OSError("The underlying file object does not support truncate()")

    def close(self):
        """
        In write mode, compress the buffered data and write it to the
        underlying file. The underlying file is closed only when this object
        opened it itself (file_path was given). Calling close() more than
        once is a no-op.
        """
        if getattr(self, '_closed', False):
            return
        # Mark closed first so a failed write is never retried by a second
        # close() (e.g. explicit close() followed by a context-manager exit).
        self._closed = True
        try:
            if self._is_write_mode():
                self.file.write(self._compress(bytes(self._write_buffer)))
        finally:
            if self.file_path is not None:
                self.file.close()
10371118
@@ -3971,7 +4052,10 @@ def UncompressFile(infile, formatspecs=__file_format_dict__, mode="rb"):
39714052 mode = "w"
39724053 try :
39734054 if (compresscheck == "gzip" and compresscheck in compressionsupport ):
3974- filefp = gzip .open (infile , mode )
4055+ if sys .version_info [0 ] == 2 :
4056+ filefp = GzipFile (infile , mode = mode )
4057+ else :
4058+ filefp = gzip .open (infile , mode )
39754059 elif (compresscheck == "bzip2" and compresscheck in compressionsupport ):
39764060 filefp = bz2 .open (infile , mode )
39774061 elif (compresscheck == "zstd" and compresscheck in compressionsupport ):
@@ -4152,7 +4236,10 @@ def CheckCompressionSubType(infile, formatspecs=__file_format_dict__, closefp=Tr
41524236 else :
41534237 try :
41544238 if (compresscheck == "gzip" and compresscheck in compressionsupport ):
4155- catfp = gzip .GzipFile (infile , "rb" )
4239+ if sys .version_info [0 ] == 2 :
4240+ catfp = GzipFile (infile , mode = "rb" )
4241+ else :
4242+ catfp = gzip .GzipFile (infile , "rb" )
41564243 elif (compresscheck == "bzip2" and compresscheck in compressionsupport ):
41574244 catfp = bz2 .BZ2File (infile , "rb" )
41584245 elif (compresscheck == "lz4" and compresscheck in compressionsupport ):
@@ -4292,7 +4379,10 @@ def CompressOpenFile(outfile, compressionenable=True, compressionlevel=None):
42924379 if (fextname not in outextlistwd or not compressionenable ):
42934380 outfp = open (outfile , "wb" )
42944381 elif (fextname == ".gz" and "gzip" in compressionsupport ):
4295- outfp = gzip .open (outfile , mode , compressionlevel )
4382+ if sys .version_info [0 ] == 2 :
4383+ outfp = GzipFile (outfile , mode = mode , level = compressionlevel )
4384+ else :
4385+ outfp = gzip .open (outfile , mode , compressionlevel )
42964386 elif (fextname == ".bz2" and "bzip2" in compressionsupport ):
42974387 outfp = bz2 .open (outfile , mode , compressionlevel )
42984388 elif (fextname == ".zst" and "zstandard" in compressionsupport ):
0 commit comments