Skip to content

Commit 2877145

Browse files
author
Kazuki Suzuki Przyborowski
committed
Update pycatfile.py
1 parent d328817 commit 2877145

1 file changed

Lines changed: 201 additions & 4 deletions

File tree

pycatfile.py

Lines changed: 201 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -331,7 +331,7 @@ def decode_unicode_escape(value):
331331
__version_date__ = str(__version_date_info__[0]) + "." + str(
332332
__version_date_info__[1]).zfill(2) + "." + str(__version_date_info__[2]).zfill(2)
333333
__revision__ = __version_info__[3]
334-
__revision_id__ = "$Id: 20b12dc043f27f33e7faf4f73e4655433875d2ce $"
334+
__revision_id__ = "$Id$"
335335
if(__version_info__[4] is not None):
336336
__version_date_plusrc__ = __version_date__ + \
337337
"-" + str(__version_date_info__[4])
@@ -1211,6 +1211,203 @@ def __exit__(self, exc_type, exc_value, traceback):
12111211
self.close()
12121212

12131213

1214+
class LzopFile(object):
    """
    Minimal in-memory file-like wrapper around python-lzo.

    Reading: the whole underlying file is read at construction time, the
    LZOP_MAGIC prefix is verified and stripped, and the remainder is passed
    to lzo.decompress(). read()/seek()/tell() then operate on that buffer.

    Writing: write() only accumulates data in memory; close() compresses
    everything with lzo.compress() and writes LZOP_MAGIC followed by the
    compressed payload to the underlying file.

    Only the magic prefix is handled here; no other header fields are
    parsed or produced.
    """

    # Prefix written before, and expected in front of, the compressed payload.
    # If your files use a different LZO wrapper, you can adjust the magic or
    # remove it entirely.
    # NOTE(review): the magic used by the lzop tool is believed to contain a
    # NUL byte after "LZO" (b'\x89LZO\x00\r\n\x1a\n'); this constant omits it.
    # Changing it would break files already written by this class -- confirm
    # before touching.
    LZOP_MAGIC = b'\x89LZO\x0D\x0A\x1A\n'

    def __init__(self, file_path=None, fileobj=None, mode='rb',
                 encoding=None, errors=None, newline=None, level=None):
        """
        Open an LZO-compressed stream for reading or writing.

        file_path / fileobj: exactly one must be given. A path is opened (and
            later closed) by this object; a fileobj is used as-is and is NOT
            closed by close().
        mode: 'r', 'w', 'a' or 'x', optionally combined with 'b' or 't'.
            With 't', read() returns decoded text and write() encodes text.
        encoding / errors: used for text mode only; default to 'UTF-8' and
            'strict'.
        newline: stored for interface compatibility; not otherwise used.
        level: optional compression level forwarded to lzo.compress() on
            close(). None keeps the library default.

        Raises ValueError for a bad argument combination, an unsupported
        mode, a missing magic prefix or a decompression failure, and
        FileNotFoundError when file_path does not exist in read mode.
        """
        if file_path is None and fileobj is None:
            raise ValueError("Either file_path or fileobj must be provided")
        if file_path is not None and fileobj is not None:
            raise ValueError("Only one of file_path or fileobj should be provided")

        self.file_path = file_path
        self.fileobj = fileobj
        self.mode = mode
        self.encoding = encoding
        self.errors = errors
        self.newline = newline
        self.level = level
        # Set early so close() is safe even if construction fails part-way.
        self.file = None
        self._closed = False
        self._decompressed_data = b''
        self._position = 0

        # For writing, uncompressed chunks are kept in memory until close().
        # A list avoids the quadratic cost of repeated bytes concatenation.
        self._write_chunks = []

        # Track whether we're doing text mode
        self._text_mode = 't' in mode

        # The underlying file is always binary: compressed data is bytes.
        # Drop 't' and make sure 'b' is present (plain 'r'/'w' would
        # otherwise open the file in text mode).
        internal_mode = mode.replace('t', '')
        if 'b' not in internal_mode:
            internal_mode += 'b'

        if 'w' in mode or 'a' in mode or 'x' in mode:
            # Open the file if a path was specified; otherwise, use fileobj
            if file_path:
                self.file = open(file_path, internal_mode)
            else:
                self.file = fileobj

        elif 'r' in mode:
            if file_path:
                if not os.path.exists(file_path):
                    raise FileNotFoundError("No such file: '{}'".format(file_path))
                self.file = open(file_path, internal_mode)
                try:
                    self._load_file()
                except Exception:
                    # Do not leak the handle we just opened.
                    self.file.close()
                    raise
            else:
                # fileobj provided; the caller keeps ownership of it
                self.file = fileobj
                self._load_file()

        else:
            raise ValueError("Mode should be 'rb'/'rt' or 'wb'/'wt'")

    def _load_file(self):
        """
        Read the entire compressed file into memory, verify and strip the
        magic prefix, and decompress the rest into _decompressed_data.

        Raises ValueError if the magic is missing or decompression fails.
        """
        self.file.seek(0)
        compressed_data = self.file.read()

        # Check for the magic prefix
        if not compressed_data.startswith(self.LZOP_MAGIC):
            raise ValueError("Invalid LZOP file header (magic bytes missing)")

        # Strip the magic from the front; the rest is treated as the
        # LZO-compressed payload. In a real lzop file, there may be more
        # fields in the header, but this simplistic approach just strips the
        # magic bytes.
        compressed_data = compressed_data[len(self.LZOP_MAGIC):]

        try:
            self._decompressed_data = lzo.decompress(compressed_data)
        except lzo.error as e:
            raise ValueError("LZO decompression failed: {}".format(str(e)))

        # If we're in text mode, decode from bytes to str
        if self._text_mode:
            enc = self.encoding or 'UTF-8'
            err = self.errors or 'strict'
            self._decompressed_data = self._decompressed_data.decode(enc, err)

    def write(self, data):
        """
        Append data to the in-memory buffer; compression and the actual file
        write happen in close().

        Returns the length of ``data`` as passed in. Raises IOError when the
        object was opened for reading.
        """
        if 'r' in self.mode:
            raise IOError("File not open for writing")

        written = len(data)
        if self._text_mode:
            # Encode data from str (Py3) or unicode (Py2) to bytes
            data = data.encode(self.encoding or 'UTF-8', self.errors or 'strict')

        self._write_chunks.append(data)
        return written

    def read(self, size=-1):
        """
        Return up to ``size`` items from the decompressed buffer, starting at
        the current position; a negative or None size reads to the end.

        In text mode the buffer is a decoded string, so sizes and positions
        count characters rather than bytes. Raises IOError when the object
        was not opened for reading.
        """
        if 'r' not in self.mode:
            raise IOError("File not open for reading")

        if size is None or size < 0:
            size = len(self._decompressed_data) - self._position
        data = self._decompressed_data[self._position:self._position + size]
        # Advance by what was actually returned so tell() never runs past
        # the end of the data after a short read.
        self._position += len(data)
        return data

    def seek(self, offset, whence=0):
        """
        Move the read position within the decompressed buffer and return it.

        whence: 0 = absolute, 1 = relative to the current position,
        2 = relative to the end. The result is clamped to the range
        [0, len(data)]. Raises ValueError for any other whence and IOError
        when the object was not opened for reading.
        """
        if 'r' not in self.mode:
            raise IOError("File not open for reading")

        if whence == 0:  # absolute
            new_pos = offset
        elif whence == 1:  # relative
            new_pos = self._position + offset
        elif whence == 2:  # relative to end
            new_pos = len(self._decompressed_data) + offset
        else:
            raise ValueError("Invalid value for whence")

        self._position = max(0, min(new_pos, len(self._decompressed_data)))
        return self._position

    def tell(self):
        """
        Return the current read position in the decompressed buffer.
        """
        return self._position

    def flush(self):
        """
        Flush the underlying file, if it supports flush(). Buffered write
        data is not emitted here because compression only happens in close().
        """
        if hasattr(self.file, 'flush'):
            self.file.flush()

    def fileno(self):
        """
        Return the underlying file's descriptor; raise OSError if the
        underlying object has no fileno().
        """
        if hasattr(self.file, 'fileno'):
            return self.file.fileno()
        raise OSError("The underlying file object does not support fileno()")

    def isatty(self):
        """
        Return the underlying file's isatty() result, or False if it has no
        such method.
        """
        if hasattr(self.file, 'isatty'):
            return self.file.isatty()
        return False

    def truncate(self, size=None):
        """
        Truncate the underlying file; raise OSError if the underlying object
        has no truncate().
        """
        if hasattr(self.file, 'truncate'):
            return self.file.truncate(size)
        raise OSError("The underlying file object does not support truncate()")

    def close(self):
        """
        In write mode, compress the accumulated buffer and write it, preceded
        by LZOP_MAGIC, to the underlying file. The file is then closed only
        if this object opened it (i.e. a file_path was given).

        Calling close() more than once is a no-op after the first call, so
        an explicit close() inside a ``with`` block does not write the
        payload twice. Raises ValueError if compression fails.
        """
        if self._closed:
            return
        self._closed = True

        try:
            if any(flag in self.mode for flag in ('w', 'a', 'x')):
                payload = b''.join(self._write_chunks)
                self._write_chunks = []
                try:
                    if self.level is None:
                        compressed = lzo.compress(payload)
                    else:
                        compressed = lzo.compress(payload, self.level)
                except lzo.error as e:
                    raise ValueError("LZO compression failed: {}".format(str(e)))
                self.file.write(self.LZOP_MAGIC)
                self.file.write(compressed)
        finally:
            # Only close handles we opened ourselves, even on failure.
            if self.file_path and self.file is not None:
                self.file.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
1409+
1410+
12141411
'''
12151412
class BloscFile:
12161413
def __init__(self, file_path=None, fileobj=None, mode='rb', level=9, encoding=None, errors=None, newline=None):
@@ -3712,7 +3909,7 @@ def UncompressFile(infile, formatspecs=__file_format_dict__, mode="rb"):
37123909
elif(compresscheck == "lz4" and compresscheck in compressionsupport):
37133910
filefp = lz4.frame.open(infile, mode)
37143911
elif((compresscheck == "lzo" or compresscheck == "lzop") and compresscheck in compressionsupport):
3715-
filefp = lzo.open(infile, mode)
3912+
filefp = LzopFile(infile, mode=mode)
37163913
elif((compresscheck == "lzma" or compresscheck == "xz") and compresscheck in compressionsupport):
37173914
filefp = lzma.open(infile, mode)
37183915
elif(compresscheck == "zlib" and compresscheck in compressionsupport):
@@ -3893,7 +4090,7 @@ def CheckCompressionSubType(infile, formatspecs=__file_format_dict__, closefp=Tr
38934090
else:
38944091
return Flase
38954092
elif((compresscheck == "lzo" or compresscheck == "lzop") and compresscheck in compressionsupport):
3896-
catfp = lzo.open(infile, "rb")
4093+
catfp = LzopFile(infile, mode="rb")
38974094
elif((compresscheck == "lzma" or compresscheck == "xz") and compresscheck in compressionsupport):
38984095
catfp = lzma.open(infile, "rb")
38994096
elif(compresscheck == "zlib" and compresscheck in compressionsupport):
@@ -4043,7 +4240,7 @@ def CompressOpenFile(outfile, compressionenable=True, compressionlevel=None):
40434240
outfp = lz4.frame.open(
40444241
outfile, mode, compression_level=compressionlevel)
40454242
elif(fextname == ".lzo" and "lzop" in compressionsupport):
4046-
outfp = lzo.open(outfile, mode, compresslevel=compressionlevel)
4243+
outfp = LzopFile(outfile, mode=mode, level=compressionlevel)
40474244
elif(fextname == ".lzma" and "lzma" in compressionsupport):
40484245
try:
40494246
outfp = lzma.open(outfile, mode, format=lzma.FORMAT_ALONE, filters=[{"id": lzma.FILTER_LZMA1, "preset": compressionlevel}])

0 commit comments

Comments
 (0)