From 20e21cdbc4b8756feb45d8122abd5ce4a4087f9e Mon Sep 17 00:00:00 2001 From: Karsten Hoffrath Date: Sun, 8 Jan 2017 00:13:54 +0100 Subject: [PATCH] Extract the number of expected records even if the default way to read it fails because reading the last 40 bytes results in invalid UTF-8 code points and decoding the bytes fails with a UnicodeDecodeError exception. --- epf/parser.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/epf/parser.py b/epf/parser.py index cd0e334..83308e4 100644 --- a/epf/parser.py +++ b/epf/parser.py @@ -51,11 +51,25 @@ def parse(path): def parse_file(f, name): f.seek(-40, os.SEEK_END) - records_expected = int( - f.read().decode('utf8') - .rpartition('#recordsWritten:')[2] - .rpartition(record_delim)[0] - ) + records_expected = None + try: + records_expected = int( + f.read().decode('utf8') + .rpartition('#recordsWritten:')[2] + .rpartition(record_delim)[0] + ) + except UnicodeDecodeError: + f.seek(-40, os.SEEK_END) + content = f.read() + while not records_expected: + try: + records_expected = int( + content.decode('utf8') + .rpartition('#recordsWritten:')[2] + .rpartition(record_delim)[0] + ) + except UnicodeDecodeError: + content = content[1:] log.debug('Records expected: %s', records_expected) f.seek(0)