Report password-protected 7z archives as an explicit ingest failure.

- ingestors/packages/__init__.py: catch py7zr's PasswordRequired raised by
  extractall() and re-raise it as ProcessingException(ENCRYPTED_MSG).
- ingestors/support/package.py: reorder imports and append the underlying
  error text to the "Could not unpack the contents of this file." message.
- tests: add an encrypted 7z fixture plus a test asserting the failure status
  and the combined error message; import TestCase via an absolute path.

NOTE(review): only extractall() is guarded here; whether SevenZipFile() itself
can raise PasswordRequired (e.g. for archives with encrypted headers) is not
visible in this patch -- confirm against py7zr before merging.

diff --git a/ingestors/packages/__init__.py b/ingestors/packages/__init__.py
index 6a919de5b..b4ae0fe22 100644
--- a/ingestors/packages/__init__.py
+++ b/ingestors/packages/__init__.py
@@ -5,9 +5,9 @@
 from pathlib import PurePath
 
 import py7zr
-from py7zr.exceptions import ArchiveError
+from py7zr.exceptions import ArchiveError, PasswordRequired
 
-from ingestors.exc import ProcessingException
+from ingestors.exc import ENCRYPTED_MSG, ProcessingException
 from ingestors.ingestor import Ingestor
 from ingestors.support.package import PackageSupport
 from ingestors.support.shell import ShellSupport
@@ -29,7 +29,10 @@ def unpack(self, file_path, entity, temp_dir):
 
         try:
             with py7zr.SevenZipFile(str(pure_file_path), mode="r") as extractor:
-                extractor.extractall(path=temp_dir)
+                try:
+                    extractor.extractall(path=temp_dir)
+                except PasswordRequired:
+                    raise ProcessingException(ENCRYPTED_MSG)
         except ArchiveError as e:
             raise ProcessingException(f"Error: {e}")
 
diff --git a/ingestors/support/package.py b/ingestors/support/package.py
index b17326370..d77a769aa 100644
--- a/ingestors/support/package.py
+++ b/ingestors/support/package.py
@@ -1,11 +1,12 @@
-import shutil
 import logging
+import shutil
+
 from followthemoney import model
 
-from ingestors.support.temp import TempFileSupport
-from ingestors.support.encoding import EncodingSupport
 from ingestors.directory import DirectoryIngestor
 from ingestors.exc import ProcessingException
+from ingestors.support.encoding import EncodingSupport
+from ingestors.support.temp import TempFileSupport
 
 log = logging.getLogger(__name__)
 
@@ -36,7 +37,7 @@ def ingest(self, file_path, entity):
             self.manager.delegate(DirectoryIngestor, temp_dir, entity)
         except ProcessingException as e:
             raise ProcessingException(
-                "Could not unpack the contents of this file."
+                f"Could not unpack the contents of this file. {e}"
             ) from e
 
     def unpack(self, file_path, entity, temp_dir):
diff --git a/tests/fixtures/500_pages_password.7z b/tests/fixtures/500_pages_password.7z
new file mode 100644
index 000000000..2b6a10e92
Binary files /dev/null and b/tests/fixtures/500_pages_password.7z differ
diff --git a/tests/test_packages.py b/tests/test_packages.py
index 084b3a7de..617b25656 100644
--- a/tests/test_packages.py
+++ b/tests/test_packages.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 from pprint import pprint  # noqa
 
-from .support import TestCase
+from tests.support import TestCase
 
 
 class PackagesTest(TestCase):
@@ -22,3 +22,12 @@ def test_tar(self):
         self.manager.ingest(fixture_path, entity)
         self.assertEqual(entity.first("processingStatus"), self.manager.STATUS_SUCCESS)
         self.assertEqual(entity.schema.name, "Package")
+
+    def test_password_protected_7z(self):
+        fixture_path, entity = self.fixture("500_pages_password.7z")
+        self.manager.ingest(fixture_path, entity)
+        self.assertEqual(entity.first("processingStatus"), self.manager.STATUS_FAILURE)
+        self.assertEqual(
+            entity.get("processingError")[0],
+            "Could not unpack the contents of this file. The document might be protected with a password. Try removing the password protection and re-uploading the documents.",
+        )