diff --git a/lambdas/services/bulk_upload_metadata_processor_service.py b/lambdas/services/bulk_upload_metadata_processor_service.py index bc5ea99ed..0910c4283 100644 --- a/lambdas/services/bulk_upload_metadata_processor_service.py +++ b/lambdas/services/bulk_upload_metadata_processor_service.py @@ -1,5 +1,6 @@ import csv import os +import re import shutil import tempfile import urllib.parse @@ -42,6 +43,7 @@ BulkUploadMetadataException, InvalidFileNameException, LGInvalidFilesException, + OdsErrorException, VirusScanFailedException, ) from utils.filename_utils import extract_nhs_number_from_bulk_upload_file_name @@ -50,6 +52,7 @@ logger = LoggingService(__name__) UNSUCCESSFUL = "Unsuccessful bulk upload" +EXPEDITE_UPLOAD_VALIDATION_FAILED_LOG_MARKER = "EXPEDITE_UPLOAD_VALIDATION_FAILED" class BulkUploadMetadataProcessorService: @@ -299,7 +302,7 @@ def validate_expedite_file(self, s3_object_key: str): "Failed processing expedite event due to file not being a 1of1" ) logger.error(failure_msg) - raise BulkUploadMetadataException(failure_msg) + raise InvalidFileNameException(failure_msg) nhs_number = extract_nhs_number_from_bulk_upload_file_name(file_path)[0] file_name = self.metadata_formatter_service.validate_record_filename( @@ -309,6 +312,15 @@ def validate_expedite_file(self, s3_object_key: str): scan_date = datetime.now().strftime("%Y-%m-%d") return nhs_number, file_name, ods_code, scan_date + def validate_ods_code_format_in_expedite_folder(self, s3_object_key: str) -> None: + """Ensure the parent directory of the expedite file is a valid ODS code.""" + ods_code = Path(s3_object_key).parent.name + + if not re.fullmatch(r"^[A-Z]\d{5}$", ods_code): + failure_msg = f"Invalid ODS code folder '{ods_code}' in expedite path: {s3_object_key}" + logger.error(failure_msg) + raise OdsErrorException(failure_msg) + def handle_expedite_event(self, event): """Process S3 EventBridge expedite uploads: enforce virus scan, ensure 1of1, extract identifiers, and send metadata to SQS.""" @@ -321,14 +333,22 @@ def handle_expedite_event(self, event): if s3_object_key.startswith("expedite/"): logger.info("Processing file from expedite folder") + try: + self.validate_ods_code_format_in_expedite_folder(s3_object_key) + + self.enforce_virus_scanner(s3_object_key) + self.check_file_status(s3_object_key) + + sqs_metadata = self.create_expedite_sqs_metadata(s3_object_key) - self.enforce_virus_scanner(s3_object_key) - self.check_file_status(s3_object_key) + self.send_metadata_to_expedite_sqs(sqs_metadata) + logger.info("Successfully processed expedite event") - sqs_metadata = self.create_expedite_sqs_metadata(s3_object_key) + except (InvalidFileNameException, OdsErrorException) as error: + logger.info(EXPEDITE_UPLOAD_VALIDATION_FAILED_LOG_MARKER) + failure_msg = f"Expedite upload validation failed for '{s3_object_key}': {error}" + raise BulkUploadMetadataException(failure_msg) - self.send_metadata_to_expedite_sqs(sqs_metadata) - logger.info("Successfully processed expedite event") else: failure_msg = f"Unexpected directory or file location received from EventBridge: {s3_object_key}" logger.error(failure_msg) diff --git a/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py b/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py index 05281c63c..cdb9c1dca 100644 --- a/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py +++ b/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py @@ -36,6 +36,7 @@ BulkUploadMetadataException, InvalidFileNameException, LGInvalidFilesException, + OdsErrorException, VirusScanFailedException, ) @@ -856,7 +857,7 @@ def test_handle_expedite_event_rejects_non_1of1(test_service, mocker): mocker.patch.object(BulkUploadMetadataProcessorService, "check_file_status") mocked_send = mocker.patch.object( BulkUploadMetadataProcessorService, - "send_metadata_to_fifo_sqs", + "send_metadata_to_expedite_sqs", ) key = "expedite/A12345/2of3_1234567890_record.pdf" event = {"detail": {"object": {"key": key}}} @@ -1152,12 +1153,16 @@ def test_validate_expedite_file_happy_path_returns_expected_tuple(test_service): ], ) def test_validate_expedite_file_rejects_non_1of1(test_service, key): - with pytest.raises(BulkUploadMetadataException): + with pytest.raises(InvalidFileNameException) as exc: test_service.validate_expedite_file(key) + assert "not being a 1of1" in str(exc.value) + def test_handle_expedite_event_calls_enforce_for_expedite_key(mocker, test_service): - encoded_key = urllib.parse.quote_plus("expedite/folder/some file.pdf") + encoded_key = urllib.parse.quote_plus( + "expedite/folder/G12345/1of1_1234567890_some_file.pdf", + ) event = {"detail": {"object": {"key": encoded_key}}} mocker.patch.object( @@ -1169,7 +1174,7 @@ def test_handle_expedite_event_calls_enforce_for_expedite_key(mocker, test_servi test_service.handle_expedite_event(event) - decoded_key = "expedite/folder/some file.pdf" + decoded_key = "expedite/folder/G12345/1of1_1234567890_some_file.pdf" mocked_enforce.assert_called_once_with(decoded_key) mocked_check_status.assert_called_once_with(decoded_key) @@ -1451,3 +1456,142 @@ def test_apply_fixed_values_returns_valid_metadata_file(mocker, base_metadata_fi # Ensure it can be validated again validated = MetadataFile.model_validate(result.model_dump(by_alias=True)) assert validated.section == "AR" + + +@pytest.mark.parametrize( + "key", + [ + "expedite/folder/G12345/1of1_file.pdf", + "ABC/XPTO/expedite/folder/G12345/1of1_file.pdf", + ], +) +def test_validate_ods_code_format_in_expedite_folder_accepts_valid_ods_code( + test_service, + key, +): + test_service.validate_ods_code_format_in_expedite_folder(key) + + +@pytest.mark.parametrize( + "key, expected_ods_code", + [ + ("expedite/folder/g12345/1of1_file.pdf", "g12345"), + ("expedite/folder/G1234/1of1_file.pdf", "G1234"), + ("expedite/folder/G123456/1of1_file.pdf", "G123456"), + ("expedite/folder/AB1234/1of1_file.pdf", "AB1234"), + ("expedite/folder/123456/1of1_file.pdf", "123456"), + ], +) +def test_validate_ods_code_format_in_expedite_folder_rejects_invalid_ods_code( + test_service, + key, + expected_ods_code, +): + with pytest.raises(OdsErrorException) as exc: + test_service.validate_ods_code_format_in_expedite_folder(key) + + assert expected_ods_code in str(exc.value) + assert "Invalid ODS code folder" in str(exc.value) + + +def test_handle_expedite_event_logs_marker_and_raises_for_invalid_ods_folder( + test_service, + mocker, + caplog, +): + key = "expedite/folder/invalid/1of1_1234567890_record.pdf" + event = {"detail": {"object": {"key": key}}} + + mocked_enforce = mocker.patch.object(test_service, "enforce_virus_scanner") + mocked_check = mocker.patch.object(test_service, "check_file_status") + mocked_send = mocker.patch.object(test_service, "send_metadata_to_expedite_sqs") + + with pytest.raises(BulkUploadMetadataException) as exc: + test_service.handle_expedite_event(event) + + assert "Expedite upload validation failed" in str(exc.value) + assert "Invalid ODS code folder" in str(exc.value) + assert any( + "EXPEDITE_UPLOAD_VALIDATION_FAILED" in record.message + for record in caplog.records + ) + mocked_enforce.assert_not_called() + mocked_check.assert_not_called() + mocked_send.assert_not_called() + + +def test_handle_expedite_event_logs_marker_and_raises_for_non_1of1_file( + test_service, + mocker, + caplog, +): + key = "expedite/folder/G12345/2of3_1234567890_record.pdf" + event = {"detail": {"object": {"key": key}}} + + mocker.patch.object(test_service, "enforce_virus_scanner") + mocker.patch.object(test_service, "check_file_status") + mocked_send = mocker.patch.object(test_service, "send_metadata_to_expedite_sqs") + + with pytest.raises(BulkUploadMetadataException) as exc: + test_service.handle_expedite_event(event) + + assert "Expedite upload validation failed" in str(exc.value) + assert "not being a 1of1" in str(exc.value) + assert any( + "EXPEDITE_UPLOAD_VALIDATION_FAILED" in record.message + for record in caplog.records + ) + mocked_send.assert_not_called() + + +def test_handle_expedite_event_logs_marker_and_raises_for_invalid_nhs_number( + test_service, + mocker, + caplog, +): + key = "expedite/folder/G12345/1of1_no_nhs_here.pdf" + event = {"detail": {"object": {"key": key}}} + + mocker.patch.object(test_service, "enforce_virus_scanner") + mocker.patch.object(test_service, "check_file_status") + mocked_send = mocker.patch.object(test_service, "send_metadata_to_expedite_sqs") + + with pytest.raises(BulkUploadMetadataException) as exc: + test_service.handle_expedite_event(event) + + assert "Expedite upload validation failed" in str(exc.value) + assert "Invalid NHS number" in str(exc.value) + assert any( + "EXPEDITE_UPLOAD_VALIDATION_FAILED" in record.message + for record in caplog.records + ) + mocked_send.assert_not_called() + + +def test_handle_expedite_event_logs_marker_and_raises_for_invalid_filename_validation( + test_service, + mocker, + caplog, +): + key = "expedite/folder/G12345/1of1_1234567890_record.pdf" + event = {"detail": {"object": {"key": key}}} + + mocker.patch.object(test_service, "enforce_virus_scanner") + mocker.patch.object(test_service, "check_file_status") + mocker.patch.object( + test_service.metadata_formatter_service, + "validate_record_filename", + side_effect=InvalidFileNameException("invalid filename"), + ) + mocked_send = mocker.patch.object(test_service, "send_metadata_to_expedite_sqs") + + with pytest.raises(BulkUploadMetadataException) as exc: + test_service.handle_expedite_event(event) + + assert "Expedite upload validation failed" in str(exc.value) + assert "invalid filename" in str(exc.value) + assert any( + "EXPEDITE_UPLOAD_VALIDATION_FAILED" in record.message + for record in caplog.records + ) + mocked_send.assert_not_called()