From 2b89082953380866a4f187a90e7b74690d8fe5b1 Mon Sep 17 00:00:00 2001 From: PedroSoaresNHS Date: Mon, 16 Mar 2026 15:35:02 +0000 Subject: [PATCH 1/9] [PRMP-1588] validate ODS code in expedite folder --- .../bulk_upload_metadata_processor_service.py | 14 ++ ..._bulk_upload_metadata_processor_service.py | 168 +++++++++++------- 2 files changed, 116 insertions(+), 66 deletions(-) diff --git a/lambdas/services/bulk_upload_metadata_processor_service.py b/lambdas/services/bulk_upload_metadata_processor_service.py index d32fa39ec..989ccec51 100644 --- a/lambdas/services/bulk_upload_metadata_processor_service.py +++ b/lambdas/services/bulk_upload_metadata_processor_service.py @@ -1,5 +1,6 @@ import csv import os +import re import shutil import tempfile import urllib.parse @@ -309,6 +310,17 @@ def validate_expedite_file(self, s3_object_key: str): scan_date = datetime.now().strftime("%Y-%m-%d") return nhs_number, file_name, ods_code, scan_date + def validate_ods_code_format_in_expedite_folder(self, s3_object_key: str) -> None: + """Ensure the parent directory of the expedite file is a valid ODS code.""" + ods_code = Path(s3_object_key).parent.name + + if not re.fullmatch(r"^[A-Z][0-9]{5}$", ods_code): + failure_msg = ( + f"Invalid ODS code folder '{ods_code}' in expedite path: {s3_object_key}" + ) + logger.error(failure_msg) + raise BulkUploadMetadataException(failure_msg) + def handle_expedite_event(self, event): """Process S3 EventBridge expedite uploads: enforce virus scan, ensure 1of1, extract identifiers, and send metadata to SQS.""" @@ -322,6 +334,8 @@ def handle_expedite_event(self, event): if s3_object_key.startswith("expedite/"): logger.info("Processing file from expedite folder") + self.validate_ods_code_format_in_expedite_folder(s3_object_key) + self.enforce_virus_scanner(s3_object_key) self.check_file_status(s3_object_key) diff --git a/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py b/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py index 05281c63c..aab54796b 100644 --- a/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py +++ b/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py @@ -171,9 +171,9 @@ def base_metadata_file(): def test_process_metadata_send_metadata_to_sqs_queue( - mocker, - test_service, - mock_download_metadata_from_s3, + mocker, + test_service, + mock_download_metadata_from_s3, ): fake_csv_path = "fake/path/metadata.csv" @@ -209,11 +209,11 @@ def test_process_metadata_send_metadata_to_sqs_queue( def test_process_metadata_catch_and_log_error_when_fail_to_get_metadata_csv_from_s3( - set_env, - caplog, - mock_s3_service, - mock_sqs_service, - test_service, + set_env, + caplog, + mock_s3_service, + mock_sqs_service, + test_service, ): mock_s3_service.download_file.side_effect = ClientError( {"Error": {"Code": "403", "Message": "Forbidden"}}, @@ -232,10 +232,10 @@ def test_process_metadata_catch_and_log_error_when_fail_to_get_metadata_csv_from def test_process_metadata_raise_validation_error_when_metadata_csv_is_invalid( - mock_sqs_service, - mock_download_metadata_from_s3, - test_service, - mocker, + mock_sqs_service, + mock_download_metadata_from_s3, + test_service, + mocker, ): mock_download_metadata_from_s3.return_value = "fake/path.csv" mocker.patch.object( @@ -252,16 +252,16 @@ def test_process_metadata_raise_validation_error_when_metadata_csv_is_invalid( def test_process_metadata_raise_validation_error_when_gp_practice_code_is_missing( - mock_sqs_service, - mock_download_metadata_from_s3, - test_service, - mocker, + mock_sqs_service, + mock_download_metadata_from_s3, + test_service, + mocker, ): mock_download_metadata_from_s3.return_value = "fake/path.csv" expected_error_log = ( - "Failed to parse metadata.csv: 1 validation error for MetadataFile\n" - + "GP-PRACTICE-CODE\n missing GP-PRACTICE-CODE for patient 1234567890" + "Failed to parse metadata.csv: 1 validation error for MetadataFile\n" + + "GP-PRACTICE-CODE\n missing GP-PRACTICE-CODE for patient 1234567890" ) mocker.patch.object( @@ -278,8 +278,8 @@ def test_process_metadata_raise_validation_error_when_gp_practice_code_is_missin def test_process_metadata_raise_client_error_when_failed_to_send_message_to_sqs( - test_service, - mocker, + test_service, + mocker, ): mocker.patch.object( test_service, @@ -341,18 +341,18 @@ def test_download_metadata_from_s3(mock_s3_service, test_service): def test_download_metadata_from_s3_raise_error_when_failed_to_download( - set_env, - mock_s3_service, - mock_tempfile, - test_service, + set_env, + mock_s3_service, + mock_tempfile, + test_service, ): mock_s3_service.download_file.side_effect = ClientError( {"Error": {"Code": "500", "Message": "file not exist in bucket"}}, "s3_get_object", ) with pytest.raises( - BulkUploadMetadataException, - match=f"Could not retrieve the following metadata file: {TEST_MOCK_METADATA_CSV}", + BulkUploadMetadataException, + match=f"Could not retrieve the following metadata file: {TEST_MOCK_METADATA_CSV}", ): test_service.download_metadata_from_s3() @@ -440,9 +440,9 @@ def test_send_metadata_to_sqs(set_env, mocker, mock_sqs_service, test_service): def test_send_metadata_to_sqs_raise_error_when_fail_to_send_message( - set_env, - mock_sqs_service, - test_service, + set_env, + mock_sqs_service, + test_service, ): mock_sqs_service.send_message_with_nhs_number_attr_fifo.side_effect = ClientError( { @@ -575,9 +575,9 @@ def test_extract_patient_info(test_service, base_metadata_file): def test_handle_invalid_filename_writes_failed_entry_to_dynamo( - mocker, - test_service, - base_metadata_file, + mocker, + test_service, + base_metadata_file, ): nhs_number = "1234567890" ods_code = "Y12345" @@ -626,9 +626,9 @@ def test_handle_invalid_filename_writes_failed_entry_to_dynamo( def test_handle_invalid_filename_sets_sent_to_review_true_when_review_enabled( - mocker, - test_service_with_review_enabled, - base_metadata_file, + mocker, + test_service_with_review_enabled, + base_metadata_file, ): nhs_number = "1234567890" ods_code = "Y12345" @@ -658,8 +658,8 @@ def test_handle_invalid_filename_sets_sent_to_review_true_when_review_enabled( def test_csv_to_sqs_metadata_sends_failed_files_to_review_queue_when_enabled( - mocker, - test_service_with_review_enabled, + mocker, + test_service_with_review_enabled, ): """Test that failed files are sent to review queue when flag is enabled""" mocker.patch.object( @@ -678,8 +678,8 @@ def test_csv_to_sqs_metadata_sends_failed_files_to_review_queue_when_enabled( def test_csv_to_sqs_metadata_does_not_send_to_review_when_disabled( - mocker, - test_service, + mocker, + test_service, ): """Test that failed files are NOT sent to review queue when flag is disabled""" mocker.patch.object( @@ -696,8 +696,8 @@ def test_csv_to_sqs_metadata_does_not_send_to_review_when_disabled( def test_csv_to_sqs_metadata_does_not_send_to_review_when_no_failures( - mocker, - test_service, + mocker, + test_service, ): """Test that review queue is not called when there are no failures""" mock_send_to_review = mocker.patch.object( @@ -718,8 +718,8 @@ def test_csv_to_sqs_metadata_does_not_send_to_review_when_no_failures( def test_csv_to_sqs_metadata_groups_multiple_failed_files_by_nhs_number( - mocker, - test_service, + mocker, + test_service, ): pass @@ -740,9 +740,9 @@ def test_convert_to_sqs_metadata(base_metadata_file): def test_validate_and_correct_filename_returns_happy_path( - mocker, - test_service, - base_metadata_file, + mocker, + test_service, + base_metadata_file, ): mocker.patch( "services.bulk_upload_metadata_processor_service.validate_file_name", @@ -755,9 +755,9 @@ def test_validate_and_correct_filename_returns_happy_path( def test_validate_and_correct_filename_sad_path( - mocker, - test_service, - base_metadata_file, + mocker, + test_service, + base_metadata_file, ): mocker.patch( "services.bulk_upload_metadata_processor_service.validate_file_name", @@ -900,8 +900,8 @@ def test_csv_to_sqs_metadata_happy_path(mocker, test_service, mock_csv_content): def test_csv_to_sqs_metadata_raises_BulkUploadMetadataException_if_no_headers( - mocker, - test_service, + mocker, + test_service, ): mocker.patch("builtins.open", mocker.mock_open(read_data="")) @@ -910,9 +910,9 @@ def test_csv_to_sqs_metadata_raises_BulkUploadMetadataException_if_no_headers( def test_csv_to_sqs_metadata_raises_BulkUploadMetadataException_if_all_rows_rejected( - mocker, - test_service, - mock_csv_content, + mocker, + test_service, + mock_csv_content, ): mocker.patch("builtins.open", mocker.mock_open(read_data=mock_csv_content)) @@ -927,8 +927,8 @@ def test_csv_to_sqs_metadata_raises_BulkUploadMetadataException_if_all_rows_reje ) with pytest.raises( - BulkUploadMetadataException, - match="No valid metadata rows found", + BulkUploadMetadataException, + match="No valid metadata rows found", ): test_service.csv_to_sqs_metadata("fake/path.csv") @@ -1030,9 +1030,9 @@ def mock_remap_csv_content(): def test_remapping_mandatory_fields( - mocker, - mock_service_remapping_mandatory_fields, - mock_remap_csv_content, + mocker, + mock_service_remapping_mandatory_fields, + mock_remap_csv_content, ): mocker.patch("builtins.open", mocker.mock_open(read_data=mock_remap_csv_content)) @@ -1104,9 +1104,9 @@ def mock_noremap_csv_content(): def test_no_remapping_logic( - mocker, - mock_service_no_remapping, - mock_noremap_csv_content, + mocker, + mock_service_no_remapping, + mock_noremap_csv_content, ): mocker.patch("builtins.open", mocker.mock_open(read_data=mock_noremap_csv_content)) @@ -1157,7 +1157,7 @@ def test_validate_expedite_file_rejects_non_1of1(test_service, key): def test_handle_expedite_event_calls_enforce_for_expedite_key(mocker, test_service): - encoded_key = urllib.parse.quote_plus("expedite/folder/some file.pdf") + encoded_key = urllib.parse.quote_plus("expedite/folder/G12345/some file.pdf") event = {"detail": {"object": {"key": encoded_key}}} mocker.patch.object( @@ -1169,7 +1169,7 @@ def test_handle_expedite_event_calls_enforce_for_expedite_key(mocker, test_servi test_service.handle_expedite_event(event) - decoded_key = "expedite/folder/some file.pdf" + decoded_key = "expedite/folder/G12345/some file.pdf" mocked_enforce.assert_called_once_with(decoded_key) mocked_check_status.assert_called_once_with(decoded_key) @@ -1264,8 +1264,8 @@ def test_enforce_virus_scanner_triggers_scan_when_no_result(mocker, test_service def test_enforce_virus_scanner_raises_bulk_exception_on_s3_access_error( - mocker, - test_service, + mocker, + test_service, ): file_key = "expedite/folder/file.pdf" client_error = ClientError( @@ -1451,3 +1451,39 @@ def test_apply_fixed_values_returns_valid_metadata_file(mocker, base_metadata_fi # Ensure it can be validated again validated = MetadataFile.model_validate(result.model_dump(by_alias=True)) assert validated.section == "AR" + + +@pytest.mark.parametrize( + "key", + [ + "expedite/folder/G12345/1of1_file.pdf", + "ABC/XPTO/expedite/folder/G12345/1of1_file.pdf", + ], +) +def test_validate_ods_code_format_in_expedite_folder_accepts_valid_ods_code( + test_service, + key, +): + test_service.validate_ods_code_format_in_expedite_folder(key) + + +@pytest.mark.parametrize( +"key, expected_ods_code", +[ + ("expedite/folder/g12345/1of1_file.pdf", "g12345"), + ("expedite/folder/G1234/1of1_file.pdf", "G1234"), + ("expedite/folder/G123456/1of1_file.pdf", "G123456"), + ("expedite/folder/AB1234/1of1_file.pdf", "AB1234"), + ("expedite/folder/123456/1of1_file.pdf", "123456"), +], +) +def test_validate_ods_code_format_in_expedite_folder_rejects_invalid_ods_code( + test_service, + key, + expected_ods_code, +): + with pytest.raises(BulkUploadMetadataException) as exc: + test_service.validate_ods_code_format_in_expedite_folder(key) + + assert expected_ods_code in str(exc.value) + assert "Invalid ODS code folder" in str(exc.value) From 626c95240375435a4368ece97fa049c15ed8e4e2 Mon Sep 17 00:00:00 2001 From: PedroSoaresNHS Date: Mon, 16 Mar 2026 15:35:17 +0000 Subject: [PATCH 2/9] [PRMP-1588] linting --- .../bulk_upload_metadata_processor_service.py | 4 +- ..._bulk_upload_metadata_processor_service.py | 148 +++++++++--------- 2 files changed, 75 insertions(+), 77 deletions(-) diff --git a/lambdas/services/bulk_upload_metadata_processor_service.py b/lambdas/services/bulk_upload_metadata_processor_service.py index 989ccec51..4401c2c53 100644 --- a/lambdas/services/bulk_upload_metadata_processor_service.py +++ b/lambdas/services/bulk_upload_metadata_processor_service.py @@ -315,9 +315,7 @@ def validate_ods_code_format_in_expedite_folder(self, s3_object_key: str) -> Non ods_code = Path(s3_object_key).parent.name if not re.fullmatch(r"^[A-Z][0-9]{5}$", ods_code): - failure_msg = ( - f"Invalid ODS code folder '{ods_code}' in expedite path: {s3_object_key}" - ) + failure_msg = f"Invalid ODS code folder '{ods_code}' in expedite path: {s3_object_key}" logger.error(failure_msg) raise BulkUploadMetadataException(failure_msg) diff --git a/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py b/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py index aab54796b..63022136e 100644 --- a/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py +++ b/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py @@ -171,9 +171,9 @@ def base_metadata_file(): def test_process_metadata_send_metadata_to_sqs_queue( - mocker, - test_service, - mock_download_metadata_from_s3, + mocker, + test_service, + mock_download_metadata_from_s3, ): fake_csv_path = "fake/path/metadata.csv" @@ -209,11 +209,11 @@ def test_process_metadata_send_metadata_to_sqs_queue( def test_process_metadata_catch_and_log_error_when_fail_to_get_metadata_csv_from_s3( - set_env, - caplog, - mock_s3_service, - mock_sqs_service, - test_service, + set_env, + caplog, + mock_s3_service, + mock_sqs_service, + test_service, ): mock_s3_service.download_file.side_effect = ClientError( {"Error": {"Code": "403", "Message": "Forbidden"}}, @@ -232,10 +232,10 @@ def test_process_metadata_catch_and_log_error_when_fail_to_get_metadata_csv_from def test_process_metadata_raise_validation_error_when_metadata_csv_is_invalid( - mock_sqs_service, - mock_download_metadata_from_s3, - test_service, - mocker, + mock_sqs_service, + mock_download_metadata_from_s3, + test_service, + mocker, ): mock_download_metadata_from_s3.return_value = "fake/path.csv" mocker.patch.object( @@ -252,16 +252,16 @@ def test_process_metadata_raise_validation_error_when_metadata_csv_is_invalid( def test_process_metadata_raise_validation_error_when_gp_practice_code_is_missing( - mock_sqs_service, - mock_download_metadata_from_s3, - test_service, - mocker, + mock_sqs_service, + mock_download_metadata_from_s3, + test_service, + mocker, ): mock_download_metadata_from_s3.return_value = "fake/path.csv" expected_error_log = ( - "Failed to parse metadata.csv: 1 validation error for MetadataFile\n" - + "GP-PRACTICE-CODE\n missing GP-PRACTICE-CODE for patient 1234567890" + "Failed to parse metadata.csv: 1 validation error for MetadataFile\n" + + "GP-PRACTICE-CODE\n missing GP-PRACTICE-CODE for patient 1234567890" ) mocker.patch.object( @@ -278,8 +278,8 @@ def test_process_metadata_raise_validation_error_when_gp_practice_code_is_missin def test_process_metadata_raise_client_error_when_failed_to_send_message_to_sqs( - test_service, - mocker, + test_service, + mocker, ): mocker.patch.object( test_service, @@ -341,18 +341,18 @@ def test_download_metadata_from_s3(mock_s3_service, test_service): def test_download_metadata_from_s3_raise_error_when_failed_to_download( - set_env, - mock_s3_service, - mock_tempfile, - test_service, + set_env, + mock_s3_service, + mock_tempfile, + test_service, ): mock_s3_service.download_file.side_effect = ClientError( {"Error": {"Code": "500", "Message": "file not exist in bucket"}}, "s3_get_object", ) with pytest.raises( - BulkUploadMetadataException, - match=f"Could not retrieve the following metadata file: {TEST_MOCK_METADATA_CSV}", + BulkUploadMetadataException, + match=f"Could not retrieve the following metadata file: {TEST_MOCK_METADATA_CSV}", ): test_service.download_metadata_from_s3() @@ -440,9 +440,9 @@ def test_send_metadata_to_sqs(set_env, mocker, mock_sqs_service, test_service): def test_send_metadata_to_sqs_raise_error_when_fail_to_send_message( - set_env, - mock_sqs_service, - test_service, + set_env, + mock_sqs_service, + test_service, ): mock_sqs_service.send_message_with_nhs_number_attr_fifo.side_effect = ClientError( { @@ -575,9 +575,9 @@ def test_extract_patient_info(test_service, base_metadata_file): def test_handle_invalid_filename_writes_failed_entry_to_dynamo( - mocker, - test_service, - base_metadata_file, + mocker, + test_service, + base_metadata_file, ): nhs_number = "1234567890" ods_code = "Y12345" @@ -626,9 +626,9 @@ def test_handle_invalid_filename_writes_failed_entry_to_dynamo( def test_handle_invalid_filename_sets_sent_to_review_true_when_review_enabled( - mocker, - test_service_with_review_enabled, - base_metadata_file, + mocker, + test_service_with_review_enabled, + base_metadata_file, ): nhs_number = "1234567890" ods_code = "Y12345" @@ -658,8 +658,8 @@ def test_handle_invalid_filename_sets_sent_to_review_true_when_review_enabled( def test_csv_to_sqs_metadata_sends_failed_files_to_review_queue_when_enabled( - mocker, - test_service_with_review_enabled, + mocker, + test_service_with_review_enabled, ): """Test that failed files are sent to review queue when flag is enabled""" mocker.patch.object( @@ -678,8 +678,8 @@ def test_csv_to_sqs_metadata_sends_failed_files_to_review_queue_when_enabled( def test_csv_to_sqs_metadata_does_not_send_to_review_when_disabled( - mocker, - test_service, + mocker, + test_service, ): """Test that failed files are NOT sent to review queue when flag is disabled""" mocker.patch.object( @@ -696,8 +696,8 @@ def test_csv_to_sqs_metadata_does_not_send_to_review_when_disabled( def test_csv_to_sqs_metadata_does_not_send_to_review_when_no_failures( - mocker, - test_service, + mocker, + test_service, ): """Test that review queue is not called when there are no failures""" mock_send_to_review = mocker.patch.object( @@ -718,8 +718,8 @@ def test_csv_to_sqs_metadata_does_not_send_to_review_when_no_failures( def test_csv_to_sqs_metadata_groups_multiple_failed_files_by_nhs_number( - mocker, - test_service, + mocker, + test_service, ): pass @@ -740,9 +740,9 @@ def test_convert_to_sqs_metadata(base_metadata_file): def test_validate_and_correct_filename_returns_happy_path( - mocker, - test_service, - base_metadata_file, + mocker, + test_service, + base_metadata_file, ): mocker.patch( "services.bulk_upload_metadata_processor_service.validate_file_name", @@ -755,9 +755,9 @@ def test_validate_and_correct_filename_returns_happy_path( def test_validate_and_correct_filename_sad_path( - mocker, - test_service, - base_metadata_file, + mocker, + test_service, + base_metadata_file, ): mocker.patch( "services.bulk_upload_metadata_processor_service.validate_file_name", @@ -900,8 +900,8 @@ def test_csv_to_sqs_metadata_happy_path(mocker, test_service, mock_csv_content): def test_csv_to_sqs_metadata_raises_BulkUploadMetadataException_if_no_headers( - mocker, - test_service, + mocker, + test_service, ): mocker.patch("builtins.open", mocker.mock_open(read_data="")) @@ -910,9 +910,9 @@ def test_csv_to_sqs_metadata_raises_BulkUploadMetadataException_if_no_headers( def test_csv_to_sqs_metadata_raises_BulkUploadMetadataException_if_all_rows_rejected( - mocker, - test_service, - mock_csv_content, + mocker, + test_service, + mock_csv_content, ): mocker.patch("builtins.open", mocker.mock_open(read_data=mock_csv_content)) @@ -927,8 +927,8 @@ def test_csv_to_sqs_metadata_raises_BulkUploadMetadataException_if_all_rows_reje ) with pytest.raises( - BulkUploadMetadataException, - match="No valid metadata rows found", + BulkUploadMetadataException, + match="No valid metadata rows found", ): test_service.csv_to_sqs_metadata("fake/path.csv") @@ -1030,9 +1030,9 @@ def mock_remap_csv_content(): def test_remapping_mandatory_fields( - mocker, - mock_service_remapping_mandatory_fields, - mock_remap_csv_content, + mocker, + mock_service_remapping_mandatory_fields, + mock_remap_csv_content, ): mocker.patch("builtins.open", mocker.mock_open(read_data=mock_remap_csv_content)) @@ -1104,9 +1104,9 @@ def mock_noremap_csv_content(): def test_no_remapping_logic( - mocker, - mock_service_no_remapping, - mock_noremap_csv_content, + mocker, + mock_service_no_remapping, + mock_noremap_csv_content, ): mocker.patch("builtins.open", mocker.mock_open(read_data=mock_noremap_csv_content)) @@ -1264,8 +1264,8 @@ def test_enforce_virus_scanner_triggers_scan_when_no_result(mocker, test_service def test_enforce_virus_scanner_raises_bulk_exception_on_s3_access_error( - mocker, - test_service, + mocker, + test_service, ): file_key = "expedite/folder/file.pdf" client_error = ClientError( @@ -1461,21 +1461,21 @@ def test_apply_fixed_values_returns_valid_metadata_file(mocker, base_metadata_fi ], ) def test_validate_ods_code_format_in_expedite_folder_accepts_valid_ods_code( - test_service, - key, + test_service, + key, ): test_service.validate_ods_code_format_in_expedite_folder(key) @pytest.mark.parametrize( -"key, expected_ods_code", -[ - ("expedite/folder/g12345/1of1_file.pdf", "g12345"), - ("expedite/folder/G1234/1of1_file.pdf", "G1234"), - ("expedite/folder/G123456/1of1_file.pdf", "G123456"), - ("expedite/folder/AB1234/1of1_file.pdf", "AB1234"), - ("expedite/folder/123456/1of1_file.pdf", "123456"), -], + "key, expected_ods_code", + [ + ("expedite/folder/g12345/1of1_file.pdf", "g12345"), + ("expedite/folder/G1234/1of1_file.pdf", "G1234"), + ("expedite/folder/G123456/1of1_file.pdf", "G123456"), + ("expedite/folder/AB1234/1of1_file.pdf", "AB1234"), + ("expedite/folder/123456/1of1_file.pdf", "123456"), + ], ) def test_validate_ods_code_format_in_expedite_folder_rejects_invalid_ods_code( test_service, From a334f87be67b7cd1b4cd80142bd4b3d0e381e096 Mon Sep 17 00:00:00 2001 From: PedroSoaresNHS Date: Tue, 17 Mar 2026 09:48:03 +0000 Subject: [PATCH 3/9] [PRMP-1588] added extra logging --- lambdas/services/bulk_upload_metadata_processor_service.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lambdas/services/bulk_upload_metadata_processor_service.py b/lambdas/services/bulk_upload_metadata_processor_service.py index 4401c2c53..4ea42059d 100644 --- a/lambdas/services/bulk_upload_metadata_processor_service.py +++ b/lambdas/services/bulk_upload_metadata_processor_service.py @@ -316,6 +316,7 @@ def validate_ods_code_format_in_expedite_folder(self, s3_object_key: str) -> Non if not re.fullmatch(r"^[A-Z][0-9]{5}$", ods_code): failure_msg = f"Invalid ODS code folder '{ods_code}' in expedite path: {s3_object_key}" + logger.info("Failed to validate ods code in expedite folder") logger.error(failure_msg) raise BulkUploadMetadataException(failure_msg) From c2a3014f4b0ba9b2d9401815f7e5b4f118c89672 Mon Sep 17 00:00:00 2001 From: PedroSoaresNHS Date: Wed, 18 Mar 2026 10:27:40 +0000 Subject: [PATCH 4/9] [PRMP-1589] added alarm --- .../bulk_upload_metadata_processor_service.py | 32 +++-- ..._bulk_upload_metadata_processor_service.py | 115 +++++++++++++++++- 2 files changed, 130 insertions(+), 17 deletions(-) diff --git a/lambdas/services/bulk_upload_metadata_processor_service.py b/lambdas/services/bulk_upload_metadata_processor_service.py index 4ea42059d..aaf11e4d2 100644 --- a/lambdas/services/bulk_upload_metadata_processor_service.py +++ b/lambdas/services/bulk_upload_metadata_processor_service.py @@ -43,7 +43,7 @@ BulkUploadMetadataException, InvalidFileNameException, LGInvalidFilesException, - VirusScanFailedException, + VirusScanFailedException, OdsErrorException, ) from utils.filename_utils import extract_nhs_number_from_bulk_upload_file_name from utils.lloyd_george_validator import validate_file_name @@ -51,6 +51,7 @@ logger = LoggingService(__name__) UNSUCCESSFUL = "Unsuccessful bulk upload" +EXPEDITE_UPLOAD_VALIDATION_FAILED_LOG_MARKER = "EXPEDITE_UPLOAD_VALIDATION_FAILED" class BulkUploadMetadataProcessorService: @@ -300,7 +301,7 @@ def validate_expedite_file(self, s3_object_key: str): "Failed processing expedite event due to file not being a 1of1" ) logger.error(failure_msg) - raise BulkUploadMetadataException(failure_msg) + raise InvalidFileNameException(failure_msg) nhs_number = extract_nhs_number_from_bulk_upload_file_name(file_path)[0] file_name = self.metadata_formatter_service.validate_record_filename( @@ -315,10 +316,11 @@ def validate_ods_code_format_in_expedite_folder(self, s3_object_key: str) -> Non ods_code = Path(s3_object_key).parent.name if not re.fullmatch(r"^[A-Z][0-9]{5}$", ods_code): - failure_msg = f"Invalid ODS code folder '{ods_code}' in expedite path: {s3_object_key}" - logger.info("Failed to validate ods code in expedite folder") + failure_msg = ( + f"Invalid ODS code folder '{ods_code}' in expedite path: {s3_object_key}" + ) logger.error(failure_msg) - raise BulkUploadMetadataException(failure_msg) + raise OdsErrorException(failure_msg) def handle_expedite_event(self, event): """Process S3 EventBridge expedite uploads: enforce virus scan, ensure 1of1, extract identifiers, @@ -332,16 +334,24 @@ def handle_expedite_event(self, event): if s3_object_key.startswith("expedite/"): logger.info("Processing file from expedite folder") + try: + self.validate_ods_code_format_in_expedite_folder(s3_object_key) + + self.enforce_virus_scanner(s3_object_key) + self.check_file_status(s3_object_key) - self.validate_ods_code_format_in_expedite_folder(s3_object_key) + sqs_metadata = self.create_expedite_sqs_metadata(s3_object_key) - self.enforce_virus_scanner(s3_object_key) - self.check_file_status(s3_object_key) + self.send_metadata_to_expedite_sqs(sqs_metadata) + logger.info("Successfully processed expedite event") - sqs_metadata = self.create_expedite_sqs_metadata(s3_object_key) + except (InvalidFileNameException, OdsErrorException) as error: + logger.info(EXPEDITE_UPLOAD_VALIDATION_FAILED_LOG_MARKER) + failure_msg = ( + f"Expedite upload validation failed for '{s3_object_key}': {error}" + ) + raise BulkUploadMetadataException(failure_msg) - self.send_metadata_to_expedite_sqs(sqs_metadata) - logger.info("Successfully processed expedite event") else: failure_msg = f"Unexpected directory or file location received from EventBridge: {s3_object_key}" logger.error(failure_msg) diff --git a/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py b/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py index 63022136e..b4c247d53 100644 --- a/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py +++ b/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py @@ -36,7 +36,7 @@ BulkUploadMetadataException, InvalidFileNameException, LGInvalidFilesException, - VirusScanFailedException, + VirusScanFailedException, OdsErrorException, ) METADATA_FILE_DIR = "tests/unit/helpers/data/bulk_upload" @@ -856,7 +856,7 @@ def test_handle_expedite_event_rejects_non_1of1(test_service, mocker): mocker.patch.object(BulkUploadMetadataProcessorService, "check_file_status") mocked_send = mocker.patch.object( BulkUploadMetadataProcessorService, - "send_metadata_to_fifo_sqs", + "send_metadata_to_expedite_sqs", ) key = "expedite/A12345/2of3_1234567890_record.pdf" event = {"detail": {"object": {"key": key}}} @@ -1152,12 +1152,16 @@ def test_validate_expedite_file_happy_path_returns_expected_tuple(test_service): ], ) def test_validate_expedite_file_rejects_non_1of1(test_service, key): - with pytest.raises(BulkUploadMetadataException): + with pytest.raises(InvalidFileNameException) as exc: test_service.validate_expedite_file(key) + assert "not being a 1of1" in str(exc.value) + def test_handle_expedite_event_calls_enforce_for_expedite_key(mocker, test_service): - encoded_key = urllib.parse.quote_plus("expedite/folder/G12345/some file.pdf") + encoded_key = urllib.parse.quote_plus( + "expedite/folder/G12345/1of1_1234567890_some file.pdf" + ) event = {"detail": {"object": {"key": encoded_key}}} mocker.patch.object( @@ -1169,7 +1173,7 @@ def test_handle_expedite_event_calls_enforce_for_expedite_key(mocker, test_servi test_service.handle_expedite_event(event) - decoded_key = "expedite/folder/G12345/some file.pdf" + decoded_key = "expedite/folder/G12345/1of1_1234567890_some file.pdf" mocked_enforce.assert_called_once_with(decoded_key) mocked_check_status.assert_called_once_with(decoded_key) @@ -1482,8 +1486,107 @@ def test_validate_ods_code_format_in_expedite_folder_rejects_invalid_ods_code( key, expected_ods_code, ): - with pytest.raises(BulkUploadMetadataException) as exc: + with pytest.raises(OdsErrorException) as exc: test_service.validate_ods_code_format_in_expedite_folder(key) assert expected_ods_code in str(exc.value) assert "Invalid ODS code folder" in str(exc.value) + +def test_handle_expedite_event_logs_marker_and_raises_for_invalid_ods_folder( + test_service, + mocker, + caplog, +): + key = "expedite/folder/invalid/1of1_1234567890_record.pdf" + event = {"detail": {"object": {"key": key}}} + + mocked_enforce = mocker.patch.object(test_service, "enforce_virus_scanner") + mocked_check = mocker.patch.object(test_service, "check_file_status") + mocked_send = mocker.patch.object(test_service, "send_metadata_to_expedite_sqs") + + with pytest.raises(BulkUploadMetadataException) as exc: + test_service.handle_expedite_event(event) + + assert "Expedite upload validation failed" in str(exc.value) + assert "Invalid ODS code folder" in str(exc.value) + assert any( + "EXPEDITE_UPLOAD_VALIDATION_FAILED" in record.message + for record in caplog.records + ) + mocked_enforce.assert_not_called() + mocked_check.assert_not_called() + mocked_send.assert_not_called() + +def test_handle_expedite_event_logs_marker_and_raises_for_non_1of1_file( + test_service, + mocker, + caplog, +): + key = "expedite/folder/G12345/2of3_1234567890_record.pdf" + event = {"detail": {"object": {"key": key}}} + + mocker.patch.object(test_service, "enforce_virus_scanner") + mocker.patch.object(test_service, "check_file_status") + mocked_send = mocker.patch.object(test_service, "send_metadata_to_expedite_sqs") + + with pytest.raises(BulkUploadMetadataException) as exc: + test_service.handle_expedite_event(event) + + assert "Expedite upload validation failed" in str(exc.value) + assert "not being a 1of1" in str(exc.value) + assert any( + "EXPEDITE_UPLOAD_VALIDATION_FAILED" in record.message + for record in caplog.records + ) + mocked_send.assert_not_called() + +def test_handle_expedite_event_logs_marker_and_raises_for_invalid_nhs_number( + test_service, + mocker, + caplog, +): + key = "expedite/folder/G12345/1of1_no_nhs_here.pdf" + event = {"detail": {"object": {"key": key}}} + + mocker.patch.object(test_service, "enforce_virus_scanner") + mocker.patch.object(test_service, "check_file_status") + mocked_send = mocker.patch.object(test_service, "send_metadata_to_expedite_sqs") + + with pytest.raises(BulkUploadMetadataException) as exc: + test_service.handle_expedite_event(event) + + assert "Expedite upload validation failed" in str(exc.value) + assert "Invalid NHS number" in str(exc.value) + assert any( + "EXPEDITE_UPLOAD_VALIDATION_FAILED" in record.message + for record in caplog.records + ) + mocked_send.assert_not_called() + +def test_handle_expedite_event_logs_marker_and_raises_for_invalid_filename_validation( + test_service, + mocker, + caplog, +): + key = "expedite/folder/G12345/1of1_1234567890_record.pdf" + event = {"detail": {"object": {"key": key}}} + + mocker.patch.object(test_service, "enforce_virus_scanner") + mocker.patch.object(test_service, "check_file_status") + mocker.patch.object( + test_service.metadata_formatter_service, + "validate_record_filename", + side_effect=InvalidFileNameException("invalid filename"), + ) + mocked_send = mocker.patch.object(test_service, "send_metadata_to_expedite_sqs") + + with pytest.raises(BulkUploadMetadataException) as exc: + test_service.handle_expedite_event(event) + + assert "Expedite upload validation failed" in str(exc.value) + assert "invalid filename" in str(exc.value) + assert any( + "EXPEDITE_UPLOAD_VALIDATION_FAILED" in record.message + for record in caplog.records + ) + mocked_send.assert_not_called() \ No newline at end of file From b8142e2a1509346e52488ce5f885b1016188b9be Mon Sep 17 00:00:00 2001 From: PedroSoaresNHS Date: Wed, 18 Mar 2026 10:27:53 +0000 Subject: [PATCH 5/9] [PRMP-1589] format code --- .../bulk_upload_metadata_processor_service.py | 11 ++++------- .../test_bulk_upload_metadata_processor_service.py | 11 ++++++++--- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/lambdas/services/bulk_upload_metadata_processor_service.py b/lambdas/services/bulk_upload_metadata_processor_service.py index aaf11e4d2..c049aec9b 100644 --- a/lambdas/services/bulk_upload_metadata_processor_service.py +++ b/lambdas/services/bulk_upload_metadata_processor_service.py @@ -43,7 +43,8 @@ BulkUploadMetadataException, InvalidFileNameException, LGInvalidFilesException, - VirusScanFailedException, OdsErrorException, + OdsErrorException, + VirusScanFailedException, ) from utils.filename_utils import extract_nhs_number_from_bulk_upload_file_name from utils.lloyd_george_validator import validate_file_name @@ -316,9 +317,7 @@ def validate_ods_code_format_in_expedite_folder(self, s3_object_key: str) -> Non ods_code = Path(s3_object_key).parent.name if not re.fullmatch(r"^[A-Z][0-9]{5}$", ods_code): - failure_msg = ( - f"Invalid ODS code folder '{ods_code}' in expedite path: {s3_object_key}" - ) + failure_msg = f"Invalid ODS code folder '{ods_code}' in expedite path: {s3_object_key}" logger.error(failure_msg) raise OdsErrorException(failure_msg) @@ -347,9 +346,7 @@ def handle_expedite_event(self, event): except (InvalidFileNameException, OdsErrorException) as error: logger.info(EXPEDITE_UPLOAD_VALIDATION_FAILED_LOG_MARKER) - failure_msg = ( - f"Expedite upload validation failed for '{s3_object_key}': {error}" - ) + failure_msg = f"Expedite upload validation failed for '{s3_object_key}': {error}" raise BulkUploadMetadataException(failure_msg) else: diff --git a/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py b/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py index b4c247d53..f06843141 100644 --- a/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py +++ b/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py @@ -36,7 +36,8 @@ BulkUploadMetadataException, InvalidFileNameException, LGInvalidFilesException, - VirusScanFailedException, OdsErrorException, + OdsErrorException, + VirusScanFailedException, ) METADATA_FILE_DIR = "tests/unit/helpers/data/bulk_upload" @@ -1160,7 +1161,7 @@ def test_validate_expedite_file_rejects_non_1of1(test_service, key): def test_handle_expedite_event_calls_enforce_for_expedite_key(mocker, test_service): encoded_key = urllib.parse.quote_plus( - "expedite/folder/G12345/1of1_1234567890_some file.pdf" + "expedite/folder/G12345/1of1_1234567890_some file.pdf", ) event = {"detail": {"object": {"key": encoded_key}}} @@ -1492,6 +1493,7 @@ def test_validate_ods_code_format_in_expedite_folder_rejects_invalid_ods_code( assert expected_ods_code in str(exc.value) assert "Invalid ODS code folder" in str(exc.value) + def test_handle_expedite_event_logs_marker_and_raises_for_invalid_ods_folder( test_service, mocker, @@ -1517,6 +1519,7 @@ def test_handle_expedite_event_logs_marker_and_raises_for_invalid_ods_folder( mocked_check.assert_not_called() mocked_send.assert_not_called() + def test_handle_expedite_event_logs_marker_and_raises_for_non_1of1_file( test_service, mocker, @@ -1540,6 +1543,7 @@ def test_handle_expedite_event_logs_marker_and_raises_for_non_1of1_file( ) mocked_send.assert_not_called() + def test_handle_expedite_event_logs_marker_and_raises_for_invalid_nhs_number( test_service, mocker, @@ -1563,6 +1567,7 @@ def test_handle_expedite_event_logs_marker_and_raises_for_invalid_nhs_number( ) mocked_send.assert_not_called() + def test_handle_expedite_event_logs_marker_and_raises_for_invalid_filename_validation( test_service, mocker, @@ -1589,4 +1594,4 @@ def test_handle_expedite_event_logs_marker_and_raises_for_invalid_filename_valid "EXPEDITE_UPLOAD_VALIDATION_FAILED" in record.message for record in caplog.records ) - mocked_send.assert_not_called() \ No newline at end of file + mocked_send.assert_not_called() From 080f7446b411d9b085f422d1dd044ab2108a35db Mon Sep 17 00:00:00 2001 From: PedroSoaresNHS Date: Thu, 19 Mar 2026 10:06:47 +0000 Subject: [PATCH 6/9] [PRMP-1589] fixed comment --- .../services/test_bulk_upload_metadata_processor_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py b/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py index f06843141..cdb9c1dca 100644 --- a/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py +++ b/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py @@ -1161,7 +1161,7 @@ def test_validate_expedite_file_rejects_non_1of1(test_service, key): def test_handle_expedite_event_calls_enforce_for_expedite_key(mocker, test_service): encoded_key = urllib.parse.quote_plus( - "expedite/folder/G12345/1of1_1234567890_some file.pdf", + "expedite/folder/G12345/1of1_1234567890_some_file.pdf", ) event = {"detail": {"object": {"key": encoded_key}}} @@ -1174,7 +1174,7 @@ def test_handle_expedite_event_calls_enforce_for_expedite_key(mocker, test_servi test_service.handle_expedite_event(event) - decoded_key = "expedite/folder/G12345/1of1_1234567890_some file.pdf" + decoded_key = "expedite/folder/G12345/1of1_1234567890_some_file.pdf" mocked_enforce.assert_called_once_with(decoded_key) mocked_check_status.assert_called_once_with(decoded_key) From e08458b25605db9aba47184bd358b015d359dde7 Mon Sep 17 00:00:00 2001 From: PedroSoaresNHS Date: Thu, 19 Mar 2026 10:09:27 +0000 Subject: [PATCH 7/9] [PRMP-1588] fixed comments --- lambdas/services/bulk_upload_metadata_processor_service.py | 2 +- .../services/test_bulk_upload_metadata_processor_service.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lambdas/services/bulk_upload_metadata_processor_service.py b/lambdas/services/bulk_upload_metadata_processor_service.py index 4ea42059d..7fbcb950c 100644 --- a/lambdas/services/bulk_upload_metadata_processor_service.py +++ b/lambdas/services/bulk_upload_metadata_processor_service.py @@ -316,7 +316,7 @@ def validate_ods_code_format_in_expedite_folder(self, s3_object_key: str) -> Non if not re.fullmatch(r"^[A-Z][0-9]{5}$", ods_code): failure_msg = f"Invalid ODS code folder '{ods_code}' in expedite path: {s3_object_key}" - logger.info("Failed to validate ods code in expedite folder") + logger.info("Failed to validate ODS code in expedite folder") logger.error(failure_msg) raise BulkUploadMetadataException(failure_msg) diff --git a/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py b/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py index 63022136e..027cfd57a 100644 --- a/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py +++ b/lambdas/tests/unit/services/test_bulk_upload_metadata_processor_service.py @@ -1157,7 +1157,7 @@ def test_validate_expedite_file_rejects_non_1of1(test_service, key): def test_handle_expedite_event_calls_enforce_for_expedite_key(mocker, test_service): - encoded_key = urllib.parse.quote_plus("expedite/folder/G12345/some file.pdf") + encoded_key = urllib.parse.quote_plus("expedite/folder/G12345/some_file.pdf") event = {"detail": {"object": {"key": encoded_key}}} mocker.patch.object( @@ -1169,7 +1169,7 @@ def test_handle_expedite_event_calls_enforce_for_expedite_key(mocker, test_servi test_service.handle_expedite_event(event) - decoded_key = "expedite/folder/G12345/some file.pdf" + decoded_key = "expedite/folder/G12345/some_file.pdf" mocked_enforce.assert_called_once_with(decoded_key) mocked_check_status.assert_called_once_with(decoded_key) From b679fff3e9cea9e06936ca7ea83acd514957794f Mon Sep 17 00:00:00 2001 From: PedroSoaresNHS Date: Thu, 19 Mar 2026 10:45:12 +0000 Subject: [PATCH 8/9] [PRMP-1588] fixed sonar --- lambdas/services/bulk_upload_metadata_processor_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lambdas/services/bulk_upload_metadata_processor_service.py b/lambdas/services/bulk_upload_metadata_processor_service.py index 7fbcb950c..d60a72aac 100644 --- a/lambdas/services/bulk_upload_metadata_processor_service.py +++ b/lambdas/services/bulk_upload_metadata_processor_service.py @@ -314,7 +314,7 @@ def validate_ods_code_format_in_expedite_folder(self, s3_object_key: str) -> Non """Ensure the parent directory of the expedite file is a valid ODS code.""" ods_code = Path(s3_object_key).parent.name - if not re.fullmatch(r"^[A-Z][0-9]{5}$", ods_code): + if not re.fullmatch(r"^[A-Z]\d{5}$", ods_code): failure_msg = f"Invalid ODS code folder '{ods_code}' in expedite path: {s3_object_key}" logger.info("Failed to validate ODS code in expedite folder") logger.error(failure_msg) From e6251a9e3c99adee4106b22f24d9a2afa9a10e8f Mon Sep 17 00:00:00 2001 From: PedroSoaresNHS Date: Thu, 19 Mar 2026 10:46:51 +0000 Subject: [PATCH 9/9] [PRMP-1589] fixed sonar --- lambdas/services/bulk_upload_metadata_processor_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lambdas/services/bulk_upload_metadata_processor_service.py b/lambdas/services/bulk_upload_metadata_processor_service.py index c049aec9b..8b29cec35 100644 --- a/lambdas/services/bulk_upload_metadata_processor_service.py +++ b/lambdas/services/bulk_upload_metadata_processor_service.py @@ -316,7 +316,7 @@ def validate_ods_code_format_in_expedite_folder(self, s3_object_key: str) -> Non """Ensure the parent directory of the expedite file is a valid ODS code.""" ods_code = Path(s3_object_key).parent.name - if not re.fullmatch(r"^[A-Z][0-9]{5}$", ods_code): + if not re.fullmatch(r"^[A-Z]\d{5}$", ods_code): failure_msg = f"Invalid ODS code folder '{ods_code}' in expedite path: {s3_object_key}" logger.error(failure_msg) raise OdsErrorException(failure_msg)