Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions lambdas/services/document_review_processor_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

logger = LoggingService(__name__)

S3_STORAGE_NAMESPACE = uuid.UUID("fb0e6109-abd7-4531-b0f2-c6e5be5861b0")


class ReviewProcessorService:

Expand Down Expand Up @@ -113,10 +115,20 @@ def _move_files_to_review_bucket(
message_data: ReviewMessageBody,
review_record_id: str,
) -> list[DocumentReviewFileDetails]:
seen_source_paths: set[str] = set()
new_file_keys: list[DocumentReviewFileDetails] = []

for file in message_data.files:
object_key = uuid.uuid4()
if file.file_path in seen_source_paths:
logger.warning(
f"Duplicate source file path detected, skipping: {file.file_path}",
)
continue
seen_source_paths.add(file.file_path)

object_key = uuid.uuid5(
S3_STORAGE_NAMESPACE,
f"{review_record_id}/{file.file_path}",
)
new_file_key = f"{review_record_id}/{object_key}"

logger.info(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import uuid

import pytest
from botocore.exceptions import ClientError

from enums.document_review_reason import DocumentReviewReason
from enums.document_review_status import DocumentReviewStatus
from models.document_review import (
Expand All @@ -8,7 +11,10 @@
)
from models.pds_models import PatientDetails
from models.sqs.review_message_body import ReviewMessageBody, ReviewMessageFile
from services.document_review_processor_service import ReviewProcessorService
from services.document_review_processor_service import (
S3_STORAGE_NAMESPACE,
ReviewProcessorService,
)
from tests.unit.conftest import MOCK_DOCUMENT_REVIEW_BUCKET, S3_PREFIX, TEST_NHS_NUMBER
from utils.exceptions import (
InvalidResourceIdException,
Expand Down Expand Up @@ -280,16 +286,20 @@ def test_build_review_record_with_multiple_files(service_under_test):
assert result.files[1].file_name == "document_2.pdf"


def test_move_files_success(service_under_test, sample_review_message, mocker):
mocker.patch("uuid.uuid4", return_value="123412342")
def test_move_files_success(service_under_test, sample_review_message):
review_id = "test-review-id-123"
source_file_path = f"staging/{TEST_NHS_NUMBER}/test_document.pdf"
expected_object_key = uuid.uuid5(
S3_STORAGE_NAMESPACE,
f"{review_id}/{source_file_path}",
)
expected_key = f"{review_id}/{expected_object_key}"

files = service_under_test._move_files_to_review_bucket(
sample_review_message,
"test-review-id-123",
review_id,
)

expected_key = "test-review-id-123/123412342"

assert len(files) == 1
assert files[0].file_name == "test_document.pdf"
assert (
Expand All @@ -299,49 +309,124 @@ def test_move_files_success(service_under_test, sample_review_message, mocker):

service_under_test.s3_service.copy_across_bucket.assert_called_once_with(
source_bucket="test_staging_bulk_store",
source_file_key=f"staging/{TEST_NHS_NUMBER}/test_document.pdf",
source_file_key=source_file_path,
dest_bucket="test_document_review_bucket",
dest_file_key=expected_key,
if_none_match=True,
)


def test_move_multiple_files_success(service_under_test):
    """Each file in the message gets its own deterministic uuid5-derived
    destination key, and one S3 copy is issued per file.

    Note: this span contained diff residue (removed-side lines interleaved
    with the new implementation); this is the reconstructed post-change test.
    """
    review_id = "test-review-id"
    file_path_1 = f"staging/{TEST_NHS_NUMBER}/document_1.pdf"
    file_path_2 = f"staging/{TEST_NHS_NUMBER}/document_2.pdf"
    # Destination keys are uuid5(namespace, "<review_id>/<source_path>") so
    # retries of the same message produce identical keys.
    expected_key_1 = (
        f"{review_id}/{uuid.uuid5(S3_STORAGE_NAMESPACE, f'{review_id}/{file_path_1}')}"
    )
    expected_key_2 = (
        f"{review_id}/{uuid.uuid5(S3_STORAGE_NAMESPACE, f'{review_id}/{file_path_2}')}"
    )

    message = ReviewMessageBody(
        upload_id="test-upload-id-999",
        files=[
            ReviewMessageFile(
                file_name="document_1.pdf",
                file_path=file_path_1,
            ),
            ReviewMessageFile(
                file_name="document_2.pdf",
                file_path=file_path_2,
            ),
        ],
        nhs_number=TEST_NHS_NUMBER,
        failure_reason=DocumentReviewReason.UNSUCCESSFUL_UPLOAD,
        uploader_ods="Y12345",
    )

    files = service_under_test._move_files_to_review_bucket(message, review_id)

    assert len(files) == 2
    assert files[0].file_name == "document_1.pdf"
    assert (
        files[0].file_location
        == f"{S3_PREFIX}{MOCK_DOCUMENT_REVIEW_BUCKET}/{expected_key_1}"
    )
    assert files[1].file_name == "document_2.pdf"
    assert (
        files[1].file_location
        == f"{S3_PREFIX}{MOCK_DOCUMENT_REVIEW_BUCKET}/{expected_key_2}"
    )

    assert service_under_test.s3_service.copy_across_bucket.call_count == 2


def test_move_files_destination_key_is_deterministic_across_retries(
    service_under_test,
    sample_review_message,
):
    """Re-processing the same message must produce the same destination key,
    since keys are derived with uuid5 rather than a random uuid4."""
    record_id = "test-review-id-123"

    first_run = service_under_test._move_files_to_review_bucket(
        sample_review_message,
        record_id,
    )

    # Clear recorded calls so the retry is observed in isolation.
    service_under_test.s3_service.copy_across_bucket.reset_mock()

    second_run = service_under_test._move_files_to_review_bucket(
        sample_review_message,
        record_id,
    )

    assert first_run[0].file_location == second_run[0].file_location


def test_move_files_deduplicates_identical_file_paths(service_under_test):
    """A message that lists the same source path twice results in a single
    returned file and exactly one S3 copy."""
    review_id = "test-review-id-dup"
    source_file_path = f"staging/{TEST_NHS_NUMBER}/document_1.pdf"
    object_key = uuid.uuid5(S3_STORAGE_NAMESPACE, f"{review_id}/{source_file_path}")
    expected_key = f"{review_id}/{object_key}"

    duplicate_entry = ReviewMessageFile(
        file_name="document_1.pdf",
        file_path=source_file_path,
    )
    message_with_duplicate_files = ReviewMessageBody(
        upload_id=review_id,
        # Same source path listed twice — the second entry must be skipped.
        files=[
            ReviewMessageFile(file_name="document_1.pdf", file_path=source_file_path),
            duplicate_entry,
        ],
        nhs_number=TEST_NHS_NUMBER,
        failure_reason=DocumentReviewReason.UNSUCCESSFUL_UPLOAD,
        uploader_ods="Y12345",
    )

    files = service_under_test._move_files_to_review_bucket(
        message_with_duplicate_files,
        review_id,
    )

    assert len(files) == 1
    assert files[0].file_name == "document_1.pdf"
    expected_location = f"{S3_PREFIX}{MOCK_DOCUMENT_REVIEW_BUCKET}/{expected_key}"
    assert files[0].file_location == expected_location

    service_under_test.s3_service.copy_across_bucket.assert_called_once_with(
        source_bucket="test_staging_bulk_store",
        source_file_key=source_file_path,
        dest_bucket="test_document_review_bucket",
        dest_file_key=expected_key,
        if_none_match=True,
    )


def test_move_files_copy_error(service_under_test, sample_review_message):
service_under_test.s3_service.copy_across_bucket.side_effect = ClientError(
{"Error": {"Code": "NoSuchKey", "Message": "Source not found"}},
Expand Down
Loading