Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 15 additions & 9 deletions lambdas/rds_snapshot_export_s3_to_s3_copier/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,28 @@ def get_date_time(source_identifier: str) -> tuple[str, str, str, str]:
source_identifier (str): source identifier taken from the
event, as implemented this will include datetime for the snapshot as
applicable in the form sql-to-parquet-yy-mm-dd-hhmmss or
sql-to-parquet-yy-mm-dd-hhmmss-backdated
sql-to-parquet-yyyy-mm-dd-backdated

Returns:
tuple(str, str, str, str): year, month, day, date
"""

pattern = r"^sql-to-parquet-(\d{2})-(\d{2})-(\d{2})-(\d{6})(-backdated)?$"

if not re.match(pattern, source_identifier):
pattern_with_time = r"^sql-to-parquet-(\d{2})-(\d{2})-(\d{2})-(\d{6})$"
pattern_backdated = r"^sql-to-parquet-(\d{4})-(\d{2})-(\d{2})-backdated$"

if re.match(pattern_with_time, source_identifier):
split_identifier = source_identifier.split("-")
day = split_identifier[5]
month = split_identifier[4]
year = "20" + split_identifier[3]
elif re.match(pattern_backdated, source_identifier):
split_identifier = source_identifier.split("-")
day = split_identifier[5]
month = split_identifier[4]
year = split_identifier[3]
else:
raise ValueError("Invalid source identifier format")

split_identifier = source_identifier.split("-")
day = split_identifier[5]
month = split_identifier[4]
year = "20" + split_identifier[3]

date = f"{year}{month}{day}"
return year, month, day, date

Expand Down
76 changes: 76 additions & 0 deletions lambdas/rds_snapshot_export_s3_to_s3_copier/test_main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import pytest
from main import get_date_time


class TestGetDateTime:
    """Tests for ``get_date_time``.

    Two identifier shapes are exercised as valid input:
    ``sql-to-parquet-yy-mm-dd-hhmmss`` and
    ``sql-to-parquet-yyyy-mm-dd-backdated``. Every other shape listed in
    the invalid-format test is expected to raise ``ValueError``.
    """

    def test_get_date_time_yy_format_with_time(self):
        """A two-digit year with a time suffix yields a four-digit 20xx year."""
        assert get_date_time("sql-to-parquet-23-12-25-143000") == (
            "2023",
            "12",
            "25",
            "20231225",
        )

    def test_get_date_time_yyyy_format_backdated(self):
        """A four-digit year with the -backdated suffix is returned unchanged."""
        assert get_date_time("sql-to-parquet-2023-12-25-backdated") == (
            "2023",
            "12",
            "25",
            "20231225",
        )

    def test_get_date_time_yyyy_format_backdated_different_date(self):
        """A second backdated identifier, to guard against hard-coded values."""
        assert get_date_time("sql-to-parquet-2024-01-15-backdated") == (
            "2024",
            "01",
            "15",
            "20240115",
        )

    def test_get_date_time_yy_format_different_time(self):
        """A second timestamped identifier, to guard against hard-coded values."""
        assert get_date_time("sql-to-parquet-24-03-10-090000") == (
            "2024",
            "03",
            "10",
            "20240310",
        )

    # Parametrized so each malformed identifier is reported as its own test
    # case; a loop inside one test would stop at the first failure and hide
    # the remaining inputs.
    @pytest.mark.parametrize(
        "invalid_id",
        [
            pytest.param(
                "sql-to-parquet-2023-12-25",
                id="yyyy-format-missing-backdated",
            ),
            pytest.param(
                "sql-to-parquet-23-12-25",
                id="yy-format-missing-time",
            ),
            pytest.param(
                "sql-to-parquet-23-12-25-143000-backdated",
                id="yy-format-with-backdated",
            ),
            pytest.param(
                "invalid-format-23-12-25-143000",
                id="wrong-prefix",
            ),
            pytest.param(
                "sql-to-parquet-23-12-25-14300",
                id="time-5-digits",
            ),
            pytest.param(
                "sql-to-parquet-23-12-25-1430000",
                id="time-7-digits",
            ),
            pytest.param(
                "sql-to-parquet-2023-12-backdated",
                id="missing-day",
            ),
            pytest.param(
                "sql-to-parquet-123-12-25-143000",
                id="3-digit-year",
            ),
        ],
    )
    def test_get_date_time_invalid_format_raises_error(self, invalid_id):
        """Identifiers matching neither accepted shape raise ValueError."""
        with pytest.raises(ValueError, match="Invalid source identifier format"):
            get_date_time(invalid_id)

    def test_get_date_time_edge_cases(self):
        """Zero-padded month/day and an all-zero time keep their leading zeros."""
        assert get_date_time("sql-to-parquet-23-01-05-000000") == (
            "2023",
            "01",
            "05",
            "20230105",
        )

    def test_get_date_time_leap_year(self):
        """A leap-day date in the backdated format is passed through as-is."""
        assert get_date_time("sql-to-parquet-2024-02-29-backdated") == (
            "2024",
            "02",
            "29",
            "20240229",
        )