From b4c71297faaeddc35605cfa49e6e4161e7149bef Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Tue, 3 Mar 2026 23:05:14 +0000 Subject: [PATCH 01/36] feat: add deterministic seed_random management command for synthetic dev data --- .../management/commands/seed_random.py | 297 ++++++++++++++++++ 1 file changed, 297 insertions(+) create mode 100644 metrics/interfaces/management/commands/seed_random.py diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py new file mode 100644 index 000000000..cd5b2af77 --- /dev/null +++ b/metrics/interfaces/management/commands/seed_random.py @@ -0,0 +1,297 @@ +import random +import time +from collections.abc import Iterable +from datetime import date, timedelta +from decimal import Decimal + +from django.core.management import CommandParser, call_command +from django.core.management.base import BaseCommand +from django.db import transaction + +from metrics.data.enums import TimePeriod +from metrics.data.models.api_models import APITimeSeries +from metrics.data.models.core_models.supporting import ( + Age, + Geography, + GeographyType, + Metric, + Stratum, + SubTheme, + Theme, + Topic, +) +from metrics.data.models.core_models.timeseries import CoreTimeSeries + +SCALE_CONFIGS = { + "small": {"geographies": 5, "metrics": 10, "days": 30}, + "medium": {"geographies": 20, "metrics": 50, "days": 180}, + "large": {"geographies": 100, "metrics": 200, "days": 365}, +} + + +class Command(BaseCommand): + def add_arguments(self, parser: CommandParser) -> None: + parser.add_argument( + "--dataset", + choices=["cms", "metrics", "both"], + default="both", + ) + parser.add_argument( + "--scale", + choices=["small", "medium", "large"], + default="small", + ) + parser.add_argument( + "--seed", + type=int, + required=False, + default=None, + ) + parser.add_argument( + "--truncate-first", + action="store_true", + default=False, + ) + + def handle(self, *args, **options) -> None: + started_at = time.perf_counter() + dataset: str = options["dataset"] + scale: str = options["scale"] + truncate_first: bool = options["truncate_first"] + + selected_seed = options["seed"] if options["seed"] is not None else int(time.time()) + random.seed(selected_seed) + self.stdout.write(f"Seed used: {selected_seed}") + + should_seed_cms = dataset in ("cms", "both") + should_seed_metrics = dataset in ("metrics", "both") + + counts: dict[str, int] = { + "Theme": 0, + "SubTheme": 0, + "Topic": 0, + "Metric": 0, + "Geography": 0, + "CoreTimeSeries": 0, + "APITimeSeries": 0, + } + + if should_seed_metrics: + scale_config = SCALE_CONFIGS[scale] + counts = self._seed_metrics_data( + scale_config=scale_config, + truncate_first=truncate_first, + ) + + if should_seed_cms: + call_command("build_cms_site") + + runtime_seconds = time.perf_counter() - started_at + self._print_summary( + dataset=dataset, + scale=scale, + seed=selected_seed, + counts=counts, + runtime_seconds=runtime_seconds, + ) + + @classmethod + def _seed_metrics_data(cls, *, scale_config: dict[str, int], truncate_first: bool) -> dict[str, int]: + if truncate_first: + cls._truncate_metrics_data() + + with transaction.atomic(): + themes = cls._bulk_create( + Theme, + [Theme(name=f"Theme {index + 1}") for index in range(3)], + ) + + sub_themes = cls._bulk_create( + SubTheme, + [ + SubTheme(name=f"SubTheme {index + 1}", theme=themes[index % len(themes)]) + for index in range(6) + ], + ) + + topics = cls._bulk_create( + Topic, + [ + Topic(name=f"Topic {index + 1}", sub_theme=sub_themes[index % len(sub_themes)]) + for index in range(12) + ], + ) + + metrics = cls._bulk_create( + Metric, + [ + Metric(name=f"Random Metric {index + 1}", topic=topics[index % len(topics)]) + for index in range(scale_config["metrics"]) + ], + ) + + geography_type = GeographyType.objects.create(name="Nation") + + geographies = cls._bulk_create( + Geography, + [ + Geography( + name=f"Area {index + 1}", + geography_code=f"RND{index + 1:04d}", + geography_type=geography_type, + ) + for index in range(scale_config["geographies"]) + ], + ) + + stratum = Stratum.objects.create(name="All") + age = Age.objects.create(name="All ages") + + core_count, api_count = cls._seed_time_series_rows( + metrics=metrics, + geographies=geographies, + stratum=stratum, + age=age, + days=scale_config["days"], + ) + + return { + "Theme": len(themes), + "SubTheme": len(sub_themes), + "Topic": len(topics), + "Metric": len(metrics), + "Geography": len(geographies), + "CoreTimeSeries": core_count, + "APITimeSeries": api_count, + } + + @classmethod + def _truncate_metrics_data(cls) -> None: + APITimeSeries.objects.all().delete() + CoreTimeSeries.objects.all().delete() + Metric.objects.all().delete() + Topic.objects.all().delete() + SubTheme.objects.all().delete() + Theme.objects.all().delete() + Geography.objects.all().delete() + GeographyType.objects.all().delete() + Age.objects.all().delete() + Stratum.objects.all().delete() + + @classmethod + def _seed_time_series_rows( + cls, + *, + metrics: list[Metric], + geographies: list[Geography], + stratum: Stratum, + age: Age, + days: int, + ) -> tuple[int, int]: + frequency = TimePeriod.Weekly.value + today = date.today() + start_date = today - timedelta(days=days - 1) + batch_size = 5000 + core_rows: list[CoreTimeSeries] = [] + api_rows: list[APITimeSeries] = [] + core_count = 0 + api_count = 0 + + for metric in metrics: + topic = metric.topic + sub_theme = topic.sub_theme + theme = sub_theme.theme + + for geography in geographies: + for day_offset in range(days): + current_date = start_date + timedelta(days=day_offset) + base_value = random.uniform(5.0, 250.0) + metric_value = round(base_value + random.uniform(-10.0, 10.0), 2) + epidemiological_week = current_date.isocalendar().week + + core_rows.append( + CoreTimeSeries( + metric=metric, + metric_frequency=frequency, + geography=geography, + stratum=stratum, + age=age, + sex=None, + year=current_date.year, + month=current_date.month, + epiweek=epidemiological_week, + date=current_date, + metric_value=Decimal(str(metric_value)), + is_public=True, + ) + ) + + if len(core_rows) >= batch_size: + CoreTimeSeries.objects.bulk_create(core_rows, batch_size=batch_size) + core_count += len(core_rows) + core_rows = [] + + api_rows.append( + APITimeSeries( + metric_frequency=frequency, + age=age.name, + month=current_date.month, + geography_code=geography.geography_code, + metric_group=None, + theme=theme.name, + sub_theme=sub_theme.name, + topic=topic.name, + geography_type=geography.geography_type.name, + geography=geography.name, + metric=metric.name, + stratum=stratum.name, + sex=None, + year=current_date.year, + epiweek=epidemiological_week, + date=current_date, + metric_value=float(metric_value), + is_public=True, + ) + ) + + if len(api_rows) >= batch_size: + APITimeSeries.objects.bulk_create(api_rows, batch_size=batch_size) + api_count += len(api_rows) + api_rows = [] + + if core_rows: + CoreTimeSeries.objects.bulk_create(core_rows, batch_size=batch_size) + core_count += len(core_rows) + + if api_rows: + APITimeSeries.objects.bulk_create(api_rows, batch_size=batch_size) + api_count += len(api_rows) + + return core_count, api_count + + @staticmethod + def _bulk_create(model, records: Iterable): + return model.objects.bulk_create(list(records)) + + def _print_summary( + self, + *, + dataset: str, + scale: str, + seed: int, + counts: dict[str, int], + runtime_seconds: float, + ) -> None: + self.stdout.write("") + self.stdout.write("Seed random summary:") + self.stdout.write(f" dataset: {dataset}") + self.stdout.write(f" scale: {scale}") + self.stdout.write(f" seed used: {seed}") + self.stdout.write(f" Theme: {counts['Theme']}") + self.stdout.write(f" SubTheme: {counts['SubTheme']}") + self.stdout.write(f" Topic: {counts['Topic']}") + self.stdout.write(f" Metric: {counts['Metric']}") + self.stdout.write(f" Geography: {counts['Geography']}") + self.stdout.write(f" CoreTimeSeries: {counts['CoreTimeSeries']}") + self.stdout.write(f" APITimeSeries: {counts['APITimeSeries']}") + self.stdout.write(f" runtime seconds: {runtime_seconds:.2f}") From 8f2ea58b19542a5c3cec4d81533845ac1c194a17 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Wed, 4 Mar 2026 00:40:08 +0000 Subject: [PATCH 02/36] lint fixes --- metrics/interfaces/management/commands/seed_random.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index cd5b2af77..187190416 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -3,6 +3,7 @@ from collections.abc import Iterable from datetime import date, timedelta from decimal import Decimal +from typing import override from django.core.management import CommandParser, call_command from django.core.management.base import BaseCommand @@ -30,6 +31,7 @@ class Command(BaseCommand): + @override def add_arguments(self, parser: CommandParser) -> None: parser.add_argument( "--dataset", @@ -63,8 +65,8 @@ def handle(self, *args, **options) -> None: random.seed(selected_seed) self.stdout.write(f"Seed used: {selected_seed}") - should_seed_cms = dataset in ("cms", "both") - should_seed_metrics = dataset in ("metrics", "both") + should_seed_cms = dataset in {"cms", "both"} + should_seed_metrics = dataset in {"metrics", "both"} counts: dict[str, int] = { "Theme": 0, @@ -205,8 +207,8 @@ def _seed_time_series_rows( for geography in geographies: for day_offset in range(days): current_date = start_date + timedelta(days=day_offset) - base_value = random.uniform(5.0, 250.0) - metric_value = round(base_value + random.uniform(-10.0, 10.0), 2) + base_value = random.uniform(5.0, 250.0) # noqa: S311 + metric_value = round(base_value + random.uniform(-10.0, 10.0), 2) # noqa: S311 epidemiological_week = current_date.isocalendar().week core_rows.append( From 7e402078b91243f27cfa51a067a56966a1d821c7 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Wed, 4 Mar 2026 00:48:08 +0000 Subject: [PATCH 03/36] Patched Bandit suppression to use Bandit-compatible syntax (# nosec B311) instead of # noqa: S311 --- .../management/commands/seed_random.py | 38 ++++++++++++++----- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index 187190416..75f4ff4b3 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -61,8 +61,10 @@ def handle(self, *args, **options) -> None: scale: str = options["scale"] truncate_first: bool = options["truncate_first"] - selected_seed = options["seed"] if options["seed"] is not None else int(time.time()) - random.seed(selected_seed) + selected_seed = ( + options["seed"] if options["seed"] is not None else int(time.time()) + ) + random.seed(selected_seed) # nosec B311 self.stdout.write(f"Seed used: {selected_seed}") should_seed_cms = dataset in {"cms", "both"} @@ -98,7 +100,9 @@ def handle(self, *args, **options) -> None: ) @classmethod - def _seed_metrics_data(cls, *, scale_config: dict[str, int], truncate_first: bool) -> dict[str, int]: + def _seed_metrics_data( + cls, *, scale_config: dict[str, int], truncate_first: bool + ) -> dict[str, int]: if truncate_first: cls._truncate_metrics_data() @@ -111,7 +115,9 @@ def _seed_metrics_data(cls, *, scale_config: dict[str, int], truncate_first: boo sub_themes = cls._bulk_create( SubTheme, [ - SubTheme(name=f"SubTheme {index + 1}", theme=themes[index % len(themes)]) + SubTheme( + name=f"SubTheme {index + 1}", theme=themes[index % len(themes)] + ) for index in range(6) ], ) @@ -119,7 +125,10 @@ def _seed_metrics_data(cls, *, scale_config: dict[str, int], truncate_first: boo topics = cls._bulk_create( Topic, [ - Topic(name=f"Topic {index + 1}", sub_theme=sub_themes[index % len(sub_themes)]) + Topic( + name=f"Topic {index + 1}", + sub_theme=sub_themes[index % len(sub_themes)], + ) for index in range(12) ], ) @@ -127,7 +136,10 @@ def _seed_metrics_data(cls, *, scale_config: dict[str, int], truncate_first: boo metrics = cls._bulk_create( Metric, [ - Metric(name=f"Random Metric {index + 1}", topic=topics[index % len(topics)]) + Metric( + name=f"Random Metric {index + 1}", + topic=topics[index % len(topics)], + ) for index in range(scale_config["metrics"]) ], ) @@ -207,8 +219,10 @@ def _seed_time_series_rows( for geography in geographies: for day_offset in range(days): current_date = start_date + timedelta(days=day_offset) - base_value = random.uniform(5.0, 250.0) # noqa: S311 - metric_value = round(base_value + random.uniform(-10.0, 10.0), 2) # noqa: S311 + base_value = random.uniform(5.0, 250.0) # nosec B311 + metric_value = round( + base_value + random.uniform(-10.0, 10.0), 2 + ) # nosec B311 epidemiological_week = current_date.isocalendar().week core_rows.append( @@ -229,7 +243,9 @@ def _seed_time_series_rows( ) if len(core_rows) >= batch_size: - CoreTimeSeries.objects.bulk_create(core_rows, batch_size=batch_size) + CoreTimeSeries.objects.bulk_create( + core_rows, batch_size=batch_size + ) core_count += len(core_rows) core_rows = [] @@ -257,7 +273,9 @@ def _seed_time_series_rows( ) if len(api_rows) >= batch_size: - APITimeSeries.objects.bulk_create(api_rows, batch_size=batch_size) + APITimeSeries.objects.bulk_create( + api_rows, batch_size=batch_size + ) api_count += len(api_rows) api_rows = [] From 5531f69a9b36a5a4a229898314293c85c45b8061 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Wed, 4 Mar 2026 00:52:13 +0000 Subject: [PATCH 04/36] Updated the two flagged random.uniform(...) lines to suppress both Ruff and Bandit at the exact call sites. --- metrics/interfaces/management/commands/seed_random.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index 75f4ff4b3..471294730 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -219,10 +219,12 @@ def _seed_time_series_rows( for geography in geographies: for day_offset in range(days): current_date = start_date + timedelta(days=day_offset) - base_value = random.uniform(5.0, 250.0) # nosec B311 + base_value = random.uniform(5.0, 250.0) # noqa: S311 # nosec B311 metric_value = round( - base_value + random.uniform(-10.0, 10.0), 2 - ) # nosec B311 + base_value + + random.uniform(-10.0, 10.0), # noqa: S311 # nosec B311 + 2, + ) epidemiological_week = current_date.isocalendar().week core_rows.append( From 879da808d061e604db0787f37adbfa4bd9729001 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Wed, 4 Mar 2026 01:04:25 +0000 Subject: [PATCH 05/36] Added full unit coverage for seed_random --- .../interfaces/management/test_seed_random.py | 347 ++++++++++++++++++ 1 file changed, 347 insertions(+) create mode 100644 tests/unit/metrics/interfaces/management/test_seed_random.py diff --git a/tests/unit/metrics/interfaces/management/test_seed_random.py b/tests/unit/metrics/interfaces/management/test_seed_random.py new file mode 100644 index 000000000..1dafe35d3 --- /dev/null +++ b/tests/unit/metrics/interfaces/management/test_seed_random.py @@ -0,0 +1,347 @@ +from collections.abc import Iterator +from contextlib import ExitStack, nullcontext +from types import SimpleNamespace +from unittest import mock + +import pytest +from django.core.management import CommandParser + +from metrics.interfaces.management.commands.seed_random import Command, SCALE_CONFIGS + +MODULE_PATH = "metrics.interfaces.management.commands.seed_random" + + +def _fake_metric_hierarchy() -> SimpleNamespace: + theme = SimpleNamespace(name="Theme 1") + sub_theme = SimpleNamespace(name="SubTheme 1", theme=theme) + topic = SimpleNamespace(name="Topic 1", sub_theme=sub_theme) + return SimpleNamespace(name="Metric 1", topic=topic) + + +def _fake_geography() -> SimpleNamespace: + geography_type = SimpleNamespace(name="Nation") + return SimpleNamespace( + name="Area 1", + geography_code="RND0001", + geography_type=geography_type, + ) + + +class TestSeedRandomCommand: + def test_add_arguments_parses_defaults(self): + parser = CommandParser(prog="manage.py seed_random") + + Command().add_arguments(parser) + options = parser.parse_args([]) + + assert options.dataset == "both" + assert options.scale == "small" + assert options.seed is None + assert options.truncate_first is False + + @mock.patch(f"{MODULE_PATH}.random.seed") + @mock.patch(f"{MODULE_PATH}.call_command") + @mock.patch.object(Command, "_seed_metrics_data") + @mock.patch.object(Command, "_print_summary") + @mock.patch(f"{MODULE_PATH}.time.perf_counter") + def test_handle_metrics_dataset( + self, + spy_perf_counter: mock.MagicMock, + spy_print_summary: mock.MagicMock, + spy_seed_metrics_data: mock.MagicMock, + spy_call_command: mock.MagicMock, + spy_random_seed: mock.MagicMock, + ): + spy_perf_counter.side_effect = [10.0, 14.5] + spy_seed_metrics_data.return_value = { + "Theme": 3, + "SubTheme": 6, + "Topic": 12, + "Metric": 10, + "Geography": 5, + "CoreTimeSeries": 1, + "APITimeSeries": 1, + } + + Command().handle(dataset="metrics", scale="small", truncate_first=True, seed=42) + + spy_random_seed.assert_called_once_with(42) + spy_seed_metrics_data.assert_called_once_with( + scale_config=SCALE_CONFIGS["small"], + truncate_first=True, + ) + spy_call_command.assert_not_called() + spy_print_summary.assert_called_once_with( + dataset="metrics", + scale="small", + seed=42, + counts=spy_seed_metrics_data.return_value, + runtime_seconds=4.5, + ) + + @mock.patch(f"{MODULE_PATH}.random.seed") + @mock.patch(f"{MODULE_PATH}.call_command") + @mock.patch.object(Command, "_seed_metrics_data") + @mock.patch.object(Command, "_print_summary") + @mock.patch(f"{MODULE_PATH}.time.time") + @mock.patch(f"{MODULE_PATH}.time.perf_counter") + def test_handle_cms_dataset_uses_time_seed_and_builds_cms( + self, + spy_perf_counter: mock.MagicMock, + spy_time: mock.MagicMock, + spy_print_summary: mock.MagicMock, + spy_seed_metrics_data: mock.MagicMock, + spy_call_command: mock.MagicMock, + spy_random_seed: mock.MagicMock, + ): + spy_perf_counter.side_effect = [20.0, 22.0] + spy_time.return_value = 1234 + + Command().handle(dataset="cms", scale="large", truncate_first=False, seed=None) + + spy_random_seed.assert_called_once_with(1234) + spy_seed_metrics_data.assert_not_called() + spy_call_command.assert_called_once_with("build_cms_site") + spy_print_summary.assert_called_once_with( + dataset="cms", + scale="large", + seed=1234, + counts={ + "Theme": 0, + "SubTheme": 0, + "Topic": 0, + "Metric": 0, + "Geography": 0, + "CoreTimeSeries": 0, + "APITimeSeries": 0, + }, + runtime_seconds=2.0, + ) + + @mock.patch.object(Command, "_truncate_metrics_data") + @mock.patch.object(Command, "_seed_time_series_rows") + @mock.patch.object(Command, "_bulk_create") + @mock.patch(f"{MODULE_PATH}.Geography") + @mock.patch(f"{MODULE_PATH}.Metric") + @mock.patch(f"{MODULE_PATH}.Topic") + @mock.patch(f"{MODULE_PATH}.SubTheme") + @mock.patch(f"{MODULE_PATH}.Theme") + @mock.patch(f"{MODULE_PATH}.transaction.atomic") + @mock.patch(f"{MODULE_PATH}.GeographyType.objects.create") + @mock.patch(f"{MODULE_PATH}.Stratum.objects.create") + @mock.patch(f"{MODULE_PATH}.Age.objects.create") + def test_seed_metrics_data_builds_expected_counts_and_calls( + self, + spy_age_create: mock.MagicMock, + spy_stratum_create: mock.MagicMock, + spy_geography_type_create: mock.MagicMock, + spy_atomic: mock.MagicMock, + spy_theme: mock.MagicMock, + spy_sub_theme: mock.MagicMock, + spy_topic: mock.MagicMock, + spy_metric: mock.MagicMock, + spy_geography: mock.MagicMock, + spy_bulk_create: mock.MagicMock, + spy_seed_time_series_rows: mock.MagicMock, + spy_truncate: mock.MagicMock, + ): + spy_atomic.return_value = nullcontext() + spy_theme.side_effect = lambda **kwargs: SimpleNamespace(**kwargs) + spy_sub_theme.side_effect = lambda **kwargs: SimpleNamespace(**kwargs) + spy_topic.side_effect = lambda **kwargs: SimpleNamespace(**kwargs) + spy_metric.side_effect = lambda **kwargs: SimpleNamespace(**kwargs) + spy_geography.side_effect = lambda **kwargs: SimpleNamespace(**kwargs) + spy_geography_type_create.return_value = SimpleNamespace(name="Nation") + spy_stratum_create.return_value = SimpleNamespace(name="All") + spy_age_create.return_value = SimpleNamespace(name="All ages") + spy_seed_time_series_rows.return_value = (77, 88) + + themes = [SimpleNamespace(name=f"Theme {index + 1}") for index in range(3)] + sub_themes = [ + SimpleNamespace( + name=f"SubTheme {index + 1}", theme=themes[index % len(themes)] + ) + for index in range(6) + ] + topics = [ + SimpleNamespace( + name=f"Topic {index + 1}", + sub_theme=sub_themes[index % len(sub_themes)], + ) + for index in range(12) + ] + metrics = [ + SimpleNamespace( + name=f"Metric {index + 1}", topic=topics[index % len(topics)] + ) + for index in range(4) + ] + geographies = [ + SimpleNamespace( + name=f"Area {index + 1}", + geography_code=f"RND{index + 1:04d}", + geography_type=spy_geography_type_create.return_value, + ) + for index in range(2) + ] + spy_bulk_create.side_effect = [themes, sub_themes, topics, metrics, geographies] + + result = Command._seed_metrics_data( + scale_config={"geographies": 2, "metrics": 4, "days": 9}, + truncate_first=True, + ) + + assert result == { + "Theme": 3, + "SubTheme": 6, + "Topic": 12, + "Metric": 4, + "Geography": 2, + "CoreTimeSeries": 77, + "APITimeSeries": 88, + } + spy_truncate.assert_called_once_with() + spy_seed_time_series_rows.assert_called_once_with( + metrics=metrics, + geographies=geographies, + stratum=spy_stratum_create.return_value, + age=spy_age_create.return_value, + days=9, + ) + + def test_truncate_metrics_data_deletes_from_all_models(self): + model_names = [ + "APITimeSeries", + "CoreTimeSeries", + "Metric", + "Topic", + "SubTheme", + "Theme", + "Geography", + "GeographyType", + "Age", + "Stratum", + ] + + managers: dict[str, mock.MagicMock] = {} + with ExitStack() as stack: + for model_name in model_names: + manager = mock.MagicMock() + managers[model_name] = manager + stack.enter_context( + mock.patch(f"{MODULE_PATH}.{model_name}.objects", manager) + ) + + Command._truncate_metrics_data() + + for model_name in model_names: + managers[model_name].all.assert_called_once_with() + managers[model_name].all.return_value.delete.assert_called_once_with() + + @mock.patch(f"{MODULE_PATH}.APITimeSeries") + @mock.patch(f"{MODULE_PATH}.CoreTimeSeries") + def test_seed_time_series_rows_flushes_remainder( + self, + spy_core_time_series: mock.MagicMock, + spy_api_time_series: mock.MagicMock, + ): + spy_core_time_series.side_effect = lambda **kwargs: kwargs + spy_api_time_series.side_effect = lambda **kwargs: kwargs + + core_count, api_count = Command._seed_time_series_rows( + metrics=[_fake_metric_hierarchy()], + geographies=[_fake_geography()], + stratum=SimpleNamespace(name="All"), + age=SimpleNamespace(name="All ages"), + days=1, + ) + + assert core_count == 1 + assert api_count == 1 + spy_core_time_series.objects.bulk_create.assert_called_once() + spy_api_time_series.objects.bulk_create.assert_called_once() + + @mock.patch(f"{MODULE_PATH}.APITimeSeries") + @mock.patch(f"{MODULE_PATH}.CoreTimeSeries") + def test_seed_time_series_rows_flushes_at_batch_size( + self, + spy_core_time_series: mock.MagicMock, + spy_api_time_series: mock.MagicMock, + ): + spy_core_time_series.side_effect = lambda **kwargs: kwargs + spy_api_time_series.side_effect = lambda **kwargs: kwargs + + core_count, api_count = Command._seed_time_series_rows( + metrics=[_fake_metric_hierarchy()], + geographies=[_fake_geography()], + stratum=SimpleNamespace(name="All"), + age=SimpleNamespace(name="All ages"), + days=5000, + ) + + assert core_count == 5000 + assert api_count == 5000 + spy_core_time_series.objects.bulk_create.assert_called_once() + spy_api_time_series.objects.bulk_create.assert_called_once() + + def test_bulk_create_materialises_iterable_and_delegates(self): + class FakeModel: + objects = mock.MagicMock() + + def records_generator() -> Iterator[int]: + yield 1 + yield 2 + + FakeModel.objects.bulk_create.return_value = ["created-records"] + + result = Command._bulk_create(FakeModel, records_generator()) + + assert result == ["created-records"] + FakeModel.objects.bulk_create.assert_called_once_with([1, 2]) + + def test_print_summary_writes_expected_output(self): + command = Command() + command.stdout = mock.MagicMock() + + command._print_summary( + dataset="both", + scale="small", + seed=123, + counts={ + "Theme": 3, + "SubTheme": 6, + "Topic": 12, + "Metric": 10, + "Geography": 5, + "CoreTimeSeries": 1500, + "APITimeSeries": 1500, + }, + runtime_seconds=3.456, + ) + + expected_lines = [ + "", + "Seed random summary:", + " dataset: both", + " scale: small", + " seed used: 123", + " Theme: 3", + " SubTheme: 6", + " Topic: 12", + " Metric: 10", + " Geography: 5", + " CoreTimeSeries: 1500", + " APITimeSeries: 1500", + " runtime seconds: 3.46", + ] + actual_lines = [call.args[0] for call in command.stdout.write.call_args_list] + + assert actual_lines == expected_lines + + +def test_add_arguments_rejects_invalid_dataset_value(): + parser = CommandParser(prog="manage.py seed_random") + Command().add_arguments(parser) + + with pytest.raises(SystemExit): + parser.parse_args(["--dataset", "invalid"]) From 3147af487d912ad121c867e1cf3a6a07668a98c8 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Wed, 4 Mar 2026 01:08:44 +0000 Subject: [PATCH 06/36] =?UTF-8?q?Fixed=20the=20failing=20test=20by=20match?= =?UTF-8?q?ing=20Django=E2=80=99s=20actual=20exception=20type.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unit/metrics/interfaces/management/test_seed_random.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/metrics/interfaces/management/test_seed_random.py b/tests/unit/metrics/interfaces/management/test_seed_random.py index 1dafe35d3..f09077e2d 100644 --- a/tests/unit/metrics/interfaces/management/test_seed_random.py +++ b/tests/unit/metrics/interfaces/management/test_seed_random.py @@ -4,6 +4,7 @@ from unittest import mock import pytest +from django.core.management.base import CommandError from django.core.management import CommandParser from metrics.interfaces.management.commands.seed_random import Command, SCALE_CONFIGS @@ -343,5 +344,5 @@ def test_add_arguments_rejects_invalid_dataset_value(): parser = CommandParser(prog="manage.py seed_random") Command().add_arguments(parser) - with pytest.raises(SystemExit): + with pytest.raises(CommandError): parser.parse_args(["--dataset", "invalid"]) From cfb5b0509006bc98fb6355bbe202aaaa284f84aa Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Mon, 9 Mar 2026 23:55:18 +0000 Subject: [PATCH 07/36] addressing PR review except arch change --- .../management/commands/seed_random.py | 195 ++++++++++++++++-- 1 file changed, 177 insertions(+), 18 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index 471294730..6405eef88 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -1,6 +1,6 @@ import random import time -from collections.abc import Iterable +from collections.abc import Callable, Iterable from datetime import date, timedelta from decimal import Decimal from typing import override @@ -22,6 +22,11 @@ Topic, ) from metrics.data.models.core_models.timeseries import CoreTimeSeries +from validation import enums as validation_enums +from validation.geography_code import ( + NATION_GEOGRAPHY_CODES, + UNITED_KINGDOM_GEOGRAPHY_CODE, +) SCALE_CONFIGS = { "small": {"geographies": 5, "metrics": 10, "days": 30}, @@ -37,22 +42,26 @@ def add_arguments(self, parser: CommandParser) -> None: "--dataset", choices=["cms", "metrics", "both"], default="both", + help="Which dataset to seed: CMS, metrics, or both.", ) parser.add_argument( "--scale", choices=["small", "medium", "large"], default="small", + help="Size of the random metrics dataset to generate.", ) parser.add_argument( "--seed", type=int, required=False, default=None, + help="Optional random seed for reproducible metric values.", ) parser.add_argument( "--truncate-first", action="store_true", default=False, + help="Clear existing metrics tables before seeding to avoid duplicates.", ) def handle(self, *args, **options) -> None: @@ -82,13 +91,18 @@ def handle(self, *args, **options) -> None: if should_seed_metrics: scale_config = SCALE_CONFIGS[scale] + self.stderr.write("Seeding metrics dataset...") counts = self._seed_metrics_data( scale_config=scale_config, truncate_first=truncate_first, + progress_callback=self.stderr.write, ) + self.stderr.write("Metrics dataset seeding complete.") if should_seed_cms: + self.stderr.write("Building CMS site data...") call_command("build_cms_site") + self.stderr.write("CMS site build complete.") runtime_seconds = time.perf_counter() - started_at self._print_summary( @@ -101,35 +115,51 @@ def handle(self, *args, **options) -> None: @classmethod def _seed_metrics_data( - cls, *, scale_config: dict[str, int], truncate_first: bool + cls, + *, + scale_config: dict[str, int], + truncate_first: bool, + progress_callback: Callable[[str], None] | None = None, ) -> dict[str, int]: - if truncate_first: - cls._truncate_metrics_data() + """Seed supporting metric models and time series rows for the selected scale.""" + if progress_callback is not None: + progress_callback("Preparing metric taxonomy and geography records...") with transaction.atomic(): + if truncate_first: + cls._truncate_metrics_data() + + ( + theme_names, + sub_theme_rows, + topic_rows, + ) = cls._build_theme_hierarchy_records() themes = cls._bulk_create( Theme, - [Theme(name=f"Theme {index + 1}") for index in range(3)], + [Theme(name=name) for name in theme_names], ) + themes_by_name = {theme.name: theme for theme in themes} sub_themes = cls._bulk_create( SubTheme, [ - SubTheme( - name=f"SubTheme {index + 1}", theme=themes[index % len(themes)] - ) - for index in range(6) + SubTheme(name=name, theme=themes_by_name[theme_name]) + for name, theme_name in sub_theme_rows ], ) + sub_themes_by_key = { + (sub_theme.name, sub_theme.theme.name): sub_theme + for sub_theme in sub_themes + } topics = cls._bulk_create( Topic, [ Topic( - name=f"Topic {index + 1}", - sub_theme=sub_themes[index % len(sub_themes)], + name=topic_name, + sub_theme=sub_themes_by_key[(sub_theme_name, theme_name)], ) - for index in range(12) + for topic_name, sub_theme_name, theme_name in topic_rows ], ) @@ -144,29 +174,47 @@ def _seed_metrics_data( ], ) - geography_type = GeographyType.objects.create(name="Nation") + geography_seed_values = cls._build_geography_seed_values( + count=scale_config["geographies"] + ) + geography_type_names = { + record["geography_type"] for record in geography_seed_values + } + geography_types = cls._bulk_create( + GeographyType, + [GeographyType(name=name) for name in sorted(geography_type_names)], + ) + geography_types_by_name = { + geography_type.name: geography_type + for geography_type in geography_types + } geographies = cls._bulk_create( Geography, [ Geography( - name=f"Area {index + 1}", - geography_code=f"RND{index + 1:04d}", - geography_type=geography_type, + name=record["name"], + geography_code=record["geography_code"], + geography_type=geography_types_by_name[ + record["geography_type"] + ], ) - for index in range(scale_config["geographies"]) + for record in geography_seed_values ], ) stratum = Stratum.objects.create(name="All") age = Age.objects.create(name="All ages") + if progress_callback is not None: + progress_callback("Generating Core/API time series rows...") core_count, api_count = cls._seed_time_series_rows( metrics=metrics, geographies=geographies, stratum=stratum, age=age, days=scale_config["days"], + progress_callback=progress_callback, ) return { @@ -181,6 +229,7 @@ def _seed_metrics_data( @classmethod def _truncate_metrics_data(cls) -> None: + """Delete all seeded metrics-related rows in dependency-safe order.""" APITimeSeries.objects.all().delete() CoreTimeSeries.objects.all().delete() Metric.objects.all().delete() @@ -201,6 +250,7 @@ def _seed_time_series_rows( stratum: Stratum, age: Age, days: int, + progress_callback: Callable[[str], None] | None = None, ) -> tuple[int, int]: frequency = TimePeriod.Weekly.value today = date.today() @@ -210,8 +260,11 @@ def _seed_time_series_rows( api_rows: list[APITimeSeries] = [] core_count = 0 api_count = 0 + total_metrics = len(metrics) + total_row_count = total_metrics * len(geographies) * days + log_interval = max(1, total_metrics // 10) if total_metrics else 1 - for metric in metrics: + for metric_index, metric in enumerate(metrics, start=1): topic = metric.topic sub_theme = topic.sub_theme theme = sub_theme.theme @@ -281,6 +334,19 @@ def _seed_time_series_rows( api_count += len(api_rows) api_rows = [] + if ( + progress_callback is not None + and ( + metric_index == total_metrics + or metric_index % log_interval == 0 + ) + ): + processed_row_count = metric_index * len(geographies) * days + progress_callback( + f"Processed {metric_index}/{total_metrics} metrics " + f"({processed_row_count:,}/{total_row_count:,} row groups)." + ) + if core_rows: CoreTimeSeries.objects.bulk_create(core_rows, batch_size=batch_size) core_count += len(core_rows) @@ -289,12 +355,105 @@ def _seed_time_series_rows( APITimeSeries.objects.bulk_create(api_rows, batch_size=batch_size) api_count += len(api_rows) + if progress_callback is not None: + progress_callback( + "Inserted " + f"{core_count:,} CoreTimeSeries rows and " + f"{api_count:,} APITimeSeries rows." + ) + return core_count, api_count @staticmethod def _bulk_create(model, records: Iterable): + """Materialise and bulk insert a sequence of model instances.""" return model.objects.bulk_create(list(records)) + @classmethod + def _build_theme_hierarchy_records( + cls, + ) -> tuple[list[str], list[tuple[str, str]], list[tuple[str, str, str]]]: + child_to_parent: dict[str, str] = {} + normalised_to_child: dict[str, str] = {} + parent_by_name = validation_enums.ParentTheme.__members__ + + for child_theme_group in validation_enums.ChildTheme: + resolved_parent = ( + parent_by_name[child_theme_group.name].value + if child_theme_group.name in parent_by_name + else validation_enums.ParentTheme.INFECTIOUS_DISEASE.value + ) + for sub_theme_name in child_theme_group.return_list(): + child_to_parent[sub_theme_name] = resolved_parent + normalised_to_child[cls._normalise_key(sub_theme_name)] = ( + sub_theme_name + ) + + topic_rows: list[tuple[str, str, str]] = [] + sub_theme_pairs: set[tuple[str, str]] = set() + for topic_group in validation_enums.Topic: + normalised_topic_group = cls._normalise_key(topic_group.name) + sub_theme_name = normalised_to_child.get(normalised_topic_group) + if sub_theme_name is None: + continue + + parent_theme_name = child_to_parent[sub_theme_name] + sub_theme_pairs.add((sub_theme_name, parent_theme_name)) + for topic_value in topic_group.return_list(): + topic_rows.append((topic_value, sub_theme_name, parent_theme_name)) + + theme_names = sorted({parent_name for _, parent_name in sub_theme_pairs}) + sub_theme_rows = sorted( + sub_theme_pairs, + key=lambda value: (value[1], value[0]), + ) + return theme_names, sub_theme_rows, topic_rows + + @classmethod + def _build_geography_seed_values(cls, *, count: int) -> list[dict[str, str]]: + geographies: list[dict[str, str]] = [ + { + "name": "United Kingdom", + "geography_code": UNITED_KINGDOM_GEOGRAPHY_CODE, + "geography_type": ( + validation_enums.GeographyType.UNITED_KINGDOM.value + ), + } + ] + + geographies.extend( + { + "name": name, + "geography_code": code, + "geography_type": validation_enums.GeographyType.NATION.value, + } + for name, code in NATION_GEOGRAPHY_CODES.items() + ) + + if len(geographies) >= count: + return geographies[:count] + + extra_required = count - len(geographies) + geographies.extend( + { + "name": cls._format_enum_name(ltla.name), + "geography_code": ltla.value, + "geography_type": ( + validation_enums.GeographyType.LOWER_TIER_LOCAL_AUTHORITY.value + ), + } + for ltla in list(validation_enums.LTLAs)[:extra_required] + ) + return geographies[:count] + + @staticmethod + def _normalise_key(value: str) -> str: + return value.lower().replace("-", "_") + + @staticmethod + def _format_enum_name(value: str) -> str: + return value.replace("_", " ").title() + def _print_summary( self, *, From 84b276c13c7691f3298a13b38c2815798bd57865 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Tue, 10 Mar 2026 00:14:15 +0000 Subject: [PATCH 08/36] ruff and pytest fixes --- .../management/commands/seed_random.py | 13 +- .../interfaces/management/test_seed_random.py | 151 ++++++++++++++---- 2 files changed, 128 insertions(+), 36 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index 6405eef88..b0f9617fa 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -3,6 +3,7 @@ from collections.abc import Callable, Iterable from datetime import date, timedelta from decimal import Decimal +from operator import itemgetter from typing import override from django.core.management import CommandParser, call_command @@ -120,7 +121,7 @@ def _seed_metrics_data( scale_config: dict[str, int], truncate_first: bool, progress_callback: Callable[[str], None] | None = None, - ) -> dict[str, int]: + ) -> dict[str, int]: # noqa: PLR0914 """Seed supporting metric models and time series rows for the selected scale.""" if progress_callback is not None: progress_callback("Preparing metric taxonomy and geography records...") @@ -251,7 +252,7 @@ def _seed_time_series_rows( age: Age, days: int, progress_callback: Callable[[str], None] | None = None, - ) -> tuple[int, int]: + ) -> tuple[int, int]: # noqa: PLR0914 frequency = TimePeriod.Weekly.value today = date.today() start_date = today - timedelta(days=days - 1) @@ -399,13 +400,15 @@ def _build_theme_hierarchy_records( parent_theme_name = child_to_parent[sub_theme_name] sub_theme_pairs.add((sub_theme_name, parent_theme_name)) - for topic_value in topic_group.return_list(): - topic_rows.append((topic_value, sub_theme_name, parent_theme_name)) + topic_rows.extend( + (topic_value, sub_theme_name, parent_theme_name) + for topic_value in topic_group.return_list() + ) theme_names = sorted({parent_name for _, parent_name in sub_theme_pairs}) sub_theme_rows = sorted( sub_theme_pairs, - key=lambda value: (value[1], value[0]), + key=itemgetter(1, 0), ) return theme_names, sub_theme_rows, topic_rows diff --git a/tests/unit/metrics/interfaces/management/test_seed_random.py b/tests/unit/metrics/interfaces/management/test_seed_random.py index f09077e2d..1c644ab66 100644 --- a/tests/unit/metrics/interfaces/management/test_seed_random.py +++ b/tests/unit/metrics/interfaces/management/test_seed_random.py @@ -4,12 +4,15 @@ from unittest import mock import pytest -from django.core.management.base import CommandError from django.core.management import CommandParser +from django.core.management.base import CommandError -from metrics.interfaces.management.commands.seed_random import Command, SCALE_CONFIGS +from metrics.interfaces.management.commands.seed_random import SCALE_CONFIGS, Command MODULE_PATH = "metrics.interfaces.management.commands.seed_random" +FULL_BATCH_DAYS = 5000 +SMALL_GEO_COUNT = 3 +LARGE_GEO_COUNT = 7 def _fake_metric_hierarchy() -> SimpleNamespace: @@ -70,6 +73,7 @@ def test_handle_metrics_dataset( spy_seed_metrics_data.assert_called_once_with( scale_config=SCALE_CONFIGS["small"], truncate_first=True, + progress_callback=mock.ANY, ) spy_call_command.assert_not_called() spy_print_summary.assert_called_once_with( @@ -121,55 +125,87 @@ def test_handle_cms_dataset_uses_time_seed_and_builds_cms( @mock.patch.object(Command, "_truncate_metrics_data") @mock.patch.object(Command, "_seed_time_series_rows") + @mock.patch.object(Command, "_build_geography_seed_values") + @mock.patch.object(Command, "_build_theme_hierarchy_records") @mock.patch.object(Command, "_bulk_create") @mock.patch(f"{MODULE_PATH}.Geography") @mock.patch(f"{MODULE_PATH}.Metric") @mock.patch(f"{MODULE_PATH}.Topic") @mock.patch(f"{MODULE_PATH}.SubTheme") @mock.patch(f"{MODULE_PATH}.Theme") + @mock.patch(f"{MODULE_PATH}.GeographyType") @mock.patch(f"{MODULE_PATH}.transaction.atomic") - @mock.patch(f"{MODULE_PATH}.GeographyType.objects.create") @mock.patch(f"{MODULE_PATH}.Stratum.objects.create") @mock.patch(f"{MODULE_PATH}.Age.objects.create") def test_seed_metrics_data_builds_expected_counts_and_calls( self, spy_age_create: mock.MagicMock, spy_stratum_create: mock.MagicMock, - spy_geography_type_create: mock.MagicMock, spy_atomic: mock.MagicMock, + spy_geography_type: mock.MagicMock, spy_theme: mock.MagicMock, spy_sub_theme: mock.MagicMock, spy_topic: mock.MagicMock, spy_metric: mock.MagicMock, spy_geography: mock.MagicMock, spy_bulk_create: mock.MagicMock, + spy_build_theme_hierarchy_records: mock.MagicMock, + spy_build_geography_seed_values: mock.MagicMock, spy_seed_time_series_rows: mock.MagicMock, spy_truncate: mock.MagicMock, ): + spy_progress_callback = mock.MagicMock() spy_atomic.return_value = nullcontext() - spy_theme.side_effect = lambda **kwargs: SimpleNamespace(**kwargs) - spy_sub_theme.side_effect = lambda **kwargs: SimpleNamespace(**kwargs) - spy_topic.side_effect = lambda **kwargs: SimpleNamespace(**kwargs) - spy_metric.side_effect = lambda **kwargs: SimpleNamespace(**kwargs) - spy_geography.side_effect = lambda **kwargs: SimpleNamespace(**kwargs) - spy_geography_type_create.return_value = SimpleNamespace(name="Nation") + spy_geography_type.side_effect = SimpleNamespace + spy_theme.side_effect = SimpleNamespace + spy_sub_theme.side_effect = SimpleNamespace + spy_topic.side_effect = SimpleNamespace + spy_metric.side_effect = SimpleNamespace + spy_geography.side_effect = SimpleNamespace spy_stratum_create.return_value = SimpleNamespace(name="All") spy_age_create.return_value = SimpleNamespace(name="All ages") spy_seed_time_series_rows.return_value = (77, 88) + spy_build_theme_hierarchy_records.return_value = ( + ["infectious_disease", "climate_and_environment"], + [ + ("respiratory", "infectious_disease"), + ("vectors", "climate_and_environment"), + ], + [ + ("COVID-19", "respiratory", "infectious_disease"), + ("ticks", "vectors", "climate_and_environment"), + ], + ) + spy_build_geography_seed_values.return_value = [ + { + "name": "England", + "geography_code": "E92000001", + "geography_type": "Nation", + }, + { + "name": "Area 2", + "geography_code": "E09000002", + "geography_type": "Lower Tier Local Authority", + }, + ] - themes = [SimpleNamespace(name=f"Theme {index + 1}") for index in range(3)] + themes = [ + SimpleNamespace(name="infectious_disease"), + SimpleNamespace(name="climate_and_environment"), + ] sub_themes = [ - SimpleNamespace( - name=f"SubTheme {index + 1}", theme=themes[index % len(themes)] - ) - for index in range(6) + SimpleNamespace(name="respiratory", theme=themes[0]), + SimpleNamespace(name="vectors", theme=themes[1]), ] topics = [ SimpleNamespace( - name=f"Topic {index + 1}", - sub_theme=sub_themes[index % len(sub_themes)], - ) - for index in range(12) + name="COVID-19", + sub_theme=sub_themes[0], + ), + SimpleNamespace( + name="ticks", + sub_theme=sub_themes[1], + ), ] metrics = [ SimpleNamespace( @@ -177,25 +213,41 @@ def test_seed_metrics_data_builds_expected_counts_and_calls( ) for index in range(4) ] + geography_types = [ + SimpleNamespace(name="Nation"), + SimpleNamespace(name="Lower Tier Local Authority"), + ] geographies = [ SimpleNamespace( - name=f"Area {index + 1}", - geography_code=f"RND{index + 1:04d}", - geography_type=spy_geography_type_create.return_value, - ) - for index in range(2) + name="England", + geography_code="E92000001", + geography_type=geography_types[0], + ), + SimpleNamespace( + name="Area 2", + geography_code="E09000002", + geography_type=geography_types[1], + ), + ] + spy_bulk_create.side_effect = [ + themes, + sub_themes, + topics, + metrics, + geography_types, + geographies, ] - spy_bulk_create.side_effect = [themes, sub_themes, topics, metrics, geographies] result = Command._seed_metrics_data( scale_config={"geographies": 2, "metrics": 4, "days": 9}, truncate_first=True, + progress_callback=spy_progress_callback, ) assert result == { - "Theme": 3, - "SubTheme": 6, - "Topic": 12, + "Theme": 2, + "SubTheme": 2, + "Topic": 2, "Metric": 4, "Geography": 2, "CoreTimeSeries": 77, @@ -208,7 +260,12 @@ def test_seed_metrics_data_builds_expected_counts_and_calls( stratum=spy_stratum_create.return_value, age=spy_age_create.return_value, days=9, + progress_callback=spy_progress_callback, ) + spy_progress_callback.assert_any_call( + "Preparing metric taxonomy and geography records..." + ) + spy_progress_callback.assert_any_call("Generating Core/API time series rows...") def test_truncate_metrics_data_deletes_from_all_models(self): model_names = [ @@ -248,6 +305,7 @@ def test_seed_time_series_rows_flushes_remainder( ): spy_core_time_series.side_effect = lambda **kwargs: kwargs spy_api_time_series.side_effect = lambda **kwargs: kwargs + spy_progress_callback = mock.MagicMock() core_count, api_count = Command._seed_time_series_rows( metrics=[_fake_metric_hierarchy()], @@ -255,12 +313,16 @@ def test_seed_time_series_rows_flushes_remainder( stratum=SimpleNamespace(name="All"), age=SimpleNamespace(name="All ages"), days=1, + progress_callback=spy_progress_callback, ) assert core_count == 1 assert api_count == 1 spy_core_time_series.objects.bulk_create.assert_called_once() spy_api_time_series.objects.bulk_create.assert_called_once() + progress_messages = [call.args[0] for call in spy_progress_callback.call_args_list] + assert any(message.startswith("Processed 1/1 metrics") for message in progress_messages) + assert any(message.startswith("Inserted ") for message in progress_messages) @mock.patch(f"{MODULE_PATH}.APITimeSeries") @mock.patch(f"{MODULE_PATH}.CoreTimeSeries") @@ -277,11 +339,11 @@ def test_seed_time_series_rows_flushes_at_batch_size( geographies=[_fake_geography()], stratum=SimpleNamespace(name="All"), age=SimpleNamespace(name="All ages"), - days=5000, + days=FULL_BATCH_DAYS, ) - assert core_count == 5000 - assert api_count == 5000 + assert core_count == FULL_BATCH_DAYS + assert api_count == FULL_BATCH_DAYS spy_core_time_series.objects.bulk_create.assert_called_once() spy_api_time_series.objects.bulk_create.assert_called_once() @@ -346,3 +408,30 @@ def test_add_arguments_rejects_invalid_dataset_value(): with pytest.raises(CommandError): parser.parse_args(["--dataset", "invalid"]) + + +def test_build_theme_hierarchy_records_contains_expected_real_values(): + theme_names, sub_theme_rows, topic_rows = Command._build_theme_hierarchy_records() + + assert "infectious_disease" in theme_names + assert any(sub_theme == "respiratory" for sub_theme, _ in sub_theme_rows) + assert any(topic == "COVID-19" and sub_theme == "respiratory" for topic, sub_theme, _ in topic_rows) + + +def test_build_geography_seed_values_returns_required_count(): + small_geographies = Command._build_geography_seed_values(count=SMALL_GEO_COUNT) + larger_geographies = Command._build_geography_seed_values(count=LARGE_GEO_COUNT) + + assert len(small_geographies) == SMALL_GEO_COUNT + assert len(larger_geographies) == LARGE_GEO_COUNT + assert small_geographies[0]["name"] == "United Kingdom" + assert larger_geographies[-1]["geography_type"] in { + "Nation", + "Lower Tier Local Authority", + } + + +def test_format_enum_name_replaces_underscores_and_title_cases(): + assert Command._format_enum_name("LOWER_TIER_LOCAL_AUTHORITY") == ( + "Lower Tier Local Authority" + ) From b55f4987a96140f2e8e8215fd4ea36c6313fbd5f Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Tue, 10 Mar 2026 00:25:20 +0000 Subject: [PATCH 09/36] linting fix #2 --- .../management/commands/seed_random.py | 293 +++++++++--------- .../interfaces/management/test_seed_random.py | 24 +- 2 files changed, 168 insertions(+), 149 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index b0f9617fa..46cfc51fd 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -121,7 +121,7 @@ def _seed_metrics_data( scale_config: dict[str, int], truncate_first: bool, progress_callback: Callable[[str], None] | None = None, - ) -> dict[str, int]: # noqa: PLR0914 + ) -> dict[str, int]: """Seed supporting metric models and time series rows for the selected scale.""" if progress_callback is not None: progress_callback("Preparing metric taxonomy and geography records...") @@ -130,39 +130,7 @@ def _seed_metrics_data( if truncate_first: cls._truncate_metrics_data() - ( - theme_names, - sub_theme_rows, - topic_rows, - ) = cls._build_theme_hierarchy_records() - themes = cls._bulk_create( - Theme, - [Theme(name=name) for name in theme_names], - ) - themes_by_name = {theme.name: theme for theme in themes} - - sub_themes = cls._bulk_create( - SubTheme, - [ - SubTheme(name=name, theme=themes_by_name[theme_name]) - for name, theme_name in sub_theme_rows - ], - ) - sub_themes_by_key = { - (sub_theme.name, sub_theme.theme.name): sub_theme - for sub_theme in sub_themes - } - - topics = cls._bulk_create( - Topic, - [ - Topic( - name=topic_name, - sub_theme=sub_themes_by_key[(sub_theme_name, theme_name)], - ) - for topic_name, sub_theme_name, theme_name in topic_rows - ], - ) + themes, sub_themes, topics = cls._seed_theme_hierarchy() metrics = cls._bulk_create( Metric, @@ -175,34 +143,7 @@ def _seed_metrics_data( ], ) - geography_seed_values = cls._build_geography_seed_values( - count=scale_config["geographies"] - ) - geography_type_names = { - record["geography_type"] for record in geography_seed_values - } - geography_types = cls._bulk_create( - GeographyType, - [GeographyType(name=name) for name in sorted(geography_type_names)], - ) - geography_types_by_name = { - geography_type.name: geography_type - for geography_type in geography_types - } - - geographies = cls._bulk_create( - Geography, - [ - Geography( - name=record["name"], - geography_code=record["geography_code"], - geography_type=geography_types_by_name[ - record["geography_type"] - ], - ) - for record in geography_seed_values - ], - ) + geographies = cls._seed_geographies(count=scale_config["geographies"]) stratum = Stratum.objects.create(name="All") age = Age.objects.create(name="All ages") @@ -252,10 +193,9 @@ def _seed_time_series_rows( age: Age, days: int, progress_callback: Callable[[str], None] | None = None, - ) -> tuple[int, int]: # noqa: PLR0914 + ) -> tuple[int, int]: frequency = TimePeriod.Weekly.value - today = date.today() - start_date = today - timedelta(days=days - 1) + start_date = date.today() - timedelta(days=days - 1) batch_size = 5000 core_rows: list[CoreTimeSeries] = [] api_rows: list[APITimeSeries] = [] @@ -266,81 +206,29 @@ def _seed_time_series_rows( log_interval = max(1, total_metrics // 10) if total_metrics else 1 for metric_index, metric in enumerate(metrics, start=1): - topic = metric.topic - sub_theme = topic.sub_theme - theme = sub_theme.theme - - for geography in geographies: - for day_offset in range(days): - current_date = start_date + timedelta(days=day_offset) - base_value = random.uniform(5.0, 250.0) # noqa: S311 # nosec B311 - metric_value = round( - base_value - + random.uniform(-10.0, 10.0), # noqa: S311 # nosec B311 - 2, - ) - epidemiological_week = current_date.isocalendar().week - - core_rows.append( - CoreTimeSeries( - metric=metric, - metric_frequency=frequency, - geography=geography, - stratum=stratum, - age=age, - sex=None, - year=current_date.year, - month=current_date.month, - epiweek=epidemiological_week, - date=current_date, - metric_value=Decimal(str(metric_value)), - is_public=True, - ) - ) - - if len(core_rows) >= batch_size: - CoreTimeSeries.objects.bulk_create( - core_rows, batch_size=batch_size - ) - core_count += len(core_rows) - core_rows = [] - - api_rows.append( - APITimeSeries( - metric_frequency=frequency, - age=age.name, - month=current_date.month, - geography_code=geography.geography_code, - metric_group=None, - theme=theme.name, - sub_theme=sub_theme.name, - topic=topic.name, - geography_type=geography.geography_type.name, - geography=geography.name, - metric=metric.name, - stratum=stratum.name, - sex=None, - year=current_date.year, - epiweek=epidemiological_week, - date=current_date, - metric_value=float(metric_value), - is_public=True, - ) - ) - - if len(api_rows) >= batch_size: - APITimeSeries.objects.bulk_create( - api_rows, batch_size=batch_size - ) - api_count += len(api_rows) - api_rows = [] - - if ( - progress_callback is not None - and ( - metric_index == total_metrics - or metric_index % log_interval == 0 - ) + for core_row, api_row in cls._build_time_series_rows_for_metric( + metric=metric, + geographies=geographies, + stratum=stratum, + age=age, + days=days, + start_date=start_date, + frequency=frequency, + ): + core_rows.append(core_row) + if len(core_rows) >= batch_size: + CoreTimeSeries.objects.bulk_create(core_rows, batch_size=batch_size) + core_count += len(core_rows) + core_rows = [] + + api_rows.append(api_row) + if len(api_rows) >= batch_size: + APITimeSeries.objects.bulk_create(api_rows, batch_size=batch_size) + api_count += len(api_rows) + api_rows = [] + + if progress_callback is not None and ( + metric_index == total_metrics or metric_index % log_interval == 0 ): processed_row_count = metric_index * len(geographies) * days progress_callback( @@ -365,6 +253,123 @@ def _seed_time_series_rows( return core_count, api_count + @classmethod + def _seed_theme_hierarchy(cls) -> tuple[list[Theme], list[SubTheme], list[Topic]]: + theme_names, sub_theme_rows, topic_rows = cls._build_theme_hierarchy_records() + themes = cls._bulk_create(Theme, [Theme(name=name) for name in theme_names]) + themes_by_name = {theme.name: theme for theme in themes} + sub_themes = cls._bulk_create( + SubTheme, + [ + SubTheme(name=name, theme=themes_by_name[theme_name]) + for name, theme_name in sub_theme_rows + ], + ) + sub_themes_by_key = { + (sub_theme.name, sub_theme.theme.name): sub_theme + for sub_theme in sub_themes + } + topics = cls._bulk_create( + Topic, + [ + Topic( + name=topic_name, + sub_theme=sub_themes_by_key[(sub_theme_name, theme_name)], + ) + for topic_name, sub_theme_name, theme_name in topic_rows + ], + ) + return themes, sub_themes, topics + + @classmethod + def _seed_geographies(cls, *, count: int) -> list[Geography]: + geography_seed_values = cls._build_geography_seed_values(count=count) + geography_type_names = { + record["geography_type"] for record in geography_seed_values + } + geography_types = cls._bulk_create( + GeographyType, + [GeographyType(name=name) for name in sorted(geography_type_names)], + ) + geography_types_by_name = { + geography_type.name: geography_type for geography_type in geography_types + } + return cls._bulk_create( + Geography, + [ + Geography( + name=record["name"], + geography_code=record["geography_code"], + geography_type=geography_types_by_name[record["geography_type"]], + ) + for record in geography_seed_values + ], + ) + + @classmethod + def _build_time_series_rows_for_metric( + cls, + *, + metric: Metric, + geographies: list[Geography], + stratum: Stratum, + age: Age, + days: int, + start_date: date, + frequency: str, + ) -> Iterable[tuple[CoreTimeSeries, APITimeSeries]]: + topic = metric.topic + sub_theme = topic.sub_theme + theme = sub_theme.theme + + for geography in geographies: + for day_offset in range(days): + current_date = start_date + timedelta(days=day_offset) + base_value = random.uniform(5.0, 250.0) # noqa: S311 # nosec B311 + metric_value = round( + base_value + + random.uniform(-10.0, 10.0), # noqa: S311 # nosec B311 + 2, + ) + epidemiological_week = current_date.isocalendar().week + + yield ( + CoreTimeSeries( + metric=metric, + metric_frequency=frequency, + geography=geography, + stratum=stratum, + age=age, + sex=None, + year=current_date.year, + month=current_date.month, + epiweek=epidemiological_week, + date=current_date, + metric_value=Decimal(str(metric_value)), + is_public=True, + ), + APITimeSeries( + metric_frequency=frequency, + age=age.name, + month=current_date.month, + geography_code=geography.geography_code, + metric_group=None, + theme=theme.name, + sub_theme=sub_theme.name, + topic=topic.name, + geography_type=geography.geography_type.name, + geography=geography.name, + metric=metric.name, + stratum=stratum.name, + sex=None, + year=current_date.year, + epiweek=epidemiological_week, + date=current_date, + metric_value=float(metric_value), + is_public=True, + ), + ) + @staticmethod def _bulk_create(model, records: Iterable): """Materialise and bulk insert a sequence of model instances.""" @@ -386,9 +391,7 @@ def _build_theme_hierarchy_records( ) for sub_theme_name in child_theme_group.return_list(): child_to_parent[sub_theme_name] = resolved_parent - normalised_to_child[cls._normalise_key(sub_theme_name)] = ( - sub_theme_name - ) + normalised_to_child[cls._normalise_key(sub_theme_name)] = sub_theme_name topic_rows: list[tuple[str, str, str]] = [] sub_theme_pairs: set[tuple[str, str]] = set() @@ -418,9 +421,7 @@ def _build_geography_seed_values(cls, *, count: int) -> list[dict[str, str]]: { "name": "United Kingdom", "geography_code": UNITED_KINGDOM_GEOGRAPHY_CODE, - "geography_type": ( - validation_enums.GeographyType.UNITED_KINGDOM.value - ), + "geography_type": (validation_enums.GeographyType.UNITED_KINGDOM.value), } ] diff --git a/tests/unit/metrics/interfaces/management/test_seed_random.py b/tests/unit/metrics/interfaces/management/test_seed_random.py index 1c644ab66..a1de97672 100644 --- a/tests/unit/metrics/interfaces/management/test_seed_random.py +++ b/tests/unit/metrics/interfaces/management/test_seed_random.py @@ -320,8 +320,12 @@ def test_seed_time_series_rows_flushes_remainder( assert api_count == 1 spy_core_time_series.objects.bulk_create.assert_called_once() spy_api_time_series.objects.bulk_create.assert_called_once() - progress_messages = [call.args[0] for call in spy_progress_callback.call_args_list] - assert any(message.startswith("Processed 1/1 metrics") for message in progress_messages) + progress_messages = [ + call.args[0] for call in spy_progress_callback.call_args_list + ] + assert any( + message.startswith("Processed 1/1 metrics") for message in progress_messages + ) assert any(message.startswith("Inserted ") for message in progress_messages) @mock.patch(f"{MODULE_PATH}.APITimeSeries") @@ -415,7 +419,21 @@ def test_build_theme_hierarchy_records_contains_expected_real_values(): assert "infectious_disease" in theme_names assert any(sub_theme == "respiratory" for sub_theme, _ in sub_theme_rows) - assert any(topic == "COVID-19" and sub_theme == "respiratory" for topic, sub_theme, _ in topic_rows) + assert any( + topic == "COVID-19" and sub_theme == "respiratory" + for topic, sub_theme, _ in topic_rows + ) + + +def test_build_theme_hierarchy_records_skips_unmatched_topic_group(): + fake_topic_group = mock.Mock() + fake_topic_group.name = "DOES_NOT_MATCH_CHILD_THEME" + fake_topic_group.return_list.return_value = ["dummy-topic"] + + with mock.patch(f"{MODULE_PATH}.validation_enums.Topic", [fake_topic_group]): + _, _, topic_rows = Command._build_theme_hierarchy_records() + + assert topic_rows == [] def test_build_geography_seed_values_returns_required_count(): From 2eb12b29a728a28aecb6ff9e72f2a454f5e98b8c Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Tue, 24 Mar 2026 13:39:38 +0000 Subject: [PATCH 10/36] make metrics seeding idempotent and enrich generated rows --- .../management/commands/seed_random.py | 235 +++++++++++++++--- .../interfaces/management/test_seed_random.py | 81 ++---- 2 files changed, 221 insertions(+), 95 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index 46cfc51fd..c06a72167 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -1,4 +1,5 @@ import random +import re import time from collections.abc import Callable, Iterable from datetime import date, timedelta @@ -30,10 +31,13 @@ ) SCALE_CONFIGS = { + # Approximate time-series row counts generated by scale: + # small ~1,500, medium ~180,000, large ~7,300,000. "small": {"geographies": 5, "metrics": 10, "days": 30}, "medium": {"geographies": 20, "metrics": 50, "days": 180}, "large": {"geographies": 100, "metrics": 200, "days": 365}, } +SEED_RANDOM_SEX_OPTIONS = ("all", "f", "m") class Command(BaseCommand): @@ -122,7 +126,16 @@ def _seed_metrics_data( truncate_first: bool, progress_callback: Callable[[str], None] | None = None, ) -> dict[str, int]: - """Seed supporting metric models and time series rows for the selected scale.""" + """Seed supporting metric models and time series rows for the selected scale. + + Args: + scale_config: Scale-specific object counts for generated records. + truncate_first: Whether to clear existing metrics-related tables before seeding. + progress_callback: Optional callback used to report progress updates. + + Returns: + Count of created records keyed by model or dataset name. + """ if progress_callback is not None: progress_callback("Preparing metric taxonomy and geography records...") @@ -131,12 +144,13 @@ def _seed_metrics_data( cls._truncate_metrics_data() themes, sub_themes, topics = cls._seed_theme_hierarchy() + metric_start_index = cls._get_next_random_metric_index() metrics = cls._bulk_create( Metric, [ Metric( - name=f"Random Metric {index + 1}", + name=f"Random Metric {metric_start_index + index}", topic=topics[index % len(topics)], ) for index in range(scale_config["metrics"]) @@ -145,8 +159,8 @@ def _seed_metrics_data( geographies = cls._seed_geographies(count=scale_config["geographies"]) - stratum = Stratum.objects.create(name="All") - age = Age.objects.create(name="All ages") + stratum, _ = Stratum.objects.get_or_create(name="All") + age, _ = Age.objects.get_or_create(name="All ages") if progress_callback is not None: progress_callback("Generating Core/API time series rows...") @@ -256,29 +270,108 @@ def _seed_time_series_rows( @classmethod def _seed_theme_hierarchy(cls) -> tuple[list[Theme], list[SubTheme], list[Topic]]: theme_names, sub_theme_rows, topic_rows = cls._build_theme_hierarchy_records() - themes = cls._bulk_create(Theme, [Theme(name=name) for name in theme_names]) - themes_by_name = {theme.name: theme for theme in themes} - sub_themes = cls._bulk_create( - SubTheme, - [ - SubTheme(name=name, theme=themes_by_name[theme_name]) - for name, theme_name in sub_theme_rows - ], + themes_by_name = {theme.name: theme for theme in Theme.objects.filter(name__in=theme_names)} + missing_theme_names = [name for name in theme_names if name not in themes_by_name] + if missing_theme_names: + cls._bulk_create(Theme, [Theme(name=name) for name in missing_theme_names]) + themes_by_name.update( + {theme.name: theme for theme in Theme.objects.filter(name__in=missing_theme_names)} + ) + themes = [themes_by_name[name] for name in theme_names] + + sub_theme_keys = list(dict.fromkeys(sub_theme_rows)) + existing_sub_themes = SubTheme.objects.select_related("theme").filter( + theme__name__in=theme_names, + name__in={name for name, _ in sub_theme_keys}, ) sub_themes_by_key = { (sub_theme.name, sub_theme.theme.name): sub_theme - for sub_theme in sub_themes + for sub_theme in existing_sub_themes } - topics = cls._bulk_create( - Topic, - [ - Topic( - name=topic_name, - sub_theme=sub_themes_by_key[(sub_theme_name, theme_name)], - ) + missing_sub_theme_keys = [ + (sub_theme_name, theme_name) + for sub_theme_name, theme_name in sub_theme_keys + if (sub_theme_name, theme_name) not in sub_themes_by_key + ] + if missing_sub_theme_keys: + cls._bulk_create( + SubTheme, + [ + SubTheme(name=sub_theme_name, theme=themes_by_name[theme_name]) + for sub_theme_name, theme_name in missing_sub_theme_keys + ], + ) + sub_themes_by_key.update( + { + (sub_theme.name, sub_theme.theme.name): sub_theme + for sub_theme in SubTheme.objects.select_related("theme").filter( + theme__name__in={theme_name for _, theme_name in missing_sub_theme_keys}, + name__in={sub_theme_name for sub_theme_name, _ in missing_sub_theme_keys}, + ) + } + ) + sub_themes = [sub_themes_by_key[key] for key in sub_theme_keys] + + topic_keys = list( + dict.fromkeys( + (topic_name, sub_theme_name, theme_name) for topic_name, sub_theme_name, theme_name in topic_rows - ], + ) ) + sub_themes_by_id_key = { + (sub_theme_name, theme_name): sub_themes_by_key[(sub_theme_name, theme_name)] + for _, sub_theme_name, theme_name in topic_keys + } + candidate_sub_theme_ids = [sub_theme.id for sub_theme in sub_themes_by_id_key.values()] + existing_topics = Topic.objects.filter( + sub_theme_id__in=candidate_sub_theme_ids, + name__in={topic_name for topic_name, _, _ in topic_keys}, + ) + topics_by_key = { + (topic.name, topic.sub_theme_id): topic + for topic in existing_topics + } + missing_topic_keys = [ + topic_key + for topic_key in topic_keys + if ( + topic_key[0], + sub_themes_by_id_key[(topic_key[1], topic_key[2])].id, + ) + not in topics_by_key + ] + if missing_topic_keys: + cls._bulk_create( + Topic, + [ + Topic( + name=topic_name, + sub_theme=sub_themes_by_id_key[(sub_theme_name, theme_name)], + ) + for topic_name, sub_theme_name, theme_name in missing_topic_keys + ], + ) + topics_by_key.update( + { + (topic.name, topic.sub_theme_id): topic + for topic in Topic.objects.filter( + sub_theme_id__in=[ + sub_themes_by_id_key[(sub_theme_name, theme_name)].id + for _, sub_theme_name, theme_name in missing_topic_keys + ], + name__in={topic_name for topic_name, _, _ in missing_topic_keys}, + ) + } + ) + topics = [ + topics_by_key[ + ( + topic_name, + sub_themes_by_id_key[(sub_theme_name, theme_name)].id, + ) + ] + for topic_name, sub_theme_name, theme_name in topic_keys + ] return themes, sub_themes, topics @classmethod @@ -287,24 +380,78 @@ def _seed_geographies(cls, *, count: int) -> list[Geography]: geography_type_names = { record["geography_type"] for record in geography_seed_values } - geography_types = cls._bulk_create( - GeographyType, - [GeographyType(name=name) for name in sorted(geography_type_names)], - ) + geography_type_names = sorted(geography_type_names) geography_types_by_name = { - geography_type.name: geography_type for geography_type in geography_types + geography_type.name: geography_type + for geography_type in GeographyType.objects.filter(name__in=geography_type_names) } - return cls._bulk_create( - Geography, - [ - Geography( - name=record["name"], - geography_code=record["geography_code"], - geography_type=geography_types_by_name[record["geography_type"]], - ) + missing_geography_type_names = [ + name for name in geography_type_names if name not in geography_types_by_name + ] + if missing_geography_type_names: + cls._bulk_create( + GeographyType, + [GeographyType(name=name) for name in missing_geography_type_names], + ) + geography_types_by_name.update( + { + geography_type.name: geography_type + for geography_type in GeographyType.objects.filter( + name__in=missing_geography_type_names + ) + } + ) + geography_types_by_name = { + name: geography_types_by_name[name] for name in geography_type_names + } + + geography_keys = list( + dict.fromkeys( + (record["name"], record["geography_type"], record["geography_code"]) for record in geography_seed_values - ], + ) + ) + existing_geographies = Geography.objects.select_related("geography_type").filter( + name__in={name for name, _, _ in geography_keys}, + geography_type__name__in={geography_type for _, geography_type, _ in geography_keys}, ) + geographies_by_key = { + (geography.name, geography.geography_type.name): geography + for geography in existing_geographies + } + missing_geography_keys = [ + (name, geography_type, geography_code) + for name, geography_type, geography_code in geography_keys + if (name, geography_type) not in geographies_by_key + ] + if missing_geography_keys: + cls._bulk_create( + Geography, + [ + Geography( + name=name, + geography_code=geography_code, + geography_type=geography_types_by_name[geography_type], + ) + for name, geography_type, geography_code in missing_geography_keys + ], + ) + geographies_by_key.update( + { + (geography.name, geography.geography_type.name): geography + for geography in Geography.objects.select_related("geography_type").filter( + name__in={name for name, _, _ in missing_geography_keys}, + geography_type__name__in={ + geography_type for _, geography_type, _ in missing_geography_keys + }, + ) + } + ) + + return [ + geographies_by_key[(name, geography_type)] + for name, geography_type, _ in geography_keys + ] @classmethod def _build_time_series_rows_for_metric( @@ -331,6 +478,7 @@ def _build_time_series_rows_for_metric( + random.uniform(-10.0, 10.0), # noqa: S311 # nosec B311 2, ) + sex = random.choice(SEED_RANDOM_SEX_OPTIONS) # noqa: S311 # nosec B311 epidemiological_week = current_date.isocalendar().week yield ( @@ -340,7 +488,7 @@ def _build_time_series_rows_for_metric( geography=geography, stratum=stratum, age=age, - sex=None, + sex=sex, year=current_date.year, month=current_date.month, epiweek=epidemiological_week, @@ -361,7 +509,7 @@ def _build_time_series_rows_for_metric( geography=geography.name, metric=metric.name, stratum=stratum.name, - sex=None, + sex=sex, year=current_date.year, epiweek=epidemiological_week, date=current_date, @@ -375,6 +523,19 @@ def _bulk_create(model, records: Iterable): """Materialise and bulk insert a sequence of model instances.""" return model.objects.bulk_create(list(records)) + @staticmethod + def _get_next_random_metric_index() -> int: + max_metric_index = 0 + for metric_name in Metric.objects.filter(name__startswith="Random Metric ").values_list( + "name", + flat=True, + ): + match = re.fullmatch(r"Random Metric (\d+)", metric_name) + if match is None: + continue + max_metric_index = max(max_metric_index, int(match.group(1))) + return max_metric_index + 1 + @classmethod def _build_theme_hierarchy_records( cls, diff --git a/tests/unit/metrics/interfaces/management/test_seed_random.py b/tests/unit/metrics/interfaces/management/test_seed_random.py index a1de97672..7e965f1e2 100644 --- a/tests/unit/metrics/interfaces/management/test_seed_random.py +++ b/tests/unit/metrics/interfaces/management/test_seed_random.py @@ -125,69 +125,34 @@ def test_handle_cms_dataset_uses_time_seed_and_builds_cms( @mock.patch.object(Command, "_truncate_metrics_data") @mock.patch.object(Command, "_seed_time_series_rows") - @mock.patch.object(Command, "_build_geography_seed_values") - @mock.patch.object(Command, "_build_theme_hierarchy_records") + @mock.patch.object(Command, "_seed_geographies") + @mock.patch.object(Command, "_seed_theme_hierarchy") + @mock.patch.object(Command, "_get_next_random_metric_index") @mock.patch.object(Command, "_bulk_create") - @mock.patch(f"{MODULE_PATH}.Geography") @mock.patch(f"{MODULE_PATH}.Metric") - @mock.patch(f"{MODULE_PATH}.Topic") - @mock.patch(f"{MODULE_PATH}.SubTheme") - @mock.patch(f"{MODULE_PATH}.Theme") - @mock.patch(f"{MODULE_PATH}.GeographyType") @mock.patch(f"{MODULE_PATH}.transaction.atomic") - @mock.patch(f"{MODULE_PATH}.Stratum.objects.create") - @mock.patch(f"{MODULE_PATH}.Age.objects.create") + @mock.patch(f"{MODULE_PATH}.Stratum.objects.get_or_create") + @mock.patch(f"{MODULE_PATH}.Age.objects.get_or_create") def test_seed_metrics_data_builds_expected_counts_and_calls( self, - spy_age_create: mock.MagicMock, - spy_stratum_create: mock.MagicMock, + spy_age_get_or_create: mock.MagicMock, + spy_stratum_get_or_create: mock.MagicMock, spy_atomic: mock.MagicMock, - spy_geography_type: mock.MagicMock, - spy_theme: mock.MagicMock, - spy_sub_theme: mock.MagicMock, - spy_topic: mock.MagicMock, spy_metric: mock.MagicMock, - spy_geography: mock.MagicMock, spy_bulk_create: mock.MagicMock, - spy_build_theme_hierarchy_records: mock.MagicMock, - spy_build_geography_seed_values: mock.MagicMock, + spy_get_next_random_metric_index: mock.MagicMock, + spy_seed_theme_hierarchy: mock.MagicMock, + spy_seed_geographies: mock.MagicMock, spy_seed_time_series_rows: mock.MagicMock, spy_truncate: mock.MagicMock, ): spy_progress_callback = mock.MagicMock() spy_atomic.return_value = nullcontext() - spy_geography_type.side_effect = SimpleNamespace - spy_theme.side_effect = SimpleNamespace - spy_sub_theme.side_effect = SimpleNamespace - spy_topic.side_effect = SimpleNamespace spy_metric.side_effect = SimpleNamespace - spy_geography.side_effect = SimpleNamespace - spy_stratum_create.return_value = SimpleNamespace(name="All") - spy_age_create.return_value = SimpleNamespace(name="All ages") + spy_get_next_random_metric_index.return_value = 1 + spy_stratum_get_or_create.return_value = (SimpleNamespace(name="All"), False) + spy_age_get_or_create.return_value = (SimpleNamespace(name="All ages"), False) spy_seed_time_series_rows.return_value = (77, 88) - spy_build_theme_hierarchy_records.return_value = ( - ["infectious_disease", "climate_and_environment"], - [ - ("respiratory", "infectious_disease"), - ("vectors", "climate_and_environment"), - ], - [ - ("COVID-19", "respiratory", "infectious_disease"), - ("ticks", "vectors", "climate_and_environment"), - ], - ) - spy_build_geography_seed_values.return_value = [ - { - "name": "England", - "geography_code": "E92000001", - "geography_type": "Nation", - }, - { - "name": "Area 2", - "geography_code": "E09000002", - "geography_type": "Lower Tier Local Authority", - }, - ] themes = [ SimpleNamespace(name="infectious_disease"), @@ -229,14 +194,9 @@ def test_seed_metrics_data_builds_expected_counts_and_calls( geography_type=geography_types[1], ), ] - spy_bulk_create.side_effect = [ - themes, - sub_themes, - topics, - metrics, - geography_types, - geographies, - ] + spy_seed_theme_hierarchy.return_value = (themes, sub_themes, topics) + spy_seed_geographies.return_value = geographies + spy_bulk_create.return_value = metrics result = Command._seed_metrics_data( scale_config={"geographies": 2, "metrics": 4, "days": 9}, @@ -257,8 +217,8 @@ def test_seed_metrics_data_builds_expected_counts_and_calls( spy_seed_time_series_rows.assert_called_once_with( metrics=metrics, geographies=geographies, - stratum=spy_stratum_create.return_value, - age=spy_age_create.return_value, + stratum=spy_stratum_get_or_create.return_value[0], + age=spy_age_get_or_create.return_value[0], days=9, progress_callback=spy_progress_callback, ) @@ -298,11 +258,14 @@ def test_truncate_metrics_data_deletes_from_all_models(self): @mock.patch(f"{MODULE_PATH}.APITimeSeries") @mock.patch(f"{MODULE_PATH}.CoreTimeSeries") + @mock.patch(f"{MODULE_PATH}.random.choice") def test_seed_time_series_rows_flushes_remainder( self, + spy_random_choice: mock.MagicMock, spy_core_time_series: mock.MagicMock, spy_api_time_series: mock.MagicMock, ): + spy_random_choice.return_value = "f" spy_core_time_series.side_effect = lambda **kwargs: kwargs spy_api_time_series.side_effect = lambda **kwargs: kwargs spy_progress_callback = mock.MagicMock() @@ -320,6 +283,8 @@ def test_seed_time_series_rows_flushes_remainder( assert api_count == 1 spy_core_time_series.objects.bulk_create.assert_called_once() spy_api_time_series.objects.bulk_create.assert_called_once() + assert spy_core_time_series.call_args.kwargs["sex"] == "f" + assert spy_api_time_series.call_args.kwargs["sex"] == "f" progress_messages = [ call.args[0] for call in spy_progress_callback.call_args_list ] From 500ac2171de4fe5deedd04655e061c1c473b0a5d Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Tue, 24 Mar 2026 13:52:48 +0000 Subject: [PATCH 11/36] fixed linting and test coverage --- .../management/commands/seed_random.py | 107 ++++++-- .../interfaces/management/test_seed_random.py | 256 ++++++++++++++++++ 2 files changed, 337 insertions(+), 26 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index c06a72167..773ddfaef 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -270,15 +270,48 @@ def _seed_time_series_rows( @classmethod def _seed_theme_hierarchy(cls) -> tuple[list[Theme], list[SubTheme], list[Topic]]: theme_names, sub_theme_rows, topic_rows = cls._build_theme_hierarchy_records() - themes_by_name = {theme.name: theme for theme in Theme.objects.filter(name__in=theme_names)} - missing_theme_names = [name for name in theme_names if name not in themes_by_name] + themes, themes_by_name = cls._upsert_themes(theme_names=theme_names) + sub_themes, sub_themes_by_key = cls._upsert_sub_themes( + theme_names=theme_names, + sub_theme_rows=sub_theme_rows, + themes_by_name=themes_by_name, + ) + topics = cls._upsert_topics( + topic_rows=topic_rows, + sub_themes_by_key=sub_themes_by_key, + ) + return themes, sub_themes, topics + + @classmethod + def _upsert_themes( + cls, + *, + theme_names: list[str], + ) -> tuple[list[Theme], dict[str, Theme]]: + themes_by_name = { + theme.name: theme for theme in Theme.objects.filter(name__in=theme_names) + } + missing_theme_names = [ + name for name in theme_names if name not in themes_by_name + ] if missing_theme_names: cls._bulk_create(Theme, [Theme(name=name) for name in missing_theme_names]) themes_by_name.update( - {theme.name: theme for theme in Theme.objects.filter(name__in=missing_theme_names)} + { + theme.name: theme + for theme in Theme.objects.filter(name__in=missing_theme_names) + } ) - themes = [themes_by_name[name] for name in theme_names] + return [themes_by_name[name] for name in theme_names], themes_by_name + @classmethod + def _upsert_sub_themes( + cls, + *, + theme_names: list[str], + sub_theme_rows: list[tuple[str, str]], + themes_by_name: dict[str, Theme], + ) -> tuple[list[SubTheme], dict[tuple[str, str], SubTheme]]: sub_theme_keys = list(dict.fromkeys(sub_theme_rows)) existing_sub_themes = SubTheme.objects.select_related("theme").filter( theme__name__in=theme_names, @@ -305,31 +338,41 @@ def _seed_theme_hierarchy(cls) -> tuple[list[Theme], list[SubTheme], list[Topic] { (sub_theme.name, sub_theme.theme.name): sub_theme for sub_theme in SubTheme.objects.select_related("theme").filter( - theme__name__in={theme_name for _, theme_name in missing_sub_theme_keys}, - name__in={sub_theme_name for sub_theme_name, _ in missing_sub_theme_keys}, + theme__name__in={ + theme_name for _, theme_name in missing_sub_theme_keys + }, + name__in={ + sub_theme_name + for sub_theme_name, _ in missing_sub_theme_keys + }, ) } ) - sub_themes = [sub_themes_by_key[key] for key in sub_theme_keys] + return [sub_themes_by_key[key] for key in sub_theme_keys], sub_themes_by_key - topic_keys = list( - dict.fromkeys( - (topic_name, sub_theme_name, theme_name) - for topic_name, sub_theme_name, theme_name in topic_rows - ) - ) + @classmethod + def _upsert_topics( + cls, + *, + topic_rows: list[tuple[str, str, str]], + sub_themes_by_key: dict[tuple[str, str], SubTheme], + ) -> list[Topic]: + topic_keys = list(dict.fromkeys(topic_rows)) sub_themes_by_id_key = { - (sub_theme_name, theme_name): sub_themes_by_key[(sub_theme_name, theme_name)] + (sub_theme_name, theme_name): sub_themes_by_key[ + (sub_theme_name, theme_name) + ] for _, sub_theme_name, theme_name in topic_keys } - candidate_sub_theme_ids = [sub_theme.id for sub_theme in sub_themes_by_id_key.values()] + candidate_sub_theme_ids = [ + sub_theme.id for sub_theme in sub_themes_by_id_key.values() + ] existing_topics = Topic.objects.filter( sub_theme_id__in=candidate_sub_theme_ids, name__in={topic_name for topic_name, _, _ in topic_keys}, ) topics_by_key = { - (topic.name, topic.sub_theme_id): topic - for topic in existing_topics + (topic.name, topic.sub_theme_id): topic for topic in existing_topics } missing_topic_keys = [ topic_key @@ -359,11 +402,13 @@ def _seed_theme_hierarchy(cls) -> tuple[list[Theme], list[SubTheme], list[Topic] sub_themes_by_id_key[(sub_theme_name, theme_name)].id for _, sub_theme_name, theme_name in missing_topic_keys ], - name__in={topic_name for topic_name, _, _ in missing_topic_keys}, + name__in={ + topic_name for topic_name, _, _ in missing_topic_keys + }, ) } ) - topics = [ + return [ topics_by_key[ ( topic_name, @@ -372,7 +417,6 @@ def _seed_theme_hierarchy(cls) -> tuple[list[Theme], list[SubTheme], list[Topic] ] for topic_name, sub_theme_name, theme_name in topic_keys ] - return themes, sub_themes, topics @classmethod def _seed_geographies(cls, *, count: int) -> list[Geography]: @@ -383,7 +427,9 @@ def _seed_geographies(cls, *, count: int) -> list[Geography]: geography_type_names = sorted(geography_type_names) geography_types_by_name = { geography_type.name: geography_type - for geography_type in GeographyType.objects.filter(name__in=geography_type_names) + for geography_type in GeographyType.objects.filter( + name__in=geography_type_names + ) } missing_geography_type_names = [ name for name in geography_type_names if name not in geography_types_by_name @@ -411,9 +457,13 @@ def _seed_geographies(cls, *, count: int) -> list[Geography]: for record in geography_seed_values ) ) - existing_geographies = Geography.objects.select_related("geography_type").filter( + existing_geographies = Geography.objects.select_related( + "geography_type" + ).filter( name__in={name for name, _, _ in geography_keys}, - geography_type__name__in={geography_type for _, geography_type, _ in geography_keys}, + geography_type__name__in={ + geography_type for _, geography_type, _ in geography_keys + }, ) geographies_by_key = { (geography.name, geography.geography_type.name): geography @@ -439,10 +489,13 @@ def _seed_geographies(cls, *, count: int) -> list[Geography]: geographies_by_key.update( { (geography.name, geography.geography_type.name): geography - for geography in Geography.objects.select_related("geography_type").filter( + for geography in Geography.objects.select_related( + "geography_type" + ).filter( name__in={name for name, _, _ in missing_geography_keys}, geography_type__name__in={ - geography_type for _, geography_type, _ in missing_geography_keys + geography_type + for _, geography_type, _ in missing_geography_keys }, ) } @@ -526,7 +579,9 @@ def _bulk_create(model, records: Iterable): @staticmethod def _get_next_random_metric_index() -> int: max_metric_index = 0 - for metric_name in Metric.objects.filter(name__startswith="Random Metric ").values_list( + for metric_name in Metric.objects.filter( + name__startswith="Random Metric " + ).values_list( "name", flat=True, ): diff --git a/tests/unit/metrics/interfaces/management/test_seed_random.py b/tests/unit/metrics/interfaces/management/test_seed_random.py index 7e965f1e2..77d6e0b90 100644 --- a/tests/unit/metrics/interfaces/management/test_seed_random.py +++ b/tests/unit/metrics/interfaces/management/test_seed_random.py @@ -13,6 +13,8 @@ FULL_BATCH_DAYS = 5000 SMALL_GEO_COUNT = 3 LARGE_GEO_COUNT = 7 +EXPECTED_BULK_CREATE_CALLS = 2 +EXPECTED_NEXT_METRIC_INDEX = 11 def _fake_metric_hierarchy() -> SimpleNamespace: @@ -418,3 +420,257 @@ def test_format_enum_name_replaces_underscores_and_title_cases(): assert Command._format_enum_name("LOWER_TIER_LOCAL_AUTHORITY") == ( "Lower Tier Local Authority" ) + + +@mock.patch.object(Command, "_upsert_topics") +@mock.patch.object(Command, "_upsert_sub_themes") +@mock.patch.object(Command, "_upsert_themes") +@mock.patch.object(Command, "_build_theme_hierarchy_records") +def test_seed_theme_hierarchy_delegates_to_upsert_helpers( + spy_build_theme_hierarchy_records: mock.MagicMock, + spy_upsert_themes: mock.MagicMock, + spy_upsert_sub_themes: mock.MagicMock, + spy_upsert_topics: mock.MagicMock, +): + theme_names = ["theme_1"] + sub_theme_rows = [("sub_1", "theme_1")] + topic_rows = [("topic_1", "sub_1", "theme_1")] + themes = [SimpleNamespace(name="theme_1")] + sub_themes = [SimpleNamespace(name="sub_1", theme=themes[0])] + sub_theme_map = {("sub_1", "theme_1"): sub_themes[0]} + topics = [SimpleNamespace(name="topic_1", sub_theme=sub_themes[0])] + themes_by_name = {"theme_1": themes[0]} + + spy_build_theme_hierarchy_records.return_value = ( + theme_names, + sub_theme_rows, + topic_rows, + ) + spy_upsert_themes.return_value = (themes, themes_by_name) + spy_upsert_sub_themes.return_value = (sub_themes, sub_theme_map) + spy_upsert_topics.return_value = topics + + result = Command._seed_theme_hierarchy() + + assert result == (themes, sub_themes, topics) + spy_upsert_themes.assert_called_once_with(theme_names=theme_names) + spy_upsert_sub_themes.assert_called_once_with( + theme_names=theme_names, + sub_theme_rows=sub_theme_rows, + themes_by_name=themes_by_name, + ) + spy_upsert_topics.assert_called_once_with( + topic_rows=topic_rows, + sub_themes_by_key=sub_theme_map, + ) + + +@mock.patch.object(Command, "_bulk_create") +@mock.patch(f"{MODULE_PATH}.Theme") +def test_upsert_themes_creates_missing_and_returns_requested_order( + spy_theme: mock.MagicMock, + spy_bulk_create: mock.MagicMock, +): + existing_theme = SimpleNamespace(name="theme_1") + created_theme = SimpleNamespace(name="theme_2") + spy_theme.side_effect = SimpleNamespace + spy_theme.objects.filter.side_effect = [[existing_theme], [created_theme]] + + themes, themes_by_name = Command._upsert_themes(theme_names=["theme_1", "theme_2"]) + + assert [theme.name for theme in themes] == ["theme_1", "theme_2"] + assert themes_by_name == {"theme_1": existing_theme, "theme_2": created_theme} + spy_bulk_create.assert_called_once() + + +@mock.patch.object(Command, "_bulk_create") +@mock.patch(f"{MODULE_PATH}.SubTheme") +def test_upsert_sub_themes_creates_missing_and_returns_requested_order( + spy_sub_theme: mock.MagicMock, + spy_bulk_create: mock.MagicMock, +): + theme_1 = SimpleNamespace(name="theme_1") + theme_2 = SimpleNamespace(name="theme_2") + existing_sub_theme = SimpleNamespace(name="sub_1", theme=theme_1) + created_sub_theme = SimpleNamespace(name="sub_2", theme=theme_2) + + spy_sub_theme.side_effect = SimpleNamespace + spy_sub_theme.objects.select_related.return_value.filter.side_effect = [ + [existing_sub_theme], + [created_sub_theme], + ] + + sub_themes, sub_theme_map = Command._upsert_sub_themes( + theme_names=["theme_1", "theme_2"], + sub_theme_rows=[("sub_1", "theme_1"), ("sub_2", "theme_2")], + themes_by_name={"theme_1": theme_1, "theme_2": theme_2}, + ) + + assert [(sub_theme.name, sub_theme.theme.name) for sub_theme in sub_themes] == [ + ("sub_1", "theme_1"), + ("sub_2", "theme_2"), + ] + assert sub_theme_map == { + ("sub_1", "theme_1"): existing_sub_theme, + ("sub_2", "theme_2"): created_sub_theme, + } + spy_bulk_create.assert_called_once() + + +@mock.patch.object(Command, "_bulk_create") +@mock.patch(f"{MODULE_PATH}.Topic") +def test_upsert_topics_creates_missing_and_returns_requested_order( + spy_topic: mock.MagicMock, + spy_bulk_create: mock.MagicMock, +): + sub_theme_1 = SimpleNamespace(id=1, name="sub_1") + sub_theme_2 = SimpleNamespace(id=2, name="sub_2") + existing_topic = SimpleNamespace(name="topic_1", sub_theme_id=1) + created_topic = SimpleNamespace(name="topic_2", sub_theme_id=2) + + spy_topic.side_effect = lambda **kwargs: SimpleNamespace( + name=kwargs["name"], + sub_theme=kwargs["sub_theme"], + sub_theme_id=kwargs["sub_theme"].id, + ) + spy_topic.objects.filter.side_effect = [[existing_topic], [created_topic]] + + topics = Command._upsert_topics( + topic_rows=[("topic_1", "sub_1", "theme_1"), ("topic_2", "sub_2", "theme_2")], + sub_themes_by_key={ + ("sub_1", "theme_1"): sub_theme_1, + ("sub_2", "theme_2"): sub_theme_2, + }, + ) + + assert [(topic.name, topic.sub_theme_id) for topic in topics] == [ + ("topic_1", 1), + ("topic_2", 2), + ] + spy_bulk_create.assert_called_once() + + +@mock.patch.object(Command, "_build_geography_seed_values") +@mock.patch.object(Command, "_bulk_create") +@mock.patch(f"{MODULE_PATH}.Geography") +@mock.patch(f"{MODULE_PATH}.GeographyType") +def test_seed_geographies_creates_missing_types_and_geographies( + spy_geography_type: mock.MagicMock, + spy_geography: mock.MagicMock, + spy_bulk_create: mock.MagicMock, + spy_build_geography_seed_values: mock.MagicMock, +): + nation_type = SimpleNamespace(name="Nation") + ltla_type = SimpleNamespace(name="Lower Tier Local Authority") + existing_geography = SimpleNamespace( + name="England", + geography_type=nation_type, + geography_code="E92000001", + ) + created_geography = SimpleNamespace( + name="Area 2", + geography_type=ltla_type, + geography_code="E09000002", + ) + spy_geography_type.side_effect = SimpleNamespace + spy_geography.side_effect = SimpleNamespace + spy_build_geography_seed_values.return_value = [ + { + "name": "England", + "geography_code": "E92000001", + "geography_type": "Nation", + }, + { + "name": "Area 2", + "geography_code": "E09000002", + "geography_type": "Lower Tier Local Authority", + }, + ] + spy_geography_type.objects.filter.side_effect = [[nation_type], [ltla_type]] + spy_geography.objects.select_related.return_value.filter.side_effect = [ + [existing_geography], + [created_geography], + ] + + result = Command._seed_geographies(count=2) + + assert [ + (geography.name, geography.geography_type.name) for geography in result + ] == [ + ("England", "Nation"), + ("Area 2", "Lower Tier Local Authority"), + ] + assert spy_bulk_create.call_count == EXPECTED_BULK_CREATE_CALLS + + +@mock.patch.object(Command, "_build_geography_seed_values") +@mock.patch.object(Command, "_bulk_create") +@mock.patch(f"{MODULE_PATH}.Geography") +@mock.patch(f"{MODULE_PATH}.GeographyType") +def test_seed_geographies_reuses_existing_without_creating( + spy_geography_type: mock.MagicMock, + spy_geography: mock.MagicMock, + spy_bulk_create: mock.MagicMock, + spy_build_geography_seed_values: mock.MagicMock, +): + nation_type = SimpleNamespace(name="Nation") + ltla_type = SimpleNamespace(name="Lower Tier Local Authority") + england = SimpleNamespace( + name="England", + geography_type=nation_type, + geography_code="E92000001", + ) + area_2 = SimpleNamespace( + name="Area 2", + geography_type=ltla_type, + geography_code="E09000002", + ) + spy_build_geography_seed_values.return_value = [ + { + "name": "England", + "geography_code": "E92000001", + "geography_type": "Nation", + }, + { + "name": "Area 2", + "geography_code": "E09000002", + "geography_type": "Lower Tier Local Authority", + }, + ] + spy_geography_type.objects.filter.return_value = [nation_type, ltla_type] + spy_geography.objects.select_related.return_value.filter.return_value = [ + england, + area_2, + ] + + result = Command._seed_geographies(count=2) + + assert result == [england, area_2] + spy_bulk_create.assert_not_called() + + +@mock.patch(f"{MODULE_PATH}.Metric.objects.filter") +def test_get_next_random_metric_index_ignores_non_matching_names( + spy_metric_filter: mock.MagicMock, +): + spy_metric_filter.return_value.values_list.return_value = [ + "Random Metric 2", + "Random Metric x", + "Some Other Metric", + "Random Metric 10", + ] + + result = Command._get_next_random_metric_index() + + assert result == EXPECTED_NEXT_METRIC_INDEX + + +@mock.patch(f"{MODULE_PATH}.Metric.objects.filter") +def test_get_next_random_metric_index_defaults_to_one_when_no_matches( + spy_metric_filter: mock.MagicMock, +): + spy_metric_filter.return_value.values_list.return_value = ["Some Other Metric"] + + result = Command._get_next_random_metric_index() + + assert result == 1 From 61135a1555628b93627942c4953586e1c185426b Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Tue, 24 Mar 2026 14:06:20 +0000 Subject: [PATCH 12/36] sonarqube check fixed --- metrics/interfaces/management/commands/seed_random.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index 773ddfaef..f52e52fe5 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -5,11 +5,12 @@ from datetime import date, timedelta from decimal import Decimal from operator import itemgetter -from typing import override +from typing import TypeVar, override from django.core.management import CommandParser, call_command from django.core.management.base import BaseCommand from django.db import transaction +from django.db.models import Model from metrics.data.enums import TimePeriod from metrics.data.models.api_models import APITimeSeries @@ -38,6 +39,7 @@ "large": {"geographies": 100, "metrics": 200, "days": 365}, } SEED_RANDOM_SEX_OPTIONS = ("all", "f", "m") +TModel = TypeVar("TModel", bound=Model) class Command(BaseCommand): @@ -572,7 +574,7 @@ def _build_time_series_rows_for_metric( ) @staticmethod - def _bulk_create(model, records: Iterable): + def _bulk_create(model: type[TModel], records: Iterable[TModel]) -> list[TModel]: """Materialise and bulk insert a sequence of model instances.""" return model.objects.bulk_create(list(records)) From d26a418b589448affbf259500bb194ec4a9be16a Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Tue, 24 Mar 2026 14:10:08 +0000 Subject: [PATCH 13/36] sonarqube fix #2 --- .../management/commands/seed_random.py | 8 +++-- .../interfaces/management/test_seed_random.py | 30 ++++++++++++++----- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index f52e52fe5..e5e755513 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -5,7 +5,7 @@ from datetime import date, timedelta from decimal import Decimal from operator import itemgetter -from typing import TypeVar, override +from typing import TypeVar, cast, override from django.core.management import CommandParser, call_command from django.core.management.base import BaseCommand @@ -161,8 +161,10 @@ def _seed_metrics_data( geographies = cls._seed_geographies(count=scale_config["geographies"]) - stratum, _ = Stratum.objects.get_or_create(name="All") - age, _ = Age.objects.get_or_create(name="All ages") + stratum_record, _ = Stratum.objects.get_or_create(name="All") + age_record, _ = Age.objects.get_or_create(name="All ages") + stratum = cast(Stratum, stratum_record) + age = cast(Age, age_record) if progress_callback is not None: progress_callback("Generating Core/API time series rows...") diff --git a/tests/unit/metrics/interfaces/management/test_seed_random.py b/tests/unit/metrics/interfaces/management/test_seed_random.py index 77d6e0b90..beb5ada33 100644 --- a/tests/unit/metrics/interfaces/management/test_seed_random.py +++ b/tests/unit/metrics/interfaces/management/test_seed_random.py @@ -1,12 +1,14 @@ from collections.abc import Iterator from contextlib import ExitStack, nullcontext from types import SimpleNamespace +from typing import cast from unittest import mock import pytest from django.core.management import CommandParser from django.core.management.base import CommandError +from metrics.data.models.core_models.supporting import Age, Stratum from metrics.interfaces.management.commands.seed_random import SCALE_CONFIGS, Command MODULE_PATH = "metrics.interfaces.management.commands.seed_random" @@ -33,6 +35,21 @@ def _fake_geography() -> SimpleNamespace: ) +def _fake_stratum() -> Stratum: + return cast(Stratum, SimpleNamespace(name="All")) + + +def _fake_age() -> Age: + return cast(Age, SimpleNamespace(name="All ages")) + + +def _assert_progress_messages(progress_messages: list[str]) -> None: + assert any( + message.startswith("Processed 1/1 metrics") for message in progress_messages + ) + assert any(message.startswith("Inserted ") for message in progress_messages) + + class TestSeedRandomCommand: def test_add_arguments_parses_defaults(self): parser = CommandParser(prog="manage.py seed_random") @@ -275,8 +292,8 @@ def test_seed_time_series_rows_flushes_remainder( core_count, api_count = Command._seed_time_series_rows( metrics=[_fake_metric_hierarchy()], geographies=[_fake_geography()], - stratum=SimpleNamespace(name="All"), - age=SimpleNamespace(name="All ages"), + stratum=_fake_stratum(), + age=_fake_age(), days=1, progress_callback=spy_progress_callback, ) @@ -290,10 +307,7 @@ def test_seed_time_series_rows_flushes_remainder( progress_messages = [ call.args[0] for call in spy_progress_callback.call_args_list ] - assert any( - message.startswith("Processed 1/1 metrics") for message in progress_messages - ) - assert any(message.startswith("Inserted ") for message in progress_messages) + _assert_progress_messages(progress_messages) @mock.patch(f"{MODULE_PATH}.APITimeSeries") @mock.patch(f"{MODULE_PATH}.CoreTimeSeries") @@ -308,8 +322,8 @@ def test_seed_time_series_rows_flushes_at_batch_size( core_count, api_count = Command._seed_time_series_rows( metrics=[_fake_metric_hierarchy()], geographies=[_fake_geography()], - stratum=SimpleNamespace(name="All"), - age=SimpleNamespace(name="All ages"), + stratum=_fake_stratum(), + age=_fake_age(), days=FULL_BATCH_DAYS, ) From d891ef5b9b9ee495b45eaef4e39b718b978caaee Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Tue, 24 Mar 2026 14:12:56 +0000 Subject: [PATCH 14/36] sonarqube fix #3 --- .../management/commands/seed_random.py | 67 ++++++++++++++----- 1 file changed, 52 insertions(+), 15 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index e5e755513..d485f9cb0 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -234,16 +234,20 @@ def _seed_time_series_rows( frequency=frequency, ): core_rows.append(core_row) - if len(core_rows) >= batch_size: - CoreTimeSeries.objects.bulk_create(core_rows, batch_size=batch_size) - core_count += len(core_rows) - core_rows = [] + core_rows, core_count = cls._flush_batch( + model=CoreTimeSeries, + rows=core_rows, + batch_size=batch_size, + current_count=core_count, + ) api_rows.append(api_row) - if len(api_rows) >= batch_size: - APITimeSeries.objects.bulk_create(api_rows, batch_size=batch_size) - api_count += len(api_rows) - api_rows = [] + api_rows, api_count = cls._flush_batch( + model=APITimeSeries, + rows=api_rows, + batch_size=batch_size, + current_count=api_count, + ) if progress_callback is not None and ( metric_index == total_metrics or metric_index % log_interval == 0 @@ -254,13 +258,18 @@ def _seed_time_series_rows( f"({processed_row_count:,}/{total_row_count:,} row groups)." ) - if core_rows: - CoreTimeSeries.objects.bulk_create(core_rows, batch_size=batch_size) - core_count += len(core_rows) - - if api_rows: - APITimeSeries.objects.bulk_create(api_rows, batch_size=batch_size) - api_count += len(api_rows) + core_count = cls._flush_remaining( + model=CoreTimeSeries, + rows=core_rows, + batch_size=batch_size, + current_count=core_count, + ) + api_count = cls._flush_remaining( + model=APITimeSeries, + rows=api_rows, + batch_size=batch_size, + current_count=api_count, + ) if progress_callback is not None: progress_callback( @@ -271,6 +280,34 @@ def _seed_time_series_rows( return core_count, api_count + @staticmethod + def _flush_batch( + *, + model: type[TModel], + rows: list[TModel], + batch_size: int, + current_count: int, + ) -> tuple[list[TModel], int]: + if len(rows) < batch_size: + return rows, current_count + + model.objects.bulk_create(rows, batch_size=batch_size) + return [], current_count + len(rows) + + @staticmethod + def _flush_remaining( + *, + model: type[TModel], + rows: list[TModel], + batch_size: int, + current_count: int, + ) -> int: + if not rows: + return current_count + + model.objects.bulk_create(rows, batch_size=batch_size) + return current_count + len(rows) + @classmethod def _seed_theme_hierarchy(cls) -> tuple[list[Theme], list[SubTheme], list[Topic]]: theme_names, sub_theme_rows, topic_rows = cls._build_theme_hierarchy_records() From 3243354a5fcc379a0b6aa083f9889d7355a1cb26 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Thu, 26 Mar 2026 17:04:14 +0000 Subject: [PATCH 15/36] feat(seed-random): add S3 delivery mode, non-public option, idempotent hierarchy seeding, and end-to-end test coverage --- ingestion/aws_client.py | 26 +- .../management/commands/seed_random.py | 282 +++++++++++------- tests/system/test_seed_random.py | 67 +++++ tests/unit/ingestion/test_aws_client.py | 114 +++---- .../interfaces/management/test_seed_random.py | 233 +++++++++++++-- 5 files changed, 504 insertions(+), 218 deletions(-) create mode 100644 tests/system/test_seed_random.py diff --git a/ingestion/aws_client.py b/ingestion/aws_client.py index b5febae62..0a31f27f8 100644 --- a/ingestion/aws_client.py +++ b/ingestion/aws_client.py @@ -1,4 +1,5 @@ import datetime +import json import logging import boto3 @@ -100,6 +101,15 @@ def move_file_to_failed_folder(self, *, key: str) -> None: self._copy_file_to_failed(key=key) self._delete_file_from_inbound(key=key) + def upload_json_to_inbound(self, *, key: str, payload: dict) -> None: + """Uploads a JSON payload to the inbound folder in the ingest bucket.""" + self._client.put_object( + Bucket=self._bucket_name, + Key=key, + Body=json.dumps(payload).encode("utf-8"), + ContentType="application/json", + ) + def _copy_file_to_processed(self, *, key: str) -> None: """Copies the file matching the given `key` into the processed folder within the s3 bucket @@ -125,9 +135,7 @@ def _copy_file_to_processed(self, *, key: str) -> None: Key=self._build_processed_key(key=key), ) except botocore.client.ClientError: - logger.warning( - "Failed to move `%s` to `%s` folder", key, self._processed_folder - ) + logger.warning("Failed to move `%s` to `%s` folder", key, self._processed_folder) def _copy_file_to_processed_archive(self, *, key: str) -> None: """Copies the file matching the given `key` into the ingest archive s3 bucket @@ -151,9 +159,7 @@ def _copy_file_to_processed_archive(self, *, key: str) -> None: }, ) except botocore.client.ClientError: - logger.warning( - "Failed to move `%s` to `%s` bucket", key, self._archive_bucket_name - ) + logger.warning("Failed to move `%s` to `%s` bucket", key, self._archive_bucket_name) def _copy_file_to_failed(self, *, key: str) -> None: """Copies the file matching the given `key` into the failed folder within the s3 bucket @@ -180,9 +186,7 @@ def _copy_file_to_failed(self, *, key: str) -> None: Key=self._build_failed_key(key=key), ) except botocore.client.ClientError: - logger.warning( - "Failed to move `%s` to `%s` folder", key, self._failed_folder - ) + logger.warning("Failed to move `%s` to `%s` folder", key, self._failed_folder) def _delete_file_from_inbound(self, *, key: str) -> None: """Deletes the file matching the given `key` from the inbound folder within the s3 bucket @@ -205,9 +209,7 @@ def _delete_file_from_inbound(self, *, key: str) -> None: try: self._client.delete_object(Bucket=self._bucket_name, Key=key) except botocore.client.ClientError: - logger.warning( - "Failed to delete `%s` from `%s` folder", key, self._inbound_folder - ) + logger.warning("Failed to delete `%s` from `%s` folder", key, self._inbound_folder) def _get_filename_from_key(self, *, key: str) -> str: """Extracts the filename from the `key` diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index d485f9cb0..f3878ba43 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -2,7 +2,7 @@ import re import time from collections.abc import Callable, Iterable -from datetime import date, timedelta +from datetime import date, datetime, timedelta from decimal import Decimal from operator import itemgetter from typing import TypeVar, cast, override @@ -12,6 +12,7 @@ from django.db import transaction from django.db.models import Model +from ingestion.aws_client import AWSClient from metrics.data.enums import TimePeriod from metrics.data.models.api_models import APITimeSeries from metrics.data.models.core_models.supporting import ( @@ -70,16 +71,28 @@ def add_arguments(self, parser: CommandParser) -> None: default=False, help="Clear existing metrics tables before seeding to avoid duplicates.", ) + parser.add_argument( + "--delivery", + choices=["db", "s3"], + default="db", + help="Delivery mode for metrics dataset: database insert or s3 ingestion files.", + ) + parser.add_argument( + "--non-public", + action="store_true", + default=False, + help="Mark generated metric points as non-public (`is_public=False`).", + ) def handle(self, *args, **options) -> None: started_at = time.perf_counter() dataset: str = options["dataset"] scale: str = options["scale"] truncate_first: bool = options["truncate_first"] + delivery: str = options["delivery"] + is_public: bool = not options["non_public"] - selected_seed = ( - options["seed"] if options["seed"] is not None else int(time.time()) - ) + selected_seed = options["seed"] if options["seed"] is not None else int(time.time()) random.seed(selected_seed) # nosec B311 self.stdout.write(f"Seed used: {selected_seed}") @@ -99,11 +112,19 @@ def handle(self, *args, **options) -> None: if should_seed_metrics: scale_config = SCALE_CONFIGS[scale] self.stderr.write("Seeding metrics dataset...") - counts = self._seed_metrics_data( - scale_config=scale_config, - truncate_first=truncate_first, - progress_callback=self.stderr.write, - ) + if delivery == "s3": + counts = self._seed_metrics_data_to_s3( + scale_config=scale_config, + is_public=is_public, + progress_callback=self.stderr.write, + ) + else: + counts = self._seed_metrics_data( + scale_config=scale_config, + truncate_first=truncate_first, + is_public=is_public, + progress_callback=self.stderr.write, + ) self.stderr.write("Metrics dataset seeding complete.") if should_seed_cms: @@ -126,6 +147,7 @@ def _seed_metrics_data( *, scale_config: dict[str, int], truncate_first: bool, + is_public: bool, progress_callback: Callable[[str], None] | None = None, ) -> dict[str, int]: """Seed supporting metric models and time series rows for the selected scale. @@ -133,6 +155,7 @@ def _seed_metrics_data( Args: scale_config: Scale-specific object counts for generated records. truncate_first: Whether to clear existing metrics-related tables before seeding. + is_public: Whether generated metric rows should be marked as public. progress_callback: Optional callback used to report progress updates. Returns: @@ -174,6 +197,7 @@ def _seed_metrics_data( stratum=stratum, age=age, days=scale_config["days"], + is_public=is_public, progress_callback=progress_callback, ) @@ -187,6 +211,50 @@ def _seed_metrics_data( "APITimeSeries": api_count, } + @classmethod + def _seed_metrics_data_to_s3( + cls, + *, + scale_config: dict[str, int], + is_public: bool, + progress_callback: Callable[[str], None] | None = None, + ) -> dict[str, int]: + if progress_callback is not None: + progress_callback("Generating ingestion payloads for S3 upload...") + + payloads = cls._build_timeseries_ingestion_payloads( + scale_config=scale_config, + is_public=is_public, + ) + client = AWSClient() + uploaded_files = 0 + for payload_index, payload in enumerate(payloads, start=1): + key = cls._build_s3_object_key(payload=payload, payload_index=payload_index) + client.upload_json_to_inbound(key=key, payload=payload) + uploaded_files += 1 + + if progress_callback is not None: + progress_callback(f"Uploaded {uploaded_files:,} files to ingest bucket in/.") + + topic_rows = cls._build_theme_hierarchy_records()[2] + theme_count = len({theme_name for _, _, theme_name in topic_rows}) + sub_theme_count = len({(sub_theme_name, theme_name) for _, sub_theme_name, theme_name in topic_rows}) + topic_count = len( + {(topic_name, sub_theme_name, theme_name) for topic_name, sub_theme_name, theme_name in topic_rows} + ) + geography_count = len(cls._build_geography_seed_values(count=scale_config["geographies"])) + row_count = scale_config["metrics"] * geography_count * scale_config["days"] + + return { + "Theme": theme_count, + "SubTheme": sub_theme_count, + "Topic": topic_count, + "Metric": scale_config["metrics"], + "Geography": geography_count, + "CoreTimeSeries": row_count, + "APITimeSeries": row_count, + } + @classmethod def _truncate_metrics_data(cls) -> None: """Delete all seeded metrics-related rows in dependency-safe order.""" @@ -210,6 +278,7 @@ def _seed_time_series_rows( stratum: Stratum, age: Age, days: int, + is_public: bool, progress_callback: Callable[[str], None] | None = None, ) -> tuple[int, int]: frequency = TimePeriod.Weekly.value @@ -230,6 +299,7 @@ def _seed_time_series_rows( stratum=stratum, age=age, days=days, + is_public=is_public, start_date=start_date, frequency=frequency, ): @@ -249,9 +319,7 @@ def _seed_time_series_rows( current_count=api_count, ) - if progress_callback is not None and ( - metric_index == total_metrics or metric_index % log_interval == 0 - ): + if progress_callback is not None and (metric_index == total_metrics or metric_index % log_interval == 0): processed_row_count = metric_index * len(geographies) * days progress_callback( f"Processed {metric_index}/{total_metrics} metrics " @@ -272,11 +340,7 @@ def _seed_time_series_rows( ) if progress_callback is not None: - progress_callback( - "Inserted " - f"{core_count:,} CoreTimeSeries rows and " - f"{api_count:,} APITimeSeries rows." - ) + progress_callback(f"Inserted {core_count:,} CoreTimeSeries rows and {api_count:,} APITimeSeries rows.") return core_count, api_count @@ -329,20 +393,11 @@ def _upsert_themes( *, theme_names: list[str], ) -> tuple[list[Theme], dict[str, Theme]]: - themes_by_name = { - theme.name: theme for theme in Theme.objects.filter(name__in=theme_names) - } - missing_theme_names = [ - name for name in theme_names if name not in themes_by_name - ] + themes_by_name = {theme.name: theme for theme in Theme.objects.filter(name__in=theme_names)} + missing_theme_names = [name for name in theme_names if name not in themes_by_name] if missing_theme_names: cls._bulk_create(Theme, [Theme(name=name) for name in missing_theme_names]) - themes_by_name.update( - { - theme.name: theme - for theme in Theme.objects.filter(name__in=missing_theme_names) - } - ) + themes_by_name.update({theme.name: theme for theme in Theme.objects.filter(name__in=missing_theme_names)}) return [themes_by_name[name] for name in theme_names], themes_by_name @classmethod @@ -358,10 +413,7 @@ def _upsert_sub_themes( theme__name__in=theme_names, name__in={name for name, _ in sub_theme_keys}, ) - sub_themes_by_key = { - (sub_theme.name, sub_theme.theme.name): sub_theme - for sub_theme in existing_sub_themes - } + sub_themes_by_key = {(sub_theme.name, sub_theme.theme.name): sub_theme for sub_theme in existing_sub_themes} missing_sub_theme_keys = [ (sub_theme_name, theme_name) for sub_theme_name, theme_name in sub_theme_keys @@ -379,13 +431,8 @@ def _upsert_sub_themes( { (sub_theme.name, sub_theme.theme.name): sub_theme for sub_theme in SubTheme.objects.select_related("theme").filter( - theme__name__in={ - theme_name for _, theme_name in missing_sub_theme_keys - }, - name__in={ - sub_theme_name - for sub_theme_name, _ in missing_sub_theme_keys - }, + theme__name__in={theme_name for _, theme_name in missing_sub_theme_keys}, + name__in={sub_theme_name for sub_theme_name, _ in missing_sub_theme_keys}, ) } ) @@ -400,21 +447,15 @@ def _upsert_topics( ) -> list[Topic]: topic_keys = list(dict.fromkeys(topic_rows)) sub_themes_by_id_key = { - (sub_theme_name, theme_name): sub_themes_by_key[ - (sub_theme_name, theme_name) - ] + (sub_theme_name, theme_name): sub_themes_by_key[(sub_theme_name, theme_name)] for _, sub_theme_name, theme_name in topic_keys } - candidate_sub_theme_ids = [ - sub_theme.id for sub_theme in sub_themes_by_id_key.values() - ] + candidate_sub_theme_ids = [sub_theme.id for sub_theme in sub_themes_by_id_key.values()] existing_topics = Topic.objects.filter( sub_theme_id__in=candidate_sub_theme_ids, name__in={topic_name for topic_name, _, _ in topic_keys}, ) - topics_by_key = { - (topic.name, topic.sub_theme_id): topic for topic in existing_topics - } + topics_by_key = {(topic.name, topic.sub_theme_id): topic for topic in existing_topics} missing_topic_keys = [ topic_key for topic_key in topic_keys @@ -443,9 +484,7 @@ def _upsert_topics( sub_themes_by_id_key[(sub_theme_name, theme_name)].id for _, sub_theme_name, theme_name in missing_topic_keys ], - name__in={ - topic_name for topic_name, _, _ in missing_topic_keys - }, + name__in={topic_name for topic_name, _, _ in missing_topic_keys}, ) } ) @@ -462,19 +501,13 @@ def _upsert_topics( @classmethod def _seed_geographies(cls, *, count: int) -> list[Geography]: geography_seed_values = cls._build_geography_seed_values(count=count) - geography_type_names = { - record["geography_type"] for record in geography_seed_values - } + geography_type_names = {record["geography_type"] for record in geography_seed_values} geography_type_names = sorted(geography_type_names) geography_types_by_name = { geography_type.name: geography_type - for geography_type in GeographyType.objects.filter( - name__in=geography_type_names - ) + for geography_type in GeographyType.objects.filter(name__in=geography_type_names) } - missing_geography_type_names = [ - name for name in geography_type_names if name not in geography_types_by_name - ] + missing_geography_type_names = [name for name in geography_type_names if name not in geography_types_by_name] if missing_geography_type_names: cls._bulk_create( GeographyType, @@ -483,32 +516,22 @@ def _seed_geographies(cls, *, count: int) -> list[Geography]: geography_types_by_name.update( { geography_type.name: geography_type - for geography_type in GeographyType.objects.filter( - name__in=missing_geography_type_names - ) + for geography_type in GeographyType.objects.filter(name__in=missing_geography_type_names) } ) - geography_types_by_name = { - name: geography_types_by_name[name] for name in geography_type_names - } + geography_types_by_name = {name: geography_types_by_name[name] for name in geography_type_names} geography_keys = list( dict.fromkeys( - (record["name"], record["geography_type"], record["geography_code"]) - for record in geography_seed_values + (record["name"], record["geography_type"], record["geography_code"]) for record in geography_seed_values ) ) - existing_geographies = Geography.objects.select_related( - "geography_type" - ).filter( + existing_geographies = Geography.objects.select_related("geography_type").filter( name__in={name for name, _, _ in geography_keys}, - geography_type__name__in={ - geography_type for _, geography_type, _ in geography_keys - }, + geography_type__name__in={geography_type for _, geography_type, _ in geography_keys}, ) geographies_by_key = { - (geography.name, geography.geography_type.name): geography - for geography in existing_geographies + (geography.name, geography.geography_type.name): geography for geography in existing_geographies } missing_geography_keys = [ (name, geography_type, geography_code) @@ -530,22 +553,14 @@ def _seed_geographies(cls, *, count: int) -> list[Geography]: geographies_by_key.update( { (geography.name, geography.geography_type.name): geography - for geography in Geography.objects.select_related( - "geography_type" - ).filter( + for geography in Geography.objects.select_related("geography_type").filter( name__in={name for name, _, _ in missing_geography_keys}, - geography_type__name__in={ - geography_type - for _, geography_type, _ in missing_geography_keys - }, + geography_type__name__in={geography_type for _, geography_type, _ in missing_geography_keys}, ) } ) - return [ - geographies_by_key[(name, geography_type)] - for name, geography_type, _ in geography_keys - ] + return [geographies_by_key[(name, geography_type)] for name, geography_type, _ in geography_keys] @classmethod def _build_time_series_rows_for_metric( @@ -556,6 +571,7 @@ def _build_time_series_rows_for_metric( stratum: Stratum, age: Age, days: int, + is_public: bool, start_date: date, frequency: str, ) -> Iterable[tuple[CoreTimeSeries, APITimeSeries]]: @@ -568,8 +584,7 @@ def _build_time_series_rows_for_metric( current_date = start_date + timedelta(days=day_offset) base_value = random.uniform(5.0, 250.0) # noqa: S311 # nosec B311 metric_value = round( - base_value - + random.uniform(-10.0, 10.0), # noqa: S311 # nosec B311 + base_value + random.uniform(-10.0, 10.0), # noqa: S311 # nosec B311 2, ) sex = random.choice(SEED_RANDOM_SEX_OPTIONS) # noqa: S311 # nosec B311 @@ -588,7 +603,7 @@ def _build_time_series_rows_for_metric( epiweek=epidemiological_week, date=current_date, metric_value=Decimal(str(metric_value)), - is_public=True, + is_public=is_public, ), APITimeSeries( metric_frequency=frequency, @@ -608,10 +623,82 @@ def _build_time_series_rows_for_metric( epiweek=epidemiological_week, date=current_date, metric_value=float(metric_value), - is_public=True, + is_public=is_public, ), ) + @classmethod + def _build_timeseries_ingestion_payloads( + cls, + *, + scale_config: dict[str, int], + is_public: bool, + ) -> list[dict[str, object]]: + _, _, topic_rows = cls._build_theme_hierarchy_records() + geographies = cls._build_geography_seed_values(count=scale_config["geographies"]) + refresh_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + start_date = date.today() - timedelta(days=scale_config["days"] - 1) + payloads: list[dict[str, object]] = [] + + for metric_index in range(scale_config["metrics"]): + topic_name, sub_theme_name, theme_name = topic_rows[metric_index % len(topic_rows)] + metric_name = f"{topic_name}_cases_randomByDay_{metric_index + 1}" + for geography in geographies: + time_series_rows: list[dict[str, object]] = [] + for day_offset in range(scale_config["days"]): + current_date = start_date + timedelta(days=day_offset) + metric_value = round( + random.uniform(5.0, 250.0), # noqa: S311 # nosec B311 + 2, + ) + time_series_rows.append( + { + "epiweek": current_date.isocalendar().week, + "date": current_date.isoformat(), + "metric_value": metric_value, + "embargo": None, + "is_public": is_public, + } + ) + + payloads.append( + { + "parent_theme": theme_name, + "child_theme": sub_theme_name, + "topic": topic_name, + "metric_group": "cases", + "metric": metric_name, + "metric_frequency": TimePeriod.Weekly.value, + "geography_type": geography["geography_type"], + "geography": geography["name"], + "geography_code": geography["geography_code"], + "age": "all", + "sex": random.choice(SEED_RANDOM_SEX_OPTIONS), # noqa: S311 # nosec B311 + "stratum": "default", + "refresh_date": refresh_date, + "time_series": time_series_rows, + } + ) + + return payloads + + @classmethod + def _build_s3_object_key( + cls, + *, + payload: dict[str, object], + payload_index: int, + ) -> str: + topic_name = str(payload["topic"]) + metric_name = str(payload["metric"]) + geography_code = str(payload["geography_code"]) + age = str(payload["age"]) + sex = str(payload["sex"]) + stratum = str(payload["stratum"]) + safe_topic = cls._normalise_key(topic_name) + safe_metric = cls._normalise_key(metric_name) + return f"in/{safe_topic}_cases_{safe_metric}_{geography_code}_{age}_{sex}_{stratum}_{payload_index}.json" + @staticmethod def _bulk_create(model: type[TModel], records: Iterable[TModel]) -> list[TModel]: """Materialise and bulk insert a sequence of model instances.""" @@ -620,9 +707,7 @@ def _bulk_create(model: type[TModel], records: Iterable[TModel]) -> list[TModel] @staticmethod def _get_next_random_metric_index() -> int: max_metric_index = 0 - for metric_name in Metric.objects.filter( - name__startswith="Random Metric " - ).values_list( + for metric_name in Metric.objects.filter(name__startswith="Random Metric ").values_list( "name", flat=True, ): @@ -661,8 +746,7 @@ def _build_theme_hierarchy_records( parent_theme_name = child_to_parent[sub_theme_name] sub_theme_pairs.add((sub_theme_name, parent_theme_name)) topic_rows.extend( - (topic_value, sub_theme_name, parent_theme_name) - for topic_value in topic_group.return_list() + (topic_value, sub_theme_name, parent_theme_name) for topic_value in topic_group.return_list() ) theme_names = sorted({parent_name for _, parent_name in sub_theme_pairs}) @@ -699,9 +783,7 @@ def _build_geography_seed_values(cls, *, count: int) -> list[dict[str, str]]: { "name": cls._format_enum_name(ltla.name), "geography_code": ltla.value, - "geography_type": ( - validation_enums.GeographyType.LOWER_TIER_LOCAL_AUTHORITY.value - ), + "geography_type": (validation_enums.GeographyType.LOWER_TIER_LOCAL_AUTHORITY.value), } for ltla in list(validation_enums.LTLAs)[:extra_required] ) diff --git a/tests/system/test_seed_random.py b/tests/system/test_seed_random.py new file mode 100644 index 000000000..1d392bbc9 --- /dev/null +++ b/tests/system/test_seed_random.py @@ -0,0 +1,67 @@ +import pytest +from django.core.management import call_command +from rest_framework.test import APIClient + +from metrics.data.models.api_models import APITimeSeries +from metrics.data.models.core_models.supporting import Geography, Metric +from metrics.data.models.core_models.timeseries import CoreTimeSeries + +EXPECTED_METRIC_COUNT = 10 +EXPECTED_GEOGRAPHY_COUNT = 5 +EXPECTED_TIME_SERIES_COUNT = 1_500 +HTTP_OK = 200 + + +class TestSeedRandomCommand: + @pytest.mark.django_db + def test_command_seeds_metrics_dataset_and_data_is_queryable_via_api(self): + """ + Given an empty metrics dataset + When the `seed_random` management command is run for small metrics scale + Then the expected amount of data is inserted + And it can be queried from the public tables API endpoint + """ + # Given + assert Metric.objects.count() == 0 + assert Geography.objects.count() == 0 + assert CoreTimeSeries.objects.count() == 0 + assert APITimeSeries.objects.count() == 0 + + # When + call_command( + "seed_random", + dataset="metrics", + scale="small", + seed=12345, + truncate_first=True, + ) + + # Then + assert Metric.objects.count() == EXPECTED_METRIC_COUNT + assert Geography.objects.count() == EXPECTED_GEOGRAPHY_COUNT + assert CoreTimeSeries.objects.count() == EXPECTED_TIME_SERIES_COUNT + assert APITimeSeries.objects.count() == EXPECTED_TIME_SERIES_COUNT + + sample_row = APITimeSeries.objects.order_by("id").first() + assert sample_row is not None + + api_client = APIClient() + response = api_client.post( + path="/api/tables/v4/", + data={ + "file_format": "svg", + "plots": [ + { + "topic": sample_row.topic, + "metric": sample_row.metric, + "date_from": "2020-01-01", + "chart_type": "bar", + } + ], + }, + format="json", + ) + + assert response.status_code == HTTP_OK + assert len(response.data) > 0 + assert len(response.data[0]["values"]) > 0 diff --git a/tests/unit/ingestion/test_aws_client.py b/tests/unit/ingestion/test_aws_client.py index 578feb7bc..ae7d9d72c 100644 --- a/tests/unit/ingestion/test_aws_client.py +++ b/tests/unit/ingestion/test_aws_client.py @@ -21,9 +21,7 @@ def aws_client_with_mocked_boto_client() -> AWSClient: class TestAWSClient: # Tests for the `__init__` @mock.patch.object(AWSClient, "create_client") - def test_create_client_called_when_client_not_provided( - self, spy_create_client: mock.MagicMock - ): + def test_create_client_called_when_client_not_provided(self, spy_create_client: mock.MagicMock): """ Given no provided boto3 client When an instance of `AWSClient` is created @@ -109,15 +107,11 @@ def test_create_client(self, spy_boto3: mock.MagicMock): boto3_client = AWSClient.create_client(profile_name=aws_profile_name) # Then - spy_boto3.setup_default_session.assert_called_once_with( - profile_name=aws_profile_name - ) + spy_boto3.setup_default_session.assert_called_once_with(profile_name=aws_profile_name) assert boto3_client == spy_boto3.client.return_value @mock.patch(f"{MODULE_PATH}.boto3") - def test_create_client_does_not_setup_default_session_for_no_profile_name( - self, spy_boto3: mock.MagicMock - ): + def test_create_client_does_not_setup_default_session_for_no_profile_name(self, spy_boto3: mock.MagicMock): """ Given no provided AWS profile name When `create_client()` is called from the `AWSClient` class @@ -141,9 +135,7 @@ def test_create_client_does_not_setup_default_session_for_no_profile_name( # Tests for the `move_file_to_processed_folder()` method - def test_move_file_to_processed_folder( - self, aws_client_with_mocked_boto_client: AWSClient - ): + def test_move_file_to_processed_folder(self, aws_client_with_mocked_boto_client: AWSClient): """ Given a fake key for an item When `move_file_to_processed_folder()` @@ -157,17 +149,13 @@ def test_move_file_to_processed_folder( fake_archive_bucket_name = "fake-archive-bucket" spy_client = aws_client_with_mocked_boto_client._client aws_client_with_mocked_boto_client._bucket_name = fake_bucket_name - aws_client_with_mocked_boto_client._archive_bucket_name = ( - fake_archive_bucket_name - ) + aws_client_with_mocked_boto_client._archive_bucket_name = fake_archive_bucket_name # When aws_client_with_mocked_boto_client.move_file_to_processed_folder(key=fake_key) # Then - processed_key: str = aws_client_with_mocked_boto_client._build_processed_key( - key=fake_key - ) + processed_key: str = aws_client_with_mocked_boto_client._build_processed_key(key=fake_key) # Check that the call to copy the file is made correctly expected_copy_file_to_processed_call = mock.call.copy( CopySource={"Bucket": fake_bucket_name, "Key": fake_key}, @@ -178,18 +166,14 @@ def test_move_file_to_processed_folder( expected_copy_file_to_processed_archive_call = mock.call.copy( CopySource={"Bucket": fake_bucket_name, "Key": fake_key}, Bucket=fake_archive_bucket_name, - Key=aws_client_with_mocked_boto_client._build_processed_archive_key( - key=fake_key - ), + Key=aws_client_with_mocked_boto_client._build_processed_archive_key(key=fake_key), ExtraArgs={ "StorageClass": "GLACIER_IR", "MetadataDirective": "COPY", }, ) # Check that the call to delete the origin file is made correctly - expected_delete_file_from_origin_call = mock.call.delete_object( - Bucket=fake_bucket_name, Key=fake_key - ) + expected_delete_file_from_origin_call = mock.call.delete_object(Bucket=fake_bucket_name, Key=fake_key) expected_calls = [ expected_copy_file_to_processed_call, expected_copy_file_to_processed_archive_call, @@ -215,28 +199,17 @@ def test_move_file_to_processed_folder_records_correct_log( aws_client_with_mocked_boto_client.move_file_to_processed_folder(key=fake_key) # Then - expected_filename: str = ( - aws_client_with_mocked_boto_client._get_filename_from_key(key=fake_key) - ) - expected_inbound_folder: str = ( - aws_client_with_mocked_boto_client._inbound_folder - ) - expected_processed_folder: str = ( - aws_client_with_mocked_boto_client._processed_folder - ) + expected_filename: str = aws_client_with_mocked_boto_client._get_filename_from_key(key=fake_key) + expected_inbound_folder: str = aws_client_with_mocked_boto_client._inbound_folder + expected_processed_folder: str = aws_client_with_mocked_boto_client._processed_folder expected_log = ( - f"Moving `{expected_filename}` " - f"from `{expected_inbound_folder}` " - f"to `{expected_processed_folder}` " - f"in s3" + f"Moving `{expected_filename}` from `{expected_inbound_folder}` to `{expected_processed_folder}` in s3" ) assert expected_log in caplog.text # Tests for the `move_file_to_failed_folder()` method - def test_move_file_to_failed_folder( - self, aws_client_with_mocked_boto_client: AWSClient - ): + def test_move_file_to_failed_folder(self, aws_client_with_mocked_boto_client: AWSClient): """ Given a fake key for an item When `move_file_to_failed_folder()` @@ -254,9 +227,7 @@ def test_move_file_to_failed_folder( # Then bucket_name: str = aws_client_with_mocked_boto_client._bucket_name - failed_key: str = aws_client_with_mocked_boto_client._build_failed_key( - key=fake_key - ) + failed_key: str = aws_client_with_mocked_boto_client._build_failed_key(key=fake_key) # Check that the call to copy the file is made correctly expected_copy_file_to_failed_call = mock.call.copy( @@ -265,9 +236,7 @@ def test_move_file_to_failed_folder( Key=failed_key, ) # Check that the call to delete the origin file is made correctly - expected_delete_file_from_origin_call = mock.call.delete_object( - Bucket=bucket_name, Key=fake_key - ) + expected_delete_file_from_origin_call = mock.call.delete_object(Bucket=bucket_name, Key=fake_key) expected_calls = [ expected_copy_file_to_failed_call, expected_delete_file_from_origin_call, @@ -292,18 +261,11 @@ def test_move_file_to_failed_folder_records_correct_log( aws_client_with_mocked_boto_client.move_file_to_failed_folder(key=fake_key) # Then - expected_filename: str = ( - aws_client_with_mocked_boto_client._get_filename_from_key(key=fake_key) - ) - expected_inbound_folder: str = ( - aws_client_with_mocked_boto_client._inbound_folder - ) + expected_filename: str = aws_client_with_mocked_boto_client._get_filename_from_key(key=fake_key) + expected_inbound_folder: str = aws_client_with_mocked_boto_client._inbound_folder expected_failed_folder: str = aws_client_with_mocked_boto_client._failed_folder expected_log = ( - f"Moving `{expected_filename}` " - f"from `{expected_inbound_folder}` " - f"to `{expected_failed_folder}` " - f"in s3" + f"Moving `{expected_filename}` from `{expected_inbound_folder}` to `{expected_failed_folder}` in s3" ) assert expected_log in caplog.text @@ -500,9 +462,7 @@ def test_copy_file_to_processed_archive_records_log_when_client_error_occurs( aws_client_with_mocked_boto_client._copy_file_to_processed_archive(key=key) # Then - _archive_bucket_name: str = ( - aws_client_with_mocked_boto_client._archive_bucket_name - ) + _archive_bucket_name: str = aws_client_with_mocked_boto_client._archive_bucket_name expected_log = f"Failed to move `{key}` to `{_archive_bucket_name}` bucket" assert expected_log in caplog.text @@ -550,9 +510,7 @@ def test_get_filename_from_key(self, aws_client_with_mocked_boto_client: AWSClie # This is similar to a filepath as we would see it on a filesystem # When - filename: str = aws_client_with_mocked_boto_client._get_filename_from_key( - key=key - ) + filename: str = aws_client_with_mocked_boto_client._get_filename_from_key(key=key) # Then assert filename == FAKE_FILE_NAME @@ -568,9 +526,7 @@ def test_build_processed_key(self, aws_client_with_mocked_boto_client: AWSClient fake_key = FAKE_KEY # When - processed_key: str = aws_client_with_mocked_boto_client._build_processed_key( - key=fake_key - ) + processed_key: str = aws_client_with_mocked_boto_client._build_processed_key(key=fake_key) # Then assert processed_key == f"processed/{FAKE_FILE_NAME}" @@ -586,17 +542,13 @@ def test_build_failed_key(self, aws_client_with_mocked_boto_client: AWSClient): fake_key = FAKE_KEY # When - failed_key: str = aws_client_with_mocked_boto_client._build_failed_key( - key=fake_key - ) + failed_key: str = aws_client_with_mocked_boto_client._build_failed_key(key=fake_key) # Then assert failed_key == f"failed/{FAKE_FILE_NAME}" @freezegun.freeze_time("2025-01-01") - def test_build_processed_archive_key( - self, aws_client_with_mocked_boto_client: AWSClient - ): + def test_build_processed_archive_key(self, aws_client_with_mocked_boto_client: AWSClient): """ Given a key from the s3 bucket for an item When `_build_processed_archive_key()` is called @@ -607,12 +559,22 @@ def test_build_processed_archive_key( fake_key = FAKE_KEY # When - processed_archive_key: str = ( - aws_client_with_mocked_boto_client._build_processed_archive_key( - key=fake_key - ) - ) + processed_archive_key: str = aws_client_with_mocked_boto_client._build_processed_archive_key(key=fake_key) # Then expected_key = f"processed/2025-01-01/COVID-19/{FAKE_FILE_NAME}" assert processed_archive_key == expected_key + + def test_upload_json_to_inbound_delegates_to_put_object(self, aws_client_with_mocked_boto_client: AWSClient): + payload = {"key": "value"} + + aws_client_with_mocked_boto_client.upload_json_to_inbound( + key="in/sample.json", + payload=payload, + ) + + aws_client_with_mocked_boto_client._client.put_object.assert_called_once() + kwargs = aws_client_with_mocked_boto_client._client.put_object.call_args.kwargs + assert kwargs["Bucket"] == aws_client_with_mocked_boto_client._bucket_name + assert kwargs["Key"] == "in/sample.json" + assert kwargs["ContentType"] == "application/json" diff --git a/tests/unit/metrics/interfaces/management/test_seed_random.py b/tests/unit/metrics/interfaces/management/test_seed_random.py index beb5ada33..c533e6fcd 100644 --- a/tests/unit/metrics/interfaces/management/test_seed_random.py +++ b/tests/unit/metrics/interfaces/management/test_seed_random.py @@ -17,6 +17,8 @@ LARGE_GEO_COUNT = 7 EXPECTED_BULK_CREATE_CALLS = 2 EXPECTED_NEXT_METRIC_INDEX = 11 +EXPECTED_TIME_SERIES_POINTS = 2 +EXPECTED_METRIC_VALUE = 123.45 def _fake_metric_hierarchy() -> SimpleNamespace: @@ -44,9 +46,7 @@ def _fake_age() -> Age: def _assert_progress_messages(progress_messages: list[str]) -> None: - assert any( - message.startswith("Processed 1/1 metrics") for message in progress_messages - ) + assert any(message.startswith("Processed 1/1 metrics") for message in progress_messages) assert any(message.startswith("Inserted ") for message in progress_messages) @@ -61,6 +61,8 @@ def test_add_arguments_parses_defaults(self): assert options.scale == "small" assert options.seed is None assert options.truncate_first is False + assert options.delivery == "db" + assert options.non_public is False @mock.patch(f"{MODULE_PATH}.random.seed") @mock.patch(f"{MODULE_PATH}.call_command") @@ -86,12 +88,20 @@ def test_handle_metrics_dataset( "APITimeSeries": 1, } - Command().handle(dataset="metrics", scale="small", truncate_first=True, seed=42) + Command().handle( + dataset="metrics", + scale="small", + truncate_first=True, + seed=42, + delivery="db", + non_public=False, + ) spy_random_seed.assert_called_once_with(42) spy_seed_metrics_data.assert_called_once_with( scale_config=SCALE_CONFIGS["small"], truncate_first=True, + is_public=True, progress_callback=mock.ANY, ) spy_call_command.assert_not_called() @@ -121,7 +131,14 @@ def test_handle_cms_dataset_uses_time_seed_and_builds_cms( spy_perf_counter.side_effect = [20.0, 22.0] spy_time.return_value = 1234 - Command().handle(dataset="cms", scale="large", truncate_first=False, seed=None) + Command().handle( + dataset="cms", + scale="large", + truncate_first=False, + seed=None, + delivery="db", + non_public=False, + ) spy_random_seed.assert_called_once_with(1234) spy_seed_metrics_data.assert_not_called() @@ -142,6 +159,54 @@ def test_handle_cms_dataset_uses_time_seed_and_builds_cms( runtime_seconds=2.0, ) + @mock.patch(f"{MODULE_PATH}.random.seed") + @mock.patch.object(Command, "_seed_metrics_data") + @mock.patch.object(Command, "_seed_metrics_data_to_s3") + @mock.patch.object(Command, "_print_summary") + @mock.patch(f"{MODULE_PATH}.time.perf_counter") + def test_handle_metrics_dataset_s3_delivery( + self, + spy_perf_counter: mock.MagicMock, + spy_print_summary: mock.MagicMock, + spy_seed_metrics_data_to_s3: mock.MagicMock, + spy_seed_metrics_data: mock.MagicMock, + spy_random_seed: mock.MagicMock, + ): + spy_perf_counter.side_effect = [11.0, 13.0] + spy_seed_metrics_data_to_s3.return_value = { + "Theme": 1, + "SubTheme": 1, + "Topic": 1, + "Metric": 1, + "Geography": 1, + "CoreTimeSeries": 10, + "APITimeSeries": 10, + } + + Command().handle( + dataset="metrics", + scale="small", + truncate_first=False, + seed=99, + delivery="s3", + non_public=True, + ) + + spy_random_seed.assert_called_once_with(99) + spy_seed_metrics_data.assert_not_called() + spy_seed_metrics_data_to_s3.assert_called_once_with( + scale_config=SCALE_CONFIGS["small"], + is_public=False, + progress_callback=mock.ANY, + ) + spy_print_summary.assert_called_once_with( + dataset="metrics", + scale="small", + seed=99, + counts=spy_seed_metrics_data_to_s3.return_value, + runtime_seconds=2.0, + ) + @mock.patch.object(Command, "_truncate_metrics_data") @mock.patch.object(Command, "_seed_time_series_rows") @mock.patch.object(Command, "_seed_geographies") @@ -191,12 +256,7 @@ def test_seed_metrics_data_builds_expected_counts_and_calls( sub_theme=sub_themes[1], ), ] - metrics = [ - SimpleNamespace( - name=f"Metric {index + 1}", topic=topics[index % len(topics)] - ) - for index in range(4) - ] + metrics = [SimpleNamespace(name=f"Metric {index + 1}", topic=topics[index % len(topics)]) for index in range(4)] geography_types = [ SimpleNamespace(name="Nation"), SimpleNamespace(name="Lower Tier Local Authority"), @@ -220,6 +280,7 @@ def test_seed_metrics_data_builds_expected_counts_and_calls( result = Command._seed_metrics_data( scale_config={"geographies": 2, "metrics": 4, "days": 9}, truncate_first=True, + is_public=False, progress_callback=spy_progress_callback, ) @@ -239,11 +300,10 @@ def test_seed_metrics_data_builds_expected_counts_and_calls( stratum=spy_stratum_get_or_create.return_value[0], age=spy_age_get_or_create.return_value[0], days=9, + is_public=False, progress_callback=spy_progress_callback, ) - spy_progress_callback.assert_any_call( - "Preparing metric taxonomy and geography records..." - ) + spy_progress_callback.assert_any_call("Preparing metric taxonomy and geography records...") spy_progress_callback.assert_any_call("Generating Core/API time series rows...") def test_truncate_metrics_data_deletes_from_all_models(self): @@ -265,9 +325,7 @@ def test_truncate_metrics_data_deletes_from_all_models(self): for model_name in model_names: manager = mock.MagicMock() managers[model_name] = manager - stack.enter_context( - mock.patch(f"{MODULE_PATH}.{model_name}.objects", manager) - ) + stack.enter_context(mock.patch(f"{MODULE_PATH}.{model_name}.objects", manager)) Command._truncate_metrics_data() @@ -295,6 +353,7 @@ def test_seed_time_series_rows_flushes_remainder( stratum=_fake_stratum(), age=_fake_age(), days=1, + is_public=False, progress_callback=spy_progress_callback, ) @@ -304,9 +363,9 @@ def test_seed_time_series_rows_flushes_remainder( spy_api_time_series.objects.bulk_create.assert_called_once() assert spy_core_time_series.call_args.kwargs["sex"] == "f" assert spy_api_time_series.call_args.kwargs["sex"] == "f" - progress_messages = [ - call.args[0] for call in spy_progress_callback.call_args_list - ] + assert spy_core_time_series.call_args.kwargs["is_public"] is False + assert spy_api_time_series.call_args.kwargs["is_public"] is False + progress_messages = [call.args[0] for call in spy_progress_callback.call_args_list] _assert_progress_messages(progress_messages) @mock.patch(f"{MODULE_PATH}.APITimeSeries") @@ -325,6 +384,7 @@ def test_seed_time_series_rows_flushes_at_batch_size( stratum=_fake_stratum(), age=_fake_age(), days=FULL_BATCH_DAYS, + is_public=True, ) assert core_count == FULL_BATCH_DAYS @@ -400,10 +460,7 @@ def test_build_theme_hierarchy_records_contains_expected_real_values(): assert "infectious_disease" in theme_names assert any(sub_theme == "respiratory" for sub_theme, _ in sub_theme_rows) - assert any( - topic == "COVID-19" and sub_theme == "respiratory" - for topic, sub_theme, _ in topic_rows - ) + assert any(topic == "COVID-19" and sub_theme == "respiratory" for topic, sub_theme, _ in topic_rows) def test_build_theme_hierarchy_records_skips_unmatched_topic_group(): @@ -431,9 +488,7 @@ def test_build_geography_seed_values_returns_required_count(): def test_format_enum_name_replaces_underscores_and_title_cases(): - assert Command._format_enum_name("LOWER_TIER_LOCAL_AUTHORITY") == ( - "Lower Tier Local Authority" - ) + assert Command._format_enum_name("LOWER_TIER_LOCAL_AUTHORITY") == ("Lower Tier Local Authority") @mock.patch.object(Command, "_upsert_topics") @@ -608,9 +663,7 @@ def test_seed_geographies_creates_missing_types_and_geographies( result = Command._seed_geographies(count=2) - assert [ - (geography.name, geography.geography_type.name) for geography in result - ] == [ + assert [(geography.name, geography.geography_type.name) for geography in result] == [ ("England", "Nation"), ("Area 2", "Lower Tier Local Authority"), ] @@ -688,3 +741,123 @@ def test_get_next_random_metric_index_defaults_to_one_when_no_matches( result = Command._get_next_random_metric_index() assert result == 1 + + +@mock.patch(f"{MODULE_PATH}.AWSClient") +@mock.patch.object(Command, "_build_s3_object_key") +@mock.patch.object(Command, "_build_geography_seed_values") +@mock.patch.object(Command, "_build_theme_hierarchy_records") +@mock.patch.object(Command, "_build_timeseries_ingestion_payloads") +def test_seed_metrics_data_to_s3_uploads_payloads_and_returns_counts( + spy_build_payloads: mock.MagicMock, + spy_build_theme_hierarchy_records: mock.MagicMock, + spy_build_geography_seed_values: mock.MagicMock, + spy_build_s3_object_key: mock.MagicMock, + spy_aws_client: mock.MagicMock, +): + spy_progress_callback = mock.MagicMock() + payload = { + "topic": "COVID-19", + "metric": "COVID-19_cases_randomByDay_1", + "geography_code": "E92000001", + "age": "all", + "sex": "all", + "stratum": "default", + } + spy_build_payloads.return_value = [payload] + spy_build_s3_object_key.return_value = "in/key.json" + spy_build_theme_hierarchy_records.return_value = ( + [], + [], + [("COVID-19", "respiratory", "infectious_disease")], + ) + spy_build_geography_seed_values.return_value = [ + { + "name": "England", + "geography_code": "E92000001", + "geography_type": "Nation", + } + ] + + result = Command._seed_metrics_data_to_s3( + scale_config={"geographies": 1, "metrics": 2, "days": 3}, + is_public=False, + progress_callback=spy_progress_callback, + ) + + assert result == { + "Theme": 1, + "SubTheme": 1, + "Topic": 1, + "Metric": 2, + "Geography": 1, + "CoreTimeSeries": 6, + "APITimeSeries": 6, + } + spy_aws_client.return_value.upload_json_to_inbound.assert_called_once_with( + key="in/key.json", + payload=payload, + ) + spy_progress_callback.assert_any_call("Generating ingestion payloads for S3 upload...") + spy_progress_callback.assert_any_call("Uploaded 1 files to ingest bucket in/.") + + +@mock.patch(f"{MODULE_PATH}.random.choice") +@mock.patch(f"{MODULE_PATH}.random.uniform") +@mock.patch.object(Command, "_build_geography_seed_values") +@mock.patch.object(Command, "_build_theme_hierarchy_records") +def test_build_timeseries_ingestion_payloads_builds_expected_shape( + spy_build_theme_hierarchy_records: mock.MagicMock, + spy_build_geography_seed_values: mock.MagicMock, + spy_random_uniform: mock.MagicMock, + spy_random_choice: mock.MagicMock, +): + spy_build_theme_hierarchy_records.return_value = ( + [], + [], + [("COVID-19", "respiratory", "infectious_disease")], + ) + spy_build_geography_seed_values.return_value = [ + { + "name": "England", + "geography_code": "E92000001", + "geography_type": "Nation", + } + ] + spy_random_uniform.return_value = EXPECTED_METRIC_VALUE + spy_random_choice.return_value = "all" + + payloads = Command._build_timeseries_ingestion_payloads( + scale_config={"geographies": 1, "metrics": 1, "days": 2}, + is_public=True, + ) + + assert len(payloads) == 1 + payload = payloads[0] + assert payload["parent_theme"] == "infectious_disease" + assert payload["child_theme"] == "respiratory" + assert payload["topic"] == "COVID-19" + assert payload["metric_group"] == "cases" + assert payload["geography"] == "England" + assert payload["geography_code"] == "E92000001" + assert payload["age"] == "all" + assert payload["sex"] == "all" + assert payload["stratum"] == "default" + assert len(payload["time_series"]) == EXPECTED_TIME_SERIES_POINTS + assert payload["time_series"][0]["metric_value"] == EXPECTED_METRIC_VALUE + assert payload["time_series"][0]["is_public"] is True + + +def test_build_s3_object_key_builds_expected_file_name(): + payload = { + "topic": "COVID-19", + "metric": "COVID-19_cases_randomByDay_1", + "geography_code": "E92000001", + "age": "all", + "sex": "f", + "stratum": "default", + } + + result = Command._build_s3_object_key(payload=payload, payload_index=7) + + assert result == ("in/covid_19_cases_covid_19_cases_randombyday_1_E92000001_all_f_default_7.json") From e74869e972362ea83dcfa31362684a6ea2324366 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Thu, 26 Mar 2026 17:25:16 +0000 Subject: [PATCH 16/36] updated test seed random --- tests/system/test_seed_random.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/tests/system/test_seed_random.py b/tests/system/test_seed_random.py index 1d392bbc9..d60dbf767 100644 --- a/tests/system/test_seed_random.py +++ b/tests/system/test_seed_random.py @@ -1,3 +1,5 @@ +from urllib.parse import quote + import pytest from django.core.management import call_command from rest_framework.test import APIClient @@ -46,22 +48,17 @@ def test_command_seeds_metrics_dataset_and_data_is_queryable_via_api(self): assert sample_row is not None api_client = APIClient() - response = api_client.post( - path="/api/tables/v4/", - data={ - "file_format": "svg", - "plots": [ - { - "topic": sample_row.topic, - "metric": sample_row.metric, - "date_from": "2020-01-01", - "chart_type": "bar", - } - ], - }, - format="json", + path = ( + "/api/public/timeseries/" + f"themes/{quote(sample_row.theme, safe='')}/" + f"sub_themes/{quote(sample_row.sub_theme, safe='')}/" + f"topics/{quote(sample_row.topic, safe='')}/" + f"geography_types/{quote(sample_row.geography_type, safe='')}/" + f"geographies/{quote(sample_row.geography, safe='')}/" + "metrics/" ) + response = api_client.get(path=path, format="json") assert response.status_code == HTTP_OK - assert len(response.data) > 0 - assert len(response.data[0]["values"]) > 0 + assert "metrics" in response.data + assert sample_row.metric in response.data["metrics"] From 75cc893d88c1da9066d345a44d3fe1c0062c89cb Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Thu, 26 Mar 2026 17:28:58 +0000 Subject: [PATCH 17/36] updated the system test request to force JSON response --- tests/system/test_seed_random.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/system/test_seed_random.py b/tests/system/test_seed_random.py index d60dbf767..1c5a22423 100644 --- a/tests/system/test_seed_random.py +++ b/tests/system/test_seed_random.py @@ -57,7 +57,11 @@ def test_command_seeds_metrics_dataset_and_data_is_queryable_via_api(self): f"geographies/{quote(sample_row.geography, safe='')}/" "metrics/" ) - response = api_client.get(path=path, format="json") + response = api_client.get( + path=path, + format="json", + HTTP_ACCEPT="application/json", + ) assert response.status_code == HTTP_OK assert "metrics" in response.data From 74c40d8843a85872a37921fcb738adaab2024e36 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Fri, 27 Mar 2026 13:01:32 +0000 Subject: [PATCH 18/36] Fix: apply formatting black/ruff --- .../crawler/private_api_crawler.py | 3 +- .../management/commands/build_cms_site.py | 54 +-- .../build_cms_site_helpers/index_pages.py | 8 +- .../build_cms_site_helpers/landing_page.py | 4 +- .../commands/build_cms_site_helpers/menu.py | 4 +- .../commands/build_cms_site_helpers/pages.py | 10 +- ingestion/aws_client.py | 16 +- metrics/api/permissions/fluent_permissions.py | 2 +- .../charts/single_category_charts.py | 1 - .../api/serializers/charts/subplot_charts.py | 10 +- .../downloads/subplot_downloads/api_view.py | 2 +- .../geography_relationships/utla_to_region.py | 308 +++++++++--------- .../rbac_models/rbac_group_permissions.py | 1 - .../chart_settings/subplot_chart_settings.py | 1 - metrics/domain/charts/utils.py | 1 - metrics/domain/models/plots_text.py | 5 +- .../management/commands/seed_random.py | 151 +++++++-- metrics/interfaces/plots/access.py | 5 +- .../weather_health_alerts/access.py | 2 +- tests/unit/ingestion/test_aws_client.py | 100 ++++-- .../interfaces/management/test_seed_random.py | 44 ++- 21 files changed, 416 insertions(+), 316 deletions(-) diff --git a/caching/private_api/crawler/private_api_crawler.py b/caching/private_api/crawler/private_api_crawler.py index 4f945d3e3..6ba3c2346 100644 --- a/caching/private_api/crawler/private_api_crawler.py +++ b/caching/private_api/crawler/private_api_crawler.py @@ -113,8 +113,7 @@ def process_pages(self, *, pages: list[TopicPage, CommonPage]) -> None: self.process_all_sections_in_page(page=page) except AttributeError: logger.info( - "`%s` page has no dynamic content blocks. " - "So only the headless CMS API detail has been processed", + "`%s` page has no dynamic content blocks. So only the headless CMS API detail has been processed", page.title, ) logger.info("Completed %s / %s pages", index, pages_count) diff --git a/cms/dashboard/management/commands/build_cms_site.py b/cms/dashboard/management/commands/build_cms_site.py index 890dd29a4..98671be08 100644 --- a/cms/dashboard/management/commands/build_cms_site.py +++ b/cms/dashboard/management/commands/build_cms_site.py @@ -70,12 +70,8 @@ def handle(self, *args, **options): build_cms_site_helpers.create_landing_page(parent_page=root_page) - build_cms_site_helpers.create_acknowledgement_page( - name="acknowledgement", parent_page=root_page - ) - build_cms_site_helpers.create_feedback_page( - name="feedback", parent_page=root_page - ) + build_cms_site_helpers.create_acknowledgement_page(name="acknowledgement", parent_page=root_page) + build_cms_site_helpers.create_feedback_page(name="feedback", parent_page=root_page) build_cms_site_helpers.create_menu_snippet() @classmethod @@ -93,12 +89,8 @@ def _build_weather_health_alerts_section(cls, root_page: UKHSARootPage) -> None: weather_health_alerts_page = build_cms_site_helpers.create_composite_page( name="weather_health_alerts", parent_page=root_page ) - build_cms_site_helpers.create_composite_page( - name="heat_health_alerts", parent_page=weather_health_alerts_page - ) - build_cms_site_helpers.create_composite_page( - name="cold_health_alerts", parent_page=weather_health_alerts_page - ) + build_cms_site_helpers.create_composite_page(name="heat_health_alerts", parent_page=weather_health_alerts_page) + build_cms_site_helpers.create_composite_page(name="cold_health_alerts", parent_page=weather_health_alerts_page) @classmethod def _build_access_our_data_section(cls, root_page: UKHSARootPage) -> None: @@ -113,33 +105,23 @@ def _build_access_our_data_section(cls, root_page: UKHSARootPage) -> None: name="access_our_data_data_structure", parent_page=access_our_data_parent_page, ) - build_cms_site_helpers.create_bulk_downloads_page( - name="bulk_downloads", parent_page=root_page - ) + build_cms_site_helpers.create_bulk_downloads_page(name="bulk_downloads", parent_page=root_page) @classmethod def _build_respiratory_viruses_section(cls, root_page: UKHSARootPage) -> None: - covid_19_page = build_cms_site_helpers.create_topic_page( - name="covid_19", parent_page=root_page - ) - influenza_page = build_cms_site_helpers.create_topic_page( - name="influenza", parent_page=root_page - ) + covid_19_page = build_cms_site_helpers.create_topic_page(name="covid_19", parent_page=root_page) + influenza_page = build_cms_site_helpers.create_topic_page(name="influenza", parent_page=root_page) other_respiratory_viruses_page = build_cms_site_helpers.create_topic_page( name="other_respiratory_viruses", parent_page=root_page ) # Because the index page links to these pages # they need to be created first, referenced and then moved under the index page - respiratory_viruses_index_page = ( - build_cms_site_helpers.create_respiratory_viruses_index_page( - name="respiratory-viruses", parent_page=root_page - ) + respiratory_viruses_index_page = build_cms_site_helpers.create_respiratory_viruses_index_page( + name="respiratory-viruses", parent_page=root_page ) - other_respiratory_viruses_page.move( - target=respiratory_viruses_index_page, pos="last-child" - ) + other_respiratory_viruses_page.move(target=respiratory_viruses_index_page, pos="last-child") influenza_page.move(target=respiratory_viruses_index_page, pos="last-child") covid_19_page.move(target=respiratory_viruses_index_page, pos="last-child") @@ -160,19 +142,11 @@ def _build_cover_section(cls, root_page: UKHSARootPage) -> None: @classmethod def _build_common_pages(cls, root_page: UKHSARootPage) -> None: build_cms_site_helpers.create_common_page(name="about", parent_page=root_page) - build_cms_site_helpers.create_common_page( - name="location_based_data", parent_page=root_page - ) - build_cms_site_helpers.create_common_page( - name="whats_coming", parent_page=root_page - ) + build_cms_site_helpers.create_common_page(name="location_based_data", parent_page=root_page) + build_cms_site_helpers.create_common_page(name="whats_coming", parent_page=root_page) build_cms_site_helpers.create_common_page(name="cookies", parent_page=root_page) - build_cms_site_helpers.create_common_page( - name="accessibility_statement", parent_page=root_page - ) - build_cms_site_helpers.create_common_page( - name="compliance", parent_page=root_page - ) + build_cms_site_helpers.create_common_page(name="accessibility_statement", parent_page=root_page) + build_cms_site_helpers.create_common_page(name="compliance", parent_page=root_page) @staticmethod def _clear_cms() -> None: diff --git a/cms/dashboard/management/commands/build_cms_site_helpers/index_pages.py b/cms/dashboard/management/commands/build_cms_site_helpers/index_pages.py index 0550ae1ec..f41d815da 100644 --- a/cms/dashboard/management/commands/build_cms_site_helpers/index_pages.py +++ b/cms/dashboard/management/commands/build_cms_site_helpers/index_pages.py @@ -4,9 +4,7 @@ def create_respiratory_viruses_index_page_body() -> list[dict]: covid_page = TopicPage.objects.get(slug="covid-19") influenza_page = TopicPage.objects.get(slug="influenza") - other_respiratory_viruses_page = TopicPage.objects.get( - slug="other-respiratory-viruses" - ) + other_respiratory_viruses_page = TopicPage.objects.get(slug="other-respiratory-viruses") return [ { @@ -45,9 +43,7 @@ def create_respiratory_viruses_index_page_body() -> list[dict]: def create_cover_index_page_body() -> list[dict]: - childhood_vaccinations_topic_page = TopicPage.objects.get( - slug="childhood-vaccinations" - ) + childhood_vaccinations_topic_page = TopicPage.objects.get(slug="childhood-vaccinations") return [ { "type": "text", diff --git a/cms/dashboard/management/commands/build_cms_site_helpers/landing_page.py b/cms/dashboard/management/commands/build_cms_site_helpers/landing_page.py index ca310a706..f02058f78 100644 --- a/cms/dashboard/management/commands/build_cms_site_helpers/landing_page.py +++ b/cms/dashboard/management/commands/build_cms_site_helpers/landing_page.py @@ -6,9 +6,7 @@ def create_landing_page_body_wih_page_links() -> list[dict]: respiratory_viruses_index = CompositePage.objects.get(slug="respiratory-viruses") covid_page = TopicPage.objects.get(slug="covid-19") influenza_page = TopicPage.objects.get(slug="influenza") - other_respiratory_viruses_page = TopicPage.objects.get( - slug="other-respiratory-viruses" - ) + other_respiratory_viruses_page = TopicPage.objects.get(slug="other-respiratory-viruses") weather_health_alerts_page = CompositePage.objects.get(slug="weather-health-alerts") return [ diff --git a/cms/dashboard/management/commands/build_cms_site_helpers/menu.py b/cms/dashboard/management/commands/build_cms_site_helpers/menu.py index 351c3c30f..a341f80d6 100644 --- a/cms/dashboard/management/commands/build_cms_site_helpers/menu.py +++ b/cms/dashboard/management/commands/build_cms_site_helpers/menu.py @@ -19,9 +19,7 @@ def _create_menu_data() -> list[dict]: landing_page = LandingPage.objects.first() covid_page = TopicPage.objects.get(slug="covid-19") flu_page = TopicPage.objects.get(slug="influenza") - other_respiratory_viruses_page = TopicPage.objects.get( - slug="other-respiratory-viruses" - ) + other_respiratory_viruses_page = TopicPage.objects.get(slug="other-respiratory-viruses") childhood_vaccinations_index_page = CompositePage.objects.get(slug="cover") weather_health_alerts_page = CompositePage.objects.get(slug="weather-health-alerts") diff --git a/cms/dashboard/management/commands/build_cms_site_helpers/pages.py b/cms/dashboard/management/commands/build_cms_site_helpers/pages.py index 1888ffa55..b4036c02d 100644 --- a/cms/dashboard/management/commands/build_cms_site_helpers/pages.py +++ b/cms/dashboard/management/commands/build_cms_site_helpers/pages.py @@ -77,9 +77,7 @@ def create_landing_page(*, parent_page: Page) -> LandingPage: return page -def _create_index_page( - *, page_data: dict, parent_page: Page, create_index_page_body_func: Callable -) -> CompositePage: +def _create_index_page(*, page_data: dict, parent_page: Page, create_index_page_body_func: Callable) -> CompositePage: index_page_body: list[dict] = create_index_page_body_func() page = CompositePage( @@ -134,7 +132,7 @@ def create_topic_page(*, name: str, parent_page: Page) -> TopicPage: seo_title=data["meta"]["seo_title"], search_description=data["meta"]["search_description"], is_public=data["is_public"], - page_classification=data["page_classification"] + page_classification=data["page_classification"], ) _add_page_to_parent(page=page, parent_page=parent_page) @@ -198,9 +196,7 @@ def _get_or_create_button_id() -> int: return internal_button_snippet.id -def _add_download_button_to_composite_body( - *, body: dict[list[dict]] -) -> dict[list[dict]]: +def _add_download_button_to_composite_body(*, body: dict[list[dict]]) -> dict[list[dict]]: body.append( { "type": "internal_button", diff --git a/ingestion/aws_client.py b/ingestion/aws_client.py index 0a31f27f8..7fad3ffc8 100644 --- a/ingestion/aws_client.py +++ b/ingestion/aws_client.py @@ -135,7 +135,9 @@ def _copy_file_to_processed(self, *, key: str) -> None: Key=self._build_processed_key(key=key), ) except botocore.client.ClientError: - logger.warning("Failed to move `%s` to `%s` folder", key, self._processed_folder) + logger.warning( + "Failed to move `%s` to `%s` folder", key, self._processed_folder + ) def _copy_file_to_processed_archive(self, *, key: str) -> None: """Copies the file matching the given `key` into the ingest archive s3 bucket @@ -159,7 +161,9 @@ def _copy_file_to_processed_archive(self, *, key: str) -> None: }, ) except botocore.client.ClientError: - logger.warning("Failed to move `%s` to `%s` bucket", key, self._archive_bucket_name) + logger.warning( + "Failed to move `%s` to `%s` bucket", key, self._archive_bucket_name + ) def _copy_file_to_failed(self, *, key: str) -> None: """Copies the file matching the given `key` into the failed folder within the s3 bucket @@ -186,7 +190,9 @@ def _copy_file_to_failed(self, *, key: str) -> None: Key=self._build_failed_key(key=key), ) except botocore.client.ClientError: - logger.warning("Failed to move `%s` to `%s` folder", key, self._failed_folder) + logger.warning( + "Failed to move `%s` to `%s` folder", key, self._failed_folder + ) def _delete_file_from_inbound(self, *, key: str) -> None: """Deletes the file matching the given `key` from the inbound folder within the s3 bucket @@ -209,7 +215,9 @@ def _delete_file_from_inbound(self, *, key: str) -> None: try: self._client.delete_object(Bucket=self._bucket_name, Key=key) except botocore.client.ClientError: - logger.warning("Failed to delete `%s` from `%s` folder", key, self._inbound_folder) + logger.warning( + "Failed to delete `%s` from `%s` folder", key, self._inbound_folder + ) def _get_filename_from_key(self, *, key: str) -> str: """Extracts the filename from the `key` diff --git a/metrics/api/permissions/fluent_permissions.py b/metrics/api/permissions/fluent_permissions.py index cd528254c..215bac06a 100644 --- a/metrics/api/permissions/fluent_permissions.py +++ b/metrics/api/permissions/fluent_permissions.py @@ -12,7 +12,7 @@ def validate_permissions_for_non_public( metric: str, geography: str, geography_type: str, - rbac_permissions: Iterable[RBACPermission] + rbac_permissions: Iterable[RBACPermission], ) -> bool: """Compares the given data parameters to see if the `rbac_permissions` allow access to the non-public data diff --git a/metrics/api/serializers/charts/single_category_charts.py b/metrics/api/serializers/charts/single_category_charts.py index 46b9d958f..76a4896ee 100644 --- a/metrics/api/serializers/charts/single_category_charts.py +++ b/metrics/api/serializers/charts/single_category_charts.py @@ -72,7 +72,6 @@ def __init__(self, *args, **kwargs): class ChartsSerializer(BaseChartsSerializer): - plots = ChartPlotsListSerializer() def to_models(self, request: Request) -> ChartRequestParams: diff --git a/metrics/api/serializers/charts/subplot_charts.py b/metrics/api/serializers/charts/subplot_charts.py index 99475d04c..c1775c871 100644 --- a/metrics/api/serializers/charts/subplot_charts.py +++ b/metrics/api/serializers/charts/subplot_charts.py @@ -47,16 +47,10 @@ def validate(self, data): subplot_theme = data.get("theme") subplot_sub_theme = data.get("sub_theme") if not subplot_theme and not chart_theme: - msg = ( - "'theme' must be specified at either " - "subplot_parameters or chart_parameters level" - ) + msg = "'theme' must be specified at either subplot_parameters or chart_parameters level" raise serializers.ValidationError(msg) if not subplot_sub_theme and not chart_sub_theme: - msg = ( - "'sub_theme' must be specified at either " - "subplot_parameters or chart_parameters level" - ) + msg = "'sub_theme' must be specified at either subplot_parameters or chart_parameters level" raise serializers.ValidationError(msg) return data diff --git a/metrics/api/views/downloads/subplot_downloads/api_view.py b/metrics/api/views/downloads/subplot_downloads/api_view.py index 0e16e0980..6281530c5 100644 --- a/metrics/api/views/downloads/subplot_downloads/api_view.py +++ b/metrics/api/views/downloads/subplot_downloads/api_view.py @@ -106,7 +106,7 @@ def _write_headline_to_csv( metric_group: str, queryset: CoreTimeSeriesQuerySet | CoreHeadlineQuerySet, response: HttpResponse, - headers: list[str] | None + headers: list[str] | None, ) -> None: if DataSourceFileType[metric_group].is_headline: serializer = self._get_serializer_class( diff --git a/metrics/data/in_memory_models/geography_relationships/utla_to_region.py b/metrics/data/in_memory_models/geography_relationships/utla_to_region.py index 054aa8cf1..4696022cf 100644 --- a/metrics/data/in_memory_models/geography_relationships/utla_to_region.py +++ b/metrics/data/in_memory_models/geography_relationships/utla_to_region.py @@ -1,156 +1,156 @@ UTLA_TO_REGION_LOOKUP: dict[str, str] = { - "E06000001": "North East", # Hartlepool - "E06000002": "North East", # Middlesborough - "E06000003": "North East", # Redcar and Cleveland - "E06000004": "North East", # Stockton-on-Tees - "E06000005": "North East", # Darlington - "E06000006": "North West", # Halton - "E06000007": "North West", # Warrington - "E06000008": "North West", # Blackburn with Darwen - "E06000009": "North West", # Blackpool - "E06000010": "Yorkshire and The Humber", # Kingston upon Hull, City of - "E06000011": "Yorkshire and The Humber", # East Riding of Yorkshire - "E06000012": "Yorkshire and The Humber", # North East Lincolnshire - "E06000013": "Yorkshire and The Humber", # North Lincolnshire - "E06000014": "Yorkshire and The Humber", # York - "E06000015": "East Midlands", # Derby - "E06000016": "East Midlands", # Leicester - "E06000017": "East Midlands", # Rutland - "E06000018": "East Midlands", # Nottingham - "E06000019": "West Midlands", # Herefordshire, County of - "E06000020": "West Midlands", # Telford and Wrekin - "E06000021": "West Midlands", # Stoke-on-Trent - "E06000022": "South West", # Bath and North Somerset - "E06000023": "South West", # Bristol, City of - "E06000024": "South West", # North Somerset - "E06000025": "South West", # South Gloucestershire - "E06000026": "South West", # Plymouth - "E06000027": "South West", # Torbay - "E06000030": "South West", # Swindon - "E06000031": "East of England", # Peterborough - "E06000032": "East of England", # Luton - "E06000033": "East of England", # Southend-on-Sea - "E06000034": "East of England", # Thurrock - "E06000035": "South East", # Medway - "E06000036": "South East", # Bracknell Forest - "E06000037": "South East", # West Berkshire - "E06000038": "South East", # Reading - "E06000039": "South East", # Slough - "E06000040": "South East", # Windsor and Maidenhead - "E06000041": "South East", # Wokingham - "E06000042": "South East", # Milton Keynes - "E06000043": "South East", # Brighton and Hove - "E06000044": "South East", # Portsmouth - "E06000045": "South East", # Southampton - "E06000046": "South East", # Isle of Wight - "E06000047": "North East", # County Durham - "E06000049": "North West", # Cheshire East - "E06000050": "North West", # Cheshire West and Chester - "E06000051": "West Midlands", # Shropshire - "E06000052": "South West", # Cornwall - "E06000053": "South West", # Isles of Scilly - "E06000054": "South West", # Wiltshire - "E06000055": "East of England", # Bedford - "E06000056": "East of England", # Central Bedforshire - "E06000057": "North East", # Northumberland - "E06000058": "South West", # Bournemouth, Christchurch and Poole - "E06000059": "South West", # Dorset - "E06000060": "South East", # Buckinghamshire - "E06000061": "East Midlands", # North Northamptonshire - "E06000062": "East Midlands", # West Northamptonshire - "E06000063": "North West", # Cumberland - "E06000064": "North West", # Westmorland and Furness - "E10000023": "Yorkshire and The Humber", # North Yorkshire - "E10000027": "South West", # Somerset - "E08000001": "North West", # Bolton - "E08000002": "North West", # Bury - "E08000003": "North West", # Manchester - "E08000004": "North West", # Oldham - "E08000005": "North West", # Rochdale - "E08000006": "North West", # Salford - "E08000007": "North West", # Stockport - "E08000008": "North West", # Tameside - "E08000009": "North West", # Trafford - "E08000010": "North West", # Wigan - "E08000011": "North West", # Knowsley - "E08000012": "North West", # Liverpool - "E08000013": "North West", # St. Helens - "E08000014": "North West", # Sefton - "E08000015": "North West", # Wirral - "E08000016": "Yorkshire and The Humber", # Barnsley - "E08000017": "Yorkshire and The Humber", # Doncaster - "E08000018": "Yorkshire and The Humber", # Rotherham - "E08000019": "Yorkshire and The Humber", # Sheffield - "E08000021": "North East", # Newcastle upon Tyne - "E08000022": "North East", # North Tyneside - "E08000023": "North East", # South Tyneside - "E08000024": "North East", # Sunderland - "E08000025": "West Midlands", # Birmingham - "E08000026": "West Midlands", # Coventry - "E08000027": "West Midlands", # Dudley - "E08000028": "West Midlands", # Sandwell - "E08000029": "West Midlands", # Solihull - "E08000030": "West Midlands", # Walsall - "E08000031": "West Midlands", # Wolverhampton - "E08000032": "Yorkshire and The Humber", # Bradford - "E08000033": "Yorkshire and The Humber", # Calderdale - "E08000034": "Yorkshire and The Humber", # Kirklees - "E08000035": "Yorkshire and The Humber", # Leeds - "E08000036": "Yorkshire and The Humber", # Wakefield - "E08000037": "North East", # Gateshead - "E09000001": "London", # City of London - "E09000002": "London", # Barking and Dagenham - "E09000003": "London", # Barnet - "E09000004": "London", # Bexley - "E09000005": "London", # Brent - "E09000006": "London", # Bromley - "E09000007": "London", # Camden - "E09000008": "London", # Croydon - "E09000009": "London", # Ealing - "E09000010": "London", # Enfield - "E09000011": "London", # Greenwich - "E09000012": "London", # Hackney - "E09000013": "London", # Hammersmith and Fulham - "E09000014": "London", # Haringey - "E09000015": "London", # Harrow - "E09000016": "London", # Havering - "E09000017": "London", # Hillingdon - "E09000018": "London", # Hounslow - "E09000019": "London", # Islington - "E09000020": "London", # Kensington and Chelsea - "E09000021": "London", # Kingston upon Thames - "E09000022": "London", # Lambeth - "E09000023": "London", # Lewisham - "E09000024": "London", # Merton - "E09000025": "London", # Newham - "E09000026": "London", # Redbridge - "E09000027": "London", # Richmond upon Thames - "E09000028": "London", # Southwark - "E09000029": "London", # Sutton - "E09000030": "London", # Tower Hamlets - "E09000031": "London", # Waltham Forest - "E09000032": "London", # Wandsworth - "E09000033": "London", # Westminster - "E10000003": "East of England", # Cambridgeshire - "E10000006": "North West", # Cumbria - "E10000007": "East Midlands", # Derbyshire - "E10000008": "South West", # Devon - "E10000011": "South East", # East Sussex - "E10000012": "East of England", # Essex - "E10000013": "South West", # Gloucestershire - "E10000014": "South East", # Hampshire - "E10000015": "East of England", # Hertfordshire - "E10000016": "South East", # Kent - "E10000017": "North West", # Lancashire - "E10000018": "East Midlands", # Leicestershire - "E10000019": "East Midlands", # Lincolnshire - "E10000020": "East of England", # Norfolk - "E10000024": "East Midlands", # Nottinghamshire - "E10000025": "South East", # Oxfordshire - "E10000028": "West Midlands", # Staffordshire - "E10000029": "East of England", # Suffolk - "E10000030": "South East", # Surrey - "E10000031": "West Midlands", # Warwickshire - "E10000032": "South East", # West Sussex - "E10000034": "West Midlands", # Worcestershire + "E06000001": "North East", # Hartlepool + "E06000002": "North East", # Middlesborough + "E06000003": "North East", # Redcar and Cleveland + "E06000004": "North East", # Stockton-on-Tees + "E06000005": "North East", # Darlington + "E06000006": "North West", # Halton + "E06000007": "North West", # Warrington + "E06000008": "North West", # Blackburn with Darwen + "E06000009": "North West", # Blackpool + "E06000010": "Yorkshire and The Humber", # Kingston upon Hull, City of + "E06000011": "Yorkshire and The Humber", # East Riding of Yorkshire + "E06000012": "Yorkshire and The Humber", # North East Lincolnshire + "E06000013": "Yorkshire and The Humber", # North Lincolnshire + "E06000014": "Yorkshire and The Humber", # York + "E06000015": "East Midlands", # Derby + "E06000016": "East Midlands", # Leicester + "E06000017": "East Midlands", # Rutland + "E06000018": "East Midlands", # Nottingham + "E06000019": "West Midlands", # Herefordshire, County of + "E06000020": "West Midlands", # Telford and Wrekin + "E06000021": "West Midlands", # Stoke-on-Trent + "E06000022": "South West", # Bath and North Somerset + "E06000023": "South West", # Bristol, City of + "E06000024": "South West", # North Somerset + "E06000025": "South West", # South Gloucestershire + "E06000026": "South West", # Plymouth + "E06000027": "South West", # Torbay + "E06000030": "South West", # Swindon + "E06000031": "East of England", # Peterborough + "E06000032": "East of England", # Luton + "E06000033": "East of England", # Southend-on-Sea + "E06000034": "East of England", # Thurrock + "E06000035": "South East", # Medway + "E06000036": "South East", # Bracknell Forest + "E06000037": "South East", # West Berkshire + "E06000038": "South East", # Reading + "E06000039": "South East", # Slough + "E06000040": "South East", # Windsor and Maidenhead + "E06000041": "South East", # Wokingham + "E06000042": "South East", # Milton Keynes + "E06000043": "South East", # Brighton and Hove + "E06000044": "South East", # Portsmouth + "E06000045": "South East", # Southampton + "E06000046": "South East", # Isle of Wight + "E06000047": "North East", # County Durham + "E06000049": "North West", # Cheshire East + "E06000050": "North West", # Cheshire West and Chester + "E06000051": "West Midlands", # Shropshire + "E06000052": "South West", # Cornwall + "E06000053": "South West", # Isles of Scilly + "E06000054": "South West", # Wiltshire + "E06000055": "East of England", # Bedford + "E06000056": "East of England", # Central Bedforshire + "E06000057": "North East", # Northumberland + "E06000058": "South West", # Bournemouth, Christchurch and Poole + "E06000059": "South West", # Dorset + "E06000060": "South East", # Buckinghamshire + "E06000061": "East Midlands", # North Northamptonshire + "E06000062": "East Midlands", # West Northamptonshire + "E06000063": "North West", # Cumberland + "E06000064": "North West", # Westmorland and Furness + "E10000023": "Yorkshire and The Humber", # North Yorkshire + "E10000027": "South West", # Somerset + "E08000001": "North West", # Bolton + "E08000002": "North West", # Bury + "E08000003": "North West", # Manchester + "E08000004": "North West", # Oldham + "E08000005": "North West", # Rochdale + "E08000006": "North West", # Salford + "E08000007": "North West", # Stockport + "E08000008": "North West", # Tameside + "E08000009": "North West", # Trafford + "E08000010": "North West", # Wigan + "E08000011": "North West", # Knowsley + "E08000012": "North West", # Liverpool + "E08000013": "North West", # St. Helens + "E08000014": "North West", # Sefton + "E08000015": "North West", # Wirral + "E08000016": "Yorkshire and The Humber", # Barnsley + "E08000017": "Yorkshire and The Humber", # Doncaster + "E08000018": "Yorkshire and The Humber", # Rotherham + "E08000019": "Yorkshire and The Humber", # Sheffield + "E08000021": "North East", # Newcastle upon Tyne + "E08000022": "North East", # North Tyneside + "E08000023": "North East", # South Tyneside + "E08000024": "North East", # Sunderland + "E08000025": "West Midlands", # Birmingham + "E08000026": "West Midlands", # Coventry + "E08000027": "West Midlands", # Dudley + "E08000028": "West Midlands", # Sandwell + "E08000029": "West Midlands", # Solihull + "E08000030": "West Midlands", # Walsall + "E08000031": "West Midlands", # Wolverhampton + "E08000032": "Yorkshire and The Humber", # Bradford + "E08000033": "Yorkshire and The Humber", # Calderdale + "E08000034": "Yorkshire and The Humber", # Kirklees + "E08000035": "Yorkshire and The Humber", # Leeds + "E08000036": "Yorkshire and The Humber", # Wakefield + "E08000037": "North East", # Gateshead + "E09000001": "London", # City of London + "E09000002": "London", # Barking and Dagenham + "E09000003": "London", # Barnet + "E09000004": "London", # Bexley + "E09000005": "London", # Brent + "E09000006": "London", # Bromley + "E09000007": "London", # Camden + "E09000008": "London", # Croydon + "E09000009": "London", # Ealing + "E09000010": "London", # Enfield + "E09000011": "London", # Greenwich + "E09000012": "London", # Hackney + "E09000013": "London", # Hammersmith and Fulham + "E09000014": "London", # Haringey + "E09000015": "London", # Harrow + "E09000016": "London", # Havering + "E09000017": "London", # Hillingdon + "E09000018": "London", # Hounslow + "E09000019": "London", # Islington + "E09000020": "London", # Kensington and Chelsea + "E09000021": "London", # Kingston upon Thames + "E09000022": "London", # Lambeth + "E09000023": "London", # Lewisham + "E09000024": "London", # Merton + "E09000025": "London", # Newham + "E09000026": "London", # Redbridge + "E09000027": "London", # Richmond upon Thames + "E09000028": "London", # Southwark + "E09000029": "London", # Sutton + "E09000030": "London", # Tower Hamlets + "E09000031": "London", # Waltham Forest + "E09000032": "London", # Wandsworth + "E09000033": "London", # Westminster + "E10000003": "East of England", # Cambridgeshire + "E10000006": "North West", # Cumbria + "E10000007": "East Midlands", # Derbyshire + "E10000008": "South West", # Devon + "E10000011": "South East", # East Sussex + "E10000012": "East of England", # Essex + "E10000013": "South West", # Gloucestershire + "E10000014": "South East", # Hampshire + "E10000015": "East of England", # Hertfordshire + "E10000016": "South East", # Kent + "E10000017": "North West", # Lancashire + "E10000018": "East Midlands", # Leicestershire + "E10000019": "East Midlands", # Lincolnshire + "E10000020": "East of England", # Norfolk + "E10000024": "East Midlands", # Nottinghamshire + "E10000025": "South East", # Oxfordshire + "E10000028": "West Midlands", # Staffordshire + "E10000029": "East of England", # Suffolk + "E10000030": "South East", # Surrey + "E10000031": "West Midlands", # Warwickshire + "E10000032": "South East", # West Sussex + "E10000034": "West Midlands", # Worcestershire } diff --git a/metrics/data/models/rbac_models/rbac_group_permissions.py b/metrics/data/models/rbac_models/rbac_group_permissions.py index e1008fe5d..bf7e63594 100644 --- a/metrics/data/models/rbac_models/rbac_group_permissions.py +++ b/metrics/data/models/rbac_models/rbac_group_permissions.py @@ -7,7 +7,6 @@ class RBACGroupPermission(models.Model): - class Meta: db_table = "rbac_group_permissions" diff --git a/metrics/domain/charts/chart_settings/subplot_chart_settings.py b/metrics/domain/charts/chart_settings/subplot_chart_settings.py index f24441792..c32610fa5 100644 --- a/metrics/domain/charts/chart_settings/subplot_chart_settings.py +++ b/metrics/domain/charts/chart_settings/subplot_chart_settings.py @@ -36,7 +36,6 @@ class SubplotChartSettings(ChartSettings): - def __init__(self, *, chart_generation_payload: SubplotChartGenerationPayload): super().__init__(chart_generation_payload=chart_generation_payload) self.subplot_data: SubplotGenerationData = chart_generation_payload.subplot_data diff --git a/metrics/domain/charts/utils.py b/metrics/domain/charts/utils.py index c246076f8..cdbea3bef 100644 --- a/metrics/domain/charts/utils.py +++ b/metrics/domain/charts/utils.py @@ -22,7 +22,6 @@ def convert_large_numbers_to_short_text(number: int) -> str: Eg: 1000 = 1k, 2500 = 2k, 2690 = 3k, 100,000,000 = 1m """ if number >= E_NOTATION[1]: - for index in range(len(E_NOTATION)): try: if E_NOTATION[index] <= number < E_NOTATION[index + 1]: diff --git a/metrics/domain/models/plots_text.py b/metrics/domain/models/plots_text.py index 94b9f8fb1..ddf01e88d 100644 --- a/metrics/domain/models/plots_text.py +++ b/metrics/domain/models/plots_text.py @@ -323,10 +323,7 @@ def _build_description_for_section_of_data( if end_value > start_value: part_description = f"It rose from {start_value} on {start_date} to {end_value} on {end_date}. " elif end_value == start_value: - part_description = ( - f"The date fluctuates between {start_value} on {start_date}, " - f"ending with the same value on {end_date}. " - ) + part_description = f"The date fluctuates between {start_value} on {start_date}, ending with the same value on {end_date}. " else: part_description = f"It fell from {start_value} on {start_date} to {end_value} on {end_date}. " diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index f3878ba43..902f7a2b1 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -92,7 +92,9 @@ def handle(self, *args, **options) -> None: delivery: str = options["delivery"] is_public: bool = not options["non_public"] - selected_seed = options["seed"] if options["seed"] is not None else int(time.time()) + selected_seed = ( + options["seed"] if options["seed"] is not None else int(time.time()) + ) random.seed(selected_seed) # nosec B311 self.stdout.write(f"Seed used: {selected_seed}") @@ -234,15 +236,27 @@ def _seed_metrics_data_to_s3( uploaded_files += 1 if progress_callback is not None: - progress_callback(f"Uploaded {uploaded_files:,} files to ingest bucket in/.") + progress_callback( + f"Uploaded {uploaded_files:,} files to ingest bucket in/." + ) topic_rows = cls._build_theme_hierarchy_records()[2] theme_count = len({theme_name for _, _, theme_name in topic_rows}) - sub_theme_count = len({(sub_theme_name, theme_name) for _, sub_theme_name, theme_name in topic_rows}) + sub_theme_count = len( + { + (sub_theme_name, theme_name) + for _, sub_theme_name, theme_name in topic_rows + } + ) topic_count = len( - {(topic_name, sub_theme_name, theme_name) for topic_name, sub_theme_name, theme_name in topic_rows} + { + (topic_name, sub_theme_name, theme_name) + for topic_name, sub_theme_name, theme_name in topic_rows + } + ) + geography_count = len( + cls._build_geography_seed_values(count=scale_config["geographies"]) ) - geography_count = len(cls._build_geography_seed_values(count=scale_config["geographies"])) row_count = scale_config["metrics"] * geography_count * scale_config["days"] return { @@ -319,7 +333,9 @@ def _seed_time_series_rows( current_count=api_count, ) - if progress_callback is not None and (metric_index == total_metrics or metric_index % log_interval == 0): + if progress_callback is not None and ( + metric_index == total_metrics or metric_index % log_interval == 0 + ): processed_row_count = metric_index * len(geographies) * days progress_callback( f"Processed {metric_index}/{total_metrics} metrics " @@ -340,7 +356,9 @@ def _seed_time_series_rows( ) if progress_callback is not None: - progress_callback(f"Inserted {core_count:,} CoreTimeSeries rows and {api_count:,} APITimeSeries rows.") + progress_callback( + f"Inserted {core_count:,} CoreTimeSeries rows and {api_count:,} APITimeSeries rows." + ) return core_count, api_count @@ -393,11 +411,20 @@ def _upsert_themes( *, theme_names: list[str], ) -> tuple[list[Theme], dict[str, Theme]]: - themes_by_name = {theme.name: theme for theme in Theme.objects.filter(name__in=theme_names)} - missing_theme_names = [name for name in theme_names if name not in themes_by_name] + themes_by_name = { + theme.name: theme for theme in Theme.objects.filter(name__in=theme_names) + } + missing_theme_names = [ + name for name in theme_names if name not in themes_by_name + ] if missing_theme_names: cls._bulk_create(Theme, [Theme(name=name) for name in missing_theme_names]) - themes_by_name.update({theme.name: theme for theme in Theme.objects.filter(name__in=missing_theme_names)}) + themes_by_name.update( + { + theme.name: theme + for theme in Theme.objects.filter(name__in=missing_theme_names) + } + ) return [themes_by_name[name] for name in theme_names], themes_by_name @classmethod @@ -413,7 +440,10 @@ def _upsert_sub_themes( theme__name__in=theme_names, name__in={name for name, _ in sub_theme_keys}, ) - sub_themes_by_key = {(sub_theme.name, sub_theme.theme.name): sub_theme for sub_theme in existing_sub_themes} + sub_themes_by_key = { + (sub_theme.name, sub_theme.theme.name): sub_theme + for sub_theme in existing_sub_themes + } missing_sub_theme_keys = [ (sub_theme_name, theme_name) for sub_theme_name, theme_name in sub_theme_keys @@ -431,8 +461,13 @@ def _upsert_sub_themes( { (sub_theme.name, sub_theme.theme.name): sub_theme for sub_theme in SubTheme.objects.select_related("theme").filter( - theme__name__in={theme_name for _, theme_name in missing_sub_theme_keys}, - name__in={sub_theme_name for sub_theme_name, _ in missing_sub_theme_keys}, + theme__name__in={ + theme_name for _, theme_name in missing_sub_theme_keys + }, + name__in={ + sub_theme_name + for sub_theme_name, _ in missing_sub_theme_keys + }, ) } ) @@ -447,15 +482,21 @@ def _upsert_topics( ) -> list[Topic]: topic_keys = list(dict.fromkeys(topic_rows)) sub_themes_by_id_key = { - (sub_theme_name, theme_name): sub_themes_by_key[(sub_theme_name, theme_name)] + (sub_theme_name, theme_name): sub_themes_by_key[ + (sub_theme_name, theme_name) + ] for _, sub_theme_name, theme_name in topic_keys } - candidate_sub_theme_ids = [sub_theme.id for sub_theme in sub_themes_by_id_key.values()] + candidate_sub_theme_ids = [ + sub_theme.id for sub_theme in sub_themes_by_id_key.values() + ] existing_topics = Topic.objects.filter( sub_theme_id__in=candidate_sub_theme_ids, name__in={topic_name for topic_name, _, _ in topic_keys}, ) - topics_by_key = {(topic.name, topic.sub_theme_id): topic for topic in existing_topics} + topics_by_key = { + (topic.name, topic.sub_theme_id): topic for topic in existing_topics + } missing_topic_keys = [ topic_key for topic_key in topic_keys @@ -484,7 +525,9 @@ def _upsert_topics( sub_themes_by_id_key[(sub_theme_name, theme_name)].id for _, sub_theme_name, theme_name in missing_topic_keys ], - name__in={topic_name for topic_name, _, _ in missing_topic_keys}, + name__in={ + topic_name for topic_name, _, _ in missing_topic_keys + }, ) } ) @@ -501,13 +544,19 @@ def _upsert_topics( @classmethod def _seed_geographies(cls, *, count: int) -> list[Geography]: geography_seed_values = cls._build_geography_seed_values(count=count) - geography_type_names = {record["geography_type"] for record in geography_seed_values} + geography_type_names = { + record["geography_type"] for record in geography_seed_values + } geography_type_names = sorted(geography_type_names) geography_types_by_name = { geography_type.name: geography_type - for geography_type in GeographyType.objects.filter(name__in=geography_type_names) + for geography_type in GeographyType.objects.filter( + name__in=geography_type_names + ) } - missing_geography_type_names = [name for name in geography_type_names if name not in geography_types_by_name] + missing_geography_type_names = [ + name for name in geography_type_names if name not in geography_types_by_name + ] if missing_geography_type_names: cls._bulk_create( GeographyType, @@ -516,22 +565,32 @@ def _seed_geographies(cls, *, count: int) -> list[Geography]: geography_types_by_name.update( { geography_type.name: geography_type - for geography_type in GeographyType.objects.filter(name__in=missing_geography_type_names) + for geography_type in GeographyType.objects.filter( + name__in=missing_geography_type_names + ) } ) - geography_types_by_name = {name: geography_types_by_name[name] for name in geography_type_names} + geography_types_by_name = { + name: geography_types_by_name[name] for name in geography_type_names + } geography_keys = list( dict.fromkeys( - (record["name"], record["geography_type"], record["geography_code"]) for record in geography_seed_values + (record["name"], record["geography_type"], record["geography_code"]) + for record in geography_seed_values ) ) - existing_geographies = Geography.objects.select_related("geography_type").filter( + existing_geographies = Geography.objects.select_related( + "geography_type" + ).filter( name__in={name for name, _, _ in geography_keys}, - geography_type__name__in={geography_type for _, geography_type, _ in geography_keys}, + geography_type__name__in={ + geography_type for _, geography_type, _ in geography_keys + }, ) geographies_by_key = { - (geography.name, geography.geography_type.name): geography for geography in existing_geographies + (geography.name, geography.geography_type.name): geography + for geography in existing_geographies } missing_geography_keys = [ (name, geography_type, geography_code) @@ -553,14 +612,22 @@ def _seed_geographies(cls, *, count: int) -> list[Geography]: geographies_by_key.update( { (geography.name, geography.geography_type.name): geography - for geography in Geography.objects.select_related("geography_type").filter( + for geography in Geography.objects.select_related( + "geography_type" + ).filter( name__in={name for name, _, _ in missing_geography_keys}, - geography_type__name__in={geography_type for _, geography_type, _ in missing_geography_keys}, + geography_type__name__in={ + geography_type + for _, geography_type, _ in missing_geography_keys + }, ) } ) - return [geographies_by_key[(name, geography_type)] for name, geography_type, _ in geography_keys] + return [ + geographies_by_key[(name, geography_type)] + for name, geography_type, _ in geography_keys + ] @classmethod def _build_time_series_rows_for_metric( @@ -584,7 +651,8 @@ def _build_time_series_rows_for_metric( current_date = start_date + timedelta(days=day_offset) base_value = random.uniform(5.0, 250.0) # noqa: S311 # nosec B311 metric_value = round( - base_value + random.uniform(-10.0, 10.0), # noqa: S311 # nosec B311 + base_value + + random.uniform(-10.0, 10.0), # noqa: S311 # nosec B311 2, ) sex = random.choice(SEED_RANDOM_SEX_OPTIONS) # noqa: S311 # nosec B311 @@ -635,13 +703,17 @@ def _build_timeseries_ingestion_payloads( is_public: bool, ) -> list[dict[str, object]]: _, _, topic_rows = cls._build_theme_hierarchy_records() - geographies = cls._build_geography_seed_values(count=scale_config["geographies"]) + geographies = cls._build_geography_seed_values( + count=scale_config["geographies"] + ) refresh_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S") start_date = date.today() - timedelta(days=scale_config["days"] - 1) payloads: list[dict[str, object]] = [] for metric_index in range(scale_config["metrics"]): - topic_name, sub_theme_name, theme_name = topic_rows[metric_index % len(topic_rows)] + topic_name, sub_theme_name, theme_name = topic_rows[ + metric_index % len(topic_rows) + ] metric_name = f"{topic_name}_cases_randomByDay_{metric_index + 1}" for geography in geographies: time_series_rows: list[dict[str, object]] = [] @@ -673,7 +745,9 @@ def _build_timeseries_ingestion_payloads( "geography": geography["name"], "geography_code": geography["geography_code"], "age": "all", - "sex": random.choice(SEED_RANDOM_SEX_OPTIONS), # noqa: S311 # nosec B311 + "sex": random.choice( + SEED_RANDOM_SEX_OPTIONS + ), # noqa: S311 # nosec B311 "stratum": "default", "refresh_date": refresh_date, "time_series": time_series_rows, @@ -707,7 +781,9 @@ def _bulk_create(model: type[TModel], records: Iterable[TModel]) -> list[TModel] @staticmethod def _get_next_random_metric_index() -> int: max_metric_index = 0 - for metric_name in Metric.objects.filter(name__startswith="Random Metric ").values_list( + for metric_name in Metric.objects.filter( + name__startswith="Random Metric " + ).values_list( "name", flat=True, ): @@ -746,7 +822,8 @@ def _build_theme_hierarchy_records( parent_theme_name = child_to_parent[sub_theme_name] sub_theme_pairs.add((sub_theme_name, parent_theme_name)) topic_rows.extend( - (topic_value, sub_theme_name, parent_theme_name) for topic_value in topic_group.return_list() + (topic_value, sub_theme_name, parent_theme_name) + for topic_value in topic_group.return_list() ) theme_names = sorted({parent_name for _, parent_name in sub_theme_pairs}) @@ -783,7 +860,9 @@ def _build_geography_seed_values(cls, *, count: int) -> list[dict[str, str]]: { "name": cls._format_enum_name(ltla.name), "geography_code": ltla.value, - "geography_type": (validation_enums.GeographyType.LOWER_TIER_LOCAL_AUTHORITY.value), + "geography_type": ( + validation_enums.GeographyType.LOWER_TIER_LOCAL_AUTHORITY.value + ), } for ltla in list(validation_enums.LTLAs)[:extra_required] ) diff --git a/metrics/interfaces/plots/access.py b/metrics/interfaces/plots/access.py index 6e4eac34d..e024efdae 100644 --- a/metrics/interfaces/plots/access.py +++ b/metrics/interfaces/plots/access.py @@ -37,10 +37,7 @@ def __init__(self): class DataNotFoundForAnyPlotError(Exception): def __init__(self): - message = ( - "No data was found for the plot(s) requested, " - "please review the request parameters of each plot provided." - ) + message = "No data was found for the plot(s) requested, please review the request parameters of each plot provided." super().__init__(message) diff --git a/metrics/interfaces/weather_health_alerts/access.py b/metrics/interfaces/weather_health_alerts/access.py index 132468e3f..d229c050e 100644 --- a/metrics/interfaces/weather_health_alerts/access.py +++ b/metrics/interfaces/weather_health_alerts/access.py @@ -50,7 +50,7 @@ def build_summary_data_for_alerts( geography_code: self._parse_core_headline_as_alarm_state( topic=topic, core_headline=core_headline ) - for geography_code, core_headline, in headlines_mapping.items() + for geography_code, core_headline in headlines_mapping.items() } alarm_states = { diff --git a/tests/unit/ingestion/test_aws_client.py b/tests/unit/ingestion/test_aws_client.py index ae7d9d72c..be4eb86a2 100644 --- a/tests/unit/ingestion/test_aws_client.py +++ b/tests/unit/ingestion/test_aws_client.py @@ -21,7 +21,9 @@ def aws_client_with_mocked_boto_client() -> AWSClient: class TestAWSClient: # Tests for the `__init__` @mock.patch.object(AWSClient, "create_client") - def test_create_client_called_when_client_not_provided(self, spy_create_client: mock.MagicMock): + def test_create_client_called_when_client_not_provided( + self, spy_create_client: mock.MagicMock + ): """ Given no provided boto3 client When an instance of `AWSClient` is created @@ -107,11 +109,15 @@ def test_create_client(self, spy_boto3: mock.MagicMock): boto3_client = AWSClient.create_client(profile_name=aws_profile_name) # Then - spy_boto3.setup_default_session.assert_called_once_with(profile_name=aws_profile_name) + spy_boto3.setup_default_session.assert_called_once_with( + profile_name=aws_profile_name + ) assert boto3_client == spy_boto3.client.return_value @mock.patch(f"{MODULE_PATH}.boto3") - def test_create_client_does_not_setup_default_session_for_no_profile_name(self, spy_boto3: mock.MagicMock): + def test_create_client_does_not_setup_default_session_for_no_profile_name( + self, spy_boto3: mock.MagicMock + ): """ Given no provided AWS profile name When `create_client()` is called from the `AWSClient` class @@ -135,7 +141,9 @@ def test_create_client_does_not_setup_default_session_for_no_profile_name(self, # Tests for the `move_file_to_processed_folder()` method - def test_move_file_to_processed_folder(self, aws_client_with_mocked_boto_client: AWSClient): + def test_move_file_to_processed_folder( + self, aws_client_with_mocked_boto_client: AWSClient + ): """ Given a fake key for an item When `move_file_to_processed_folder()` @@ -149,13 +157,17 @@ def test_move_file_to_processed_folder(self, aws_client_with_mocked_boto_client: fake_archive_bucket_name = "fake-archive-bucket" spy_client = aws_client_with_mocked_boto_client._client aws_client_with_mocked_boto_client._bucket_name = fake_bucket_name - aws_client_with_mocked_boto_client._archive_bucket_name = fake_archive_bucket_name + aws_client_with_mocked_boto_client._archive_bucket_name = ( + fake_archive_bucket_name + ) # When aws_client_with_mocked_boto_client.move_file_to_processed_folder(key=fake_key) # Then - processed_key: str = aws_client_with_mocked_boto_client._build_processed_key(key=fake_key) + processed_key: str = aws_client_with_mocked_boto_client._build_processed_key( + key=fake_key + ) # Check that the call to copy the file is made correctly expected_copy_file_to_processed_call = mock.call.copy( CopySource={"Bucket": fake_bucket_name, "Key": fake_key}, @@ -166,14 +178,18 @@ def test_move_file_to_processed_folder(self, aws_client_with_mocked_boto_client: expected_copy_file_to_processed_archive_call = mock.call.copy( CopySource={"Bucket": fake_bucket_name, "Key": fake_key}, Bucket=fake_archive_bucket_name, - Key=aws_client_with_mocked_boto_client._build_processed_archive_key(key=fake_key), + Key=aws_client_with_mocked_boto_client._build_processed_archive_key( + key=fake_key + ), ExtraArgs={ "StorageClass": "GLACIER_IR", "MetadataDirective": "COPY", }, ) # Check that the call to delete the origin file is made correctly - expected_delete_file_from_origin_call = mock.call.delete_object(Bucket=fake_bucket_name, Key=fake_key) + expected_delete_file_from_origin_call = mock.call.delete_object( + Bucket=fake_bucket_name, Key=fake_key + ) expected_calls = [ expected_copy_file_to_processed_call, expected_copy_file_to_processed_archive_call, @@ -199,17 +215,23 @@ def test_move_file_to_processed_folder_records_correct_log( aws_client_with_mocked_boto_client.move_file_to_processed_folder(key=fake_key) # Then - expected_filename: str = aws_client_with_mocked_boto_client._get_filename_from_key(key=fake_key) - expected_inbound_folder: str = aws_client_with_mocked_boto_client._inbound_folder - expected_processed_folder: str = aws_client_with_mocked_boto_client._processed_folder - expected_log = ( - f"Moving `{expected_filename}` from `{expected_inbound_folder}` to `{expected_processed_folder}` in s3" + expected_filename: str = ( + aws_client_with_mocked_boto_client._get_filename_from_key(key=fake_key) + ) + expected_inbound_folder: str = ( + aws_client_with_mocked_boto_client._inbound_folder + ) + expected_processed_folder: str = ( + aws_client_with_mocked_boto_client._processed_folder ) + expected_log = f"Moving `{expected_filename}` from `{expected_inbound_folder}` to `{expected_processed_folder}` in s3" assert expected_log in caplog.text # Tests for the `move_file_to_failed_folder()` method - def test_move_file_to_failed_folder(self, aws_client_with_mocked_boto_client: AWSClient): + def test_move_file_to_failed_folder( + self, aws_client_with_mocked_boto_client: AWSClient + ): """ Given a fake key for an item When `move_file_to_failed_folder()` @@ -227,7 +249,9 @@ def test_move_file_to_failed_folder(self, aws_client_with_mocked_boto_client: AW # Then bucket_name: str = aws_client_with_mocked_boto_client._bucket_name - failed_key: str = aws_client_with_mocked_boto_client._build_failed_key(key=fake_key) + failed_key: str = aws_client_with_mocked_boto_client._build_failed_key( + key=fake_key + ) # Check that the call to copy the file is made correctly expected_copy_file_to_failed_call = mock.call.copy( @@ -236,7 +260,9 @@ def test_move_file_to_failed_folder(self, aws_client_with_mocked_boto_client: AW Key=failed_key, ) # Check that the call to delete the origin file is made correctly - expected_delete_file_from_origin_call = mock.call.delete_object(Bucket=bucket_name, Key=fake_key) + expected_delete_file_from_origin_call = mock.call.delete_object( + Bucket=bucket_name, Key=fake_key + ) expected_calls = [ expected_copy_file_to_failed_call, expected_delete_file_from_origin_call, @@ -261,12 +287,14 @@ def test_move_file_to_failed_folder_records_correct_log( aws_client_with_mocked_boto_client.move_file_to_failed_folder(key=fake_key) # Then - expected_filename: str = aws_client_with_mocked_boto_client._get_filename_from_key(key=fake_key) - expected_inbound_folder: str = aws_client_with_mocked_boto_client._inbound_folder - expected_failed_folder: str = aws_client_with_mocked_boto_client._failed_folder - expected_log = ( - f"Moving `{expected_filename}` from `{expected_inbound_folder}` to `{expected_failed_folder}` in s3" + expected_filename: str = ( + aws_client_with_mocked_boto_client._get_filename_from_key(key=fake_key) + ) + expected_inbound_folder: str = ( + aws_client_with_mocked_boto_client._inbound_folder ) + expected_failed_folder: str = aws_client_with_mocked_boto_client._failed_folder + expected_log = f"Moving `{expected_filename}` from `{expected_inbound_folder}` to `{expected_failed_folder}` in s3" assert expected_log in caplog.text # Tests for the _copy_file_to methods @@ -462,7 +490,9 @@ def test_copy_file_to_processed_archive_records_log_when_client_error_occurs( aws_client_with_mocked_boto_client._copy_file_to_processed_archive(key=key) # Then - _archive_bucket_name: str = aws_client_with_mocked_boto_client._archive_bucket_name + _archive_bucket_name: str = ( + aws_client_with_mocked_boto_client._archive_bucket_name + ) expected_log = f"Failed to move `{key}` to `{_archive_bucket_name}` bucket" assert expected_log in caplog.text @@ -510,7 +540,9 @@ def test_get_filename_from_key(self, aws_client_with_mocked_boto_client: AWSClie # This is similar to a filepath as we would see it on a filesystem # When - filename: str = aws_client_with_mocked_boto_client._get_filename_from_key(key=key) + filename: str = aws_client_with_mocked_boto_client._get_filename_from_key( + key=key + ) # Then assert filename == FAKE_FILE_NAME @@ -526,7 +558,9 @@ def test_build_processed_key(self, aws_client_with_mocked_boto_client: AWSClient fake_key = FAKE_KEY # When - processed_key: str = aws_client_with_mocked_boto_client._build_processed_key(key=fake_key) + processed_key: str = aws_client_with_mocked_boto_client._build_processed_key( + key=fake_key + ) # Then assert processed_key == f"processed/{FAKE_FILE_NAME}" @@ -542,13 +576,17 @@ def test_build_failed_key(self, aws_client_with_mocked_boto_client: AWSClient): fake_key = FAKE_KEY # When - failed_key: str = aws_client_with_mocked_boto_client._build_failed_key(key=fake_key) + failed_key: str = aws_client_with_mocked_boto_client._build_failed_key( + key=fake_key + ) # Then assert failed_key == f"failed/{FAKE_FILE_NAME}" @freezegun.freeze_time("2025-01-01") - def test_build_processed_archive_key(self, aws_client_with_mocked_boto_client: AWSClient): + def test_build_processed_archive_key( + self, aws_client_with_mocked_boto_client: AWSClient + ): """ Given a key from the s3 bucket for an item When `_build_processed_archive_key()` is called @@ -559,13 +597,19 @@ def test_build_processed_archive_key(self, aws_client_with_mocked_boto_client: A fake_key = FAKE_KEY # When - processed_archive_key: str = aws_client_with_mocked_boto_client._build_processed_archive_key(key=fake_key) + processed_archive_key: str = ( + aws_client_with_mocked_boto_client._build_processed_archive_key( + key=fake_key + ) + ) # Then expected_key = f"processed/2025-01-01/COVID-19/{FAKE_FILE_NAME}" assert processed_archive_key == expected_key - def test_upload_json_to_inbound_delegates_to_put_object(self, aws_client_with_mocked_boto_client: AWSClient): + def test_upload_json_to_inbound_delegates_to_put_object( + self, aws_client_with_mocked_boto_client: AWSClient + ): payload = {"key": "value"} aws_client_with_mocked_boto_client.upload_json_to_inbound( diff --git a/tests/unit/metrics/interfaces/management/test_seed_random.py b/tests/unit/metrics/interfaces/management/test_seed_random.py index c533e6fcd..e97d1d8a2 100644 --- a/tests/unit/metrics/interfaces/management/test_seed_random.py +++ b/tests/unit/metrics/interfaces/management/test_seed_random.py @@ -46,7 +46,9 @@ def _fake_age() -> Age: def _assert_progress_messages(progress_messages: list[str]) -> None: - assert any(message.startswith("Processed 1/1 metrics") for message in progress_messages) + assert any( + message.startswith("Processed 1/1 metrics") for message in progress_messages + ) assert any(message.startswith("Inserted ") for message in progress_messages) @@ -256,7 +258,12 @@ def test_seed_metrics_data_builds_expected_counts_and_calls( sub_theme=sub_themes[1], ), ] - metrics = [SimpleNamespace(name=f"Metric {index + 1}", topic=topics[index % len(topics)]) for index in range(4)] + metrics = [ + SimpleNamespace( + name=f"Metric {index + 1}", topic=topics[index % len(topics)] + ) + for index in range(4) + ] geography_types = [ SimpleNamespace(name="Nation"), SimpleNamespace(name="Lower Tier Local Authority"), @@ -303,7 +310,9 @@ def test_seed_metrics_data_builds_expected_counts_and_calls( is_public=False, progress_callback=spy_progress_callback, ) - spy_progress_callback.assert_any_call("Preparing metric taxonomy and geography records...") + spy_progress_callback.assert_any_call( + "Preparing metric taxonomy and geography records..." + ) spy_progress_callback.assert_any_call("Generating Core/API time series rows...") def test_truncate_metrics_data_deletes_from_all_models(self): @@ -325,7 +334,9 @@ def test_truncate_metrics_data_deletes_from_all_models(self): for model_name in model_names: manager = mock.MagicMock() managers[model_name] = manager - stack.enter_context(mock.patch(f"{MODULE_PATH}.{model_name}.objects", manager)) + stack.enter_context( + mock.patch(f"{MODULE_PATH}.{model_name}.objects", manager) + ) Command._truncate_metrics_data() @@ -365,7 +376,9 @@ def test_seed_time_series_rows_flushes_remainder( assert spy_api_time_series.call_args.kwargs["sex"] == "f" assert spy_core_time_series.call_args.kwargs["is_public"] is False assert spy_api_time_series.call_args.kwargs["is_public"] is False - progress_messages = [call.args[0] for call in spy_progress_callback.call_args_list] + progress_messages = [ + call.args[0] for call in spy_progress_callback.call_args_list + ] _assert_progress_messages(progress_messages) @mock.patch(f"{MODULE_PATH}.APITimeSeries") @@ -460,7 +473,10 @@ def test_build_theme_hierarchy_records_contains_expected_real_values(): assert "infectious_disease" in theme_names assert any(sub_theme == "respiratory" for sub_theme, _ in sub_theme_rows) - assert any(topic == "COVID-19" and sub_theme == "respiratory" for topic, sub_theme, _ in topic_rows) + assert any( + topic == "COVID-19" and sub_theme == "respiratory" + for topic, sub_theme, _ in topic_rows + ) def test_build_theme_hierarchy_records_skips_unmatched_topic_group(): @@ -488,7 +504,9 @@ def test_build_geography_seed_values_returns_required_count(): def test_format_enum_name_replaces_underscores_and_title_cases(): - assert Command._format_enum_name("LOWER_TIER_LOCAL_AUTHORITY") == ("Lower Tier Local Authority") + assert Command._format_enum_name("LOWER_TIER_LOCAL_AUTHORITY") == ( + "Lower Tier Local Authority" + ) @mock.patch.object(Command, "_upsert_topics") @@ -663,7 +681,9 @@ def test_seed_geographies_creates_missing_types_and_geographies( result = Command._seed_geographies(count=2) - assert [(geography.name, geography.geography_type.name) for geography in result] == [ + assert [ + (geography.name, geography.geography_type.name) for geography in result + ] == [ ("England", "Nation"), ("Area 2", "Lower Tier Local Authority"), ] @@ -798,7 +818,9 @@ def test_seed_metrics_data_to_s3_uploads_payloads_and_returns_counts( key="in/key.json", payload=payload, ) - spy_progress_callback.assert_any_call("Generating ingestion payloads for S3 upload...") + spy_progress_callback.assert_any_call( + "Generating ingestion payloads for S3 upload..." + ) spy_progress_callback.assert_any_call("Uploaded 1 files to ingest bucket in/.") @@ -860,4 +882,6 @@ def test_build_s3_object_key_builds_expected_file_name(): result = Command._build_s3_object_key(payload=payload, payload_index=7) - assert result == ("in/covid_19_cases_covid_19_cases_randombyday_1_E92000001_all_f_default_7.json") + assert result == ( + "in/covid_19_cases_covid_19_cases_randombyday_1_E92000001_all_f_default_7.json" + ) From 37da200e20b496d163ee637f06d8b32e859edf9c Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Fri, 27 Mar 2026 13:06:40 +0000 Subject: [PATCH 19/36] properly ignore s311 for random usage --- metrics/interfaces/management/commands/seed_random.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index 902f7a2b1..6543a6130 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -745,9 +745,7 @@ def _build_timeseries_ingestion_payloads( "geography": geography["name"], "geography_code": geography["geography_code"], "age": "all", - "sex": random.choice( - SEED_RANDOM_SEX_OPTIONS - ), # noqa: S311 # nosec B311 + "sex": random.choice(SEED_RANDOM_SEX_OPTIONS), # noqa: S311 "stratum": "default", "refresh_date": refresh_date, "time_series": time_series_rows, From aca6ea25c9f2c8b910f2c43d260fc14b990e829e Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Fri, 27 Mar 2026 13:12:33 +0000 Subject: [PATCH 20/36] Ignore B311 for random usage in seed data --- metrics/interfaces/management/commands/seed_random.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index 6543a6130..b26edf5e6 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -745,7 +745,7 @@ def _build_timeseries_ingestion_payloads( "geography": geography["name"], "geography_code": geography["geography_code"], "age": "all", - "sex": random.choice(SEED_RANDOM_SEX_OPTIONS), # noqa: S311 + "sex": random.choice(SEED_RANDOM_SEX_OPTIONS), # nosec "stratum": "default", "refresh_date": refresh_date, "time_series": time_series_rows, From ace51b4322cfecfb24131a0568a084694043b013 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Fri, 27 Mar 2026 13:17:06 +0000 Subject: [PATCH 21/36] Satisfy both ruff and bandit for random usage --- metrics/interfaces/management/commands/seed_random.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index b26edf5e6..b04458bdd 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -745,7 +745,7 @@ def _build_timeseries_ingestion_payloads( "geography": geography["name"], "geography_code": geography["geography_code"], "age": "all", - "sex": random.choice(SEED_RANDOM_SEX_OPTIONS), # nosec + "sex": random.choice(SEED_RANDOM_SEX_OPTIONS), # noqa: S311 # nosec B311 "stratum": "default", "refresh_date": refresh_date, "time_series": time_series_rows, From 92ada1c9b78904ab1f3e9b8a51bbff5dd6ce5db7 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Fri, 27 Mar 2026 13:29:07 +0000 Subject: [PATCH 22/36] Apply final formatting after nosec/noqa change --- metrics/interfaces/management/commands/seed_random.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index b04458bdd..902f7a2b1 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -745,7 +745,9 @@ def _build_timeseries_ingestion_payloads( "geography": geography["name"], "geography_code": geography["geography_code"], "age": "all", - "sex": random.choice(SEED_RANDOM_SEX_OPTIONS), # noqa: S311 # nosec B311 + "sex": random.choice( + SEED_RANDOM_SEX_OPTIONS + ), # noqa: S311 # nosec B311 "stratum": "default", "refresh_date": refresh_date, "time_series": time_series_rows, From 8bf6c55a1f7eb7fe887c5be0ebc5dc15b4d59f57 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Fri, 27 Mar 2026 13:40:00 +0000 Subject: [PATCH 23/36] resolve lint --- .../management/commands/seed_random.py | 935 +----------------- 1 file changed, 49 insertions(+), 886 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index 902f7a2b1..00c7a0d32 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -1,900 +1,63 @@ -import random -import re -import time -from collections.abc import Callable, Iterable -from datetime import date, datetime, timedelta -from decimal import Decimal -from operator import itemgetter -from typing import TypeVar, cast, override - -from django.core.management import CommandParser, call_command -from django.core.management.base import BaseCommand -from django.db import transaction -from django.db.models import Model - -from ingestion.aws_client import AWSClient -from metrics.data.enums import TimePeriod -from metrics.data.models.api_models import APITimeSeries -from metrics.data.models.core_models.supporting import ( - Age, - Geography, - GeographyType, - Metric, - Stratum, - SubTheme, - Theme, - Topic, -) -from metrics.data.models.core_models.timeseries import CoreTimeSeries -from validation import enums as validation_enums -from validation.geography_code import ( - NATION_GEOGRAPHY_CODES, - UNITED_KINGDOM_GEOGRAPHY_CODE, -) - -SCALE_CONFIGS = { - # Approximate time-series row counts generated by scale: - # small ~1,500, medium ~180,000, large ~7,300,000. - "small": {"geographies": 5, "metrics": 10, "days": 30}, - "medium": {"geographies": 20, "metrics": 50, "days": 180}, - "large": {"geographies": 100, "metrics": 200, "days": 365}, -} -SEED_RANDOM_SEX_OPTIONS = ("all", "f", "m") -TModel = TypeVar("TModel", bound=Model) - - -class Command(BaseCommand): - @override - def add_arguments(self, parser: CommandParser) -> None: - parser.add_argument( - "--dataset", - choices=["cms", "metrics", "both"], - default="both", - help="Which dataset to seed: CMS, metrics, or both.", - ) - parser.add_argument( - "--scale", - choices=["small", "medium", "large"], - default="small", - help="Size of the random metrics dataset to generate.", - ) - parser.add_argument( - "--seed", - type=int, - required=False, - default=None, - help="Optional random seed for reproducible metric values.", - ) - parser.add_argument( - "--truncate-first", - action="store_true", - default=False, - help="Clear existing metrics tables before seeding to avoid duplicates.", - ) - parser.add_argument( - "--delivery", - choices=["db", "s3"], - default="db", - help="Delivery mode for metrics dataset: database insert or s3 ingestion files.", - ) - parser.add_argument( - "--non-public", - action="store_true", - default=False, - help="Mark generated metric points as non-public (`is_public=False`).", - ) - - def handle(self, *args, **options) -> None: - started_at = time.perf_counter() - dataset: str = options["dataset"] - scale: str = options["scale"] - truncate_first: bool = options["truncate_first"] - delivery: str = options["delivery"] - is_public: bool = not options["non_public"] - - selected_seed = ( - options["seed"] if options["seed"] is not None else int(time.time()) - ) - random.seed(selected_seed) # nosec B311 - self.stdout.write(f"Seed used: {selected_seed}") - - should_seed_cms = dataset in {"cms", "both"} - should_seed_metrics = dataset in {"metrics", "both"} - - counts: dict[str, int] = { - "Theme": 0, - "SubTheme": 0, - "Topic": 0, - "Metric": 0, - "Geography": 0, - "CoreTimeSeries": 0, - "APITimeSeries": 0, - } - - if should_seed_metrics: - scale_config = SCALE_CONFIGS[scale] - self.stderr.write("Seeding metrics dataset...") - if delivery == "s3": - counts = self._seed_metrics_data_to_s3( - scale_config=scale_config, - is_public=is_public, - progress_callback=self.stderr.write, - ) - else: - counts = self._seed_metrics_data( - scale_config=scale_config, - truncate_first=truncate_first, - is_public=is_public, - progress_callback=self.stderr.write, - ) - self.stderr.write("Metrics dataset seeding complete.") - - if should_seed_cms: - self.stderr.write("Building CMS site data...") - call_command("build_cms_site") - self.stderr.write("CMS site build complete.") - - runtime_seconds = time.perf_counter() - started_at - self._print_summary( - dataset=dataset, - scale=scale, - seed=selected_seed, - counts=counts, - runtime_seconds=runtime_seconds, - ) - - @classmethod - def _seed_metrics_data( - cls, - *, - scale_config: dict[str, int], - truncate_first: bool, - is_public: bool, - progress_callback: Callable[[str], None] | None = None, - ) -> dict[str, int]: - """Seed supporting metric models and time series rows for the selected scale. - - Args: - scale_config: Scale-specific object counts for generated records. - truncate_first: Whether to clear existing metrics-related tables before seeding. - is_public: Whether generated metric rows should be marked as public. - progress_callback: Optional callback used to report progress updates. - - Returns: - Count of created records keyed by model or dataset name. - """ - if progress_callback is not None: - progress_callback("Preparing metric taxonomy and geography records...") - - with transaction.atomic(): - if truncate_first: - cls._truncate_metrics_data() - - themes, sub_themes, topics = cls._seed_theme_hierarchy() - metric_start_index = cls._get_next_random_metric_index() - - metrics = cls._bulk_create( - Metric, - [ - Metric( - name=f"Random Metric {metric_start_index + index}", - topic=topics[index % len(topics)], - ) - for index in range(scale_config["metrics"]) - ], - ) - - geographies = cls._seed_geographies(count=scale_config["geographies"]) - - stratum_record, _ = Stratum.objects.get_or_create(name="All") - age_record, _ = Age.objects.get_or_create(name="All ages") - stratum = cast(Stratum, stratum_record) - age = cast(Age, age_record) - - if progress_callback is not None: - progress_callback("Generating Core/API time series rows...") - core_count, api_count = cls._seed_time_series_rows( - metrics=metrics, - geographies=geographies, - stratum=stratum, - age=age, - days=scale_config["days"], - is_public=is_public, - progress_callback=progress_callback, - ) - - return { - "Theme": len(themes), - "SubTheme": len(sub_themes), - "Topic": len(topics), - "Metric": len(metrics), - "Geography": len(geographies), - "CoreTimeSeries": core_count, - "APITimeSeries": api_count, - } - - @classmethod - def _seed_metrics_data_to_s3( - cls, - *, - scale_config: dict[str, int], - is_public: bool, - progress_callback: Callable[[str], None] | None = None, - ) -> dict[str, int]: - if progress_callback is not None: - progress_callback("Generating ingestion payloads for S3 upload...") - - payloads = cls._build_timeseries_ingestion_payloads( - scale_config=scale_config, - is_public=is_public, - ) - client = AWSClient() - uploaded_files = 0 - for payload_index, payload in enumerate(payloads, start=1): - key = cls._build_s3_object_key(payload=payload, payload_index=payload_index) - client.upload_json_to_inbound(key=key, payload=payload) - uploaded_files += 1 - - if progress_callback is not None: - progress_callback( - f"Uploaded {uploaded_files:,} files to ingest bucket in/." - ) - - topic_rows = cls._build_theme_hierarchy_records()[2] - theme_count = len({theme_name for _, _, theme_name in topic_rows}) - sub_theme_count = len( - { - (sub_theme_name, theme_name) - for _, sub_theme_name, theme_name in topic_rows - } - ) - topic_count = len( - { - (topic_name, sub_theme_name, theme_name) - for topic_name, sub_theme_name, theme_name in topic_rows - } - ) - geography_count = len( - cls._build_geography_seed_values(count=scale_config["geographies"]) - ) - row_count = scale_config["metrics"] * geography_count * scale_config["days"] - - return { - "Theme": theme_count, - "SubTheme": sub_theme_count, - "Topic": topic_count, - "Metric": scale_config["metrics"], - "Geography": geography_count, - "CoreTimeSeries": row_count, - "APITimeSeries": row_count, - } - - @classmethod - def _truncate_metrics_data(cls) -> None: - """Delete all seeded metrics-related rows in dependency-safe order.""" - APITimeSeries.objects.all().delete() - CoreTimeSeries.objects.all().delete() - Metric.objects.all().delete() - Topic.objects.all().delete() - SubTheme.objects.all().delete() - Theme.objects.all().delete() - Geography.objects.all().delete() - GeographyType.objects.all().delete() - Age.objects.all().delete() - Stratum.objects.all().delete() - - @classmethod - def _seed_time_series_rows( - cls, - *, - metrics: list[Metric], - geographies: list[Geography], - stratum: Stratum, - age: Age, - days: int, - is_public: bool, - progress_callback: Callable[[str], None] | None = None, - ) -> tuple[int, int]: - frequency = TimePeriod.Weekly.value - start_date = date.today() - timedelta(days=days - 1) - batch_size = 5000 - core_rows: list[CoreTimeSeries] = [] - api_rows: list[APITimeSeries] = [] - core_count = 0 - api_count = 0 - total_metrics = len(metrics) - total_row_count = total_metrics * len(geographies) * days - log_interval = max(1, total_metrics // 10) if total_metrics else 1 - - for metric_index, metric in enumerate(metrics, start=1): - for core_row, api_row in cls._build_time_series_rows_for_metric( - metric=metric, - geographies=geographies, - stratum=stratum, - age=age, - days=days, - is_public=is_public, - start_date=start_date, - frequency=frequency, - ): - core_rows.append(core_row) - core_rows, core_count = cls._flush_batch( - model=CoreTimeSeries, - rows=core_rows, - batch_size=batch_size, - current_count=core_count, - ) - - api_rows.append(api_row) - api_rows, api_count = cls._flush_batch( - model=APITimeSeries, - rows=api_rows, - batch_size=batch_size, - current_count=api_count, - ) - - if progress_callback is not None and ( - metric_index == total_metrics or metric_index % log_interval == 0 - ): - processed_row_count = metric_index * len(geographies) * days - progress_callback( - f"Processed {metric_index}/{total_metrics} metrics " - f"({processed_row_count:,}/{total_row_count:,} row groups)." - ) - - core_count = cls._flush_remaining( - model=CoreTimeSeries, - rows=core_rows, - batch_size=batch_size, - current_count=core_count, - ) - api_count = cls._flush_remaining( - model=APITimeSeries, - rows=api_rows, - batch_size=batch_size, - current_count=api_count, - ) - - if progress_callback is not None: - progress_callback( - f"Inserted {core_count:,} CoreTimeSeries rows and {api_count:,} APITimeSeries rows." - ) - - return core_count, api_count - - @staticmethod - def _flush_batch( - *, - model: type[TModel], - rows: list[TModel], - batch_size: int, - current_count: int, - ) -> tuple[list[TModel], int]: - if len(rows) < batch_size: - return rows, current_count - - model.objects.bulk_create(rows, batch_size=batch_size) - return [], current_count + len(rows) - - @staticmethod - def _flush_remaining( - *, - model: type[TModel], - rows: list[TModel], - batch_size: int, - current_count: int, - ) -> int: - if not rows: - return current_count - - model.objects.bulk_create(rows, batch_size=batch_size) - return current_count + len(rows) - - @classmethod - def _seed_theme_hierarchy(cls) -> tuple[list[Theme], list[SubTheme], list[Topic]]: - theme_names, sub_theme_rows, topic_rows = cls._build_theme_hierarchy_records() - themes, themes_by_name = cls._upsert_themes(theme_names=theme_names) - sub_themes, sub_themes_by_key = cls._upsert_sub_themes( - theme_names=theme_names, - sub_theme_rows=sub_theme_rows, - themes_by_name=themes_by_name, - ) - topics = cls._upsert_topics( - topic_rows=topic_rows, - sub_themes_by_key=sub_themes_by_key, - ) - return themes, sub_themes, topics - - @classmethod - def _upsert_themes( - cls, - *, - theme_names: list[str], - ) -> tuple[list[Theme], dict[str, Theme]]: - themes_by_name = { - theme.name: theme for theme in Theme.objects.filter(name__in=theme_names) - } - missing_theme_names = [ - name for name in theme_names if name not in themes_by_name - ] - if missing_theme_names: - cls._bulk_create(Theme, [Theme(name=name) for name in missing_theme_names]) - themes_by_name.update( - { - theme.name: theme - for theme in Theme.objects.filter(name__in=missing_theme_names) - } - ) - return [themes_by_name[name] for name in theme_names], themes_by_name - - @classmethod - def _upsert_sub_themes( - cls, - *, - theme_names: list[str], - sub_theme_rows: list[tuple[str, str]], - themes_by_name: dict[str, Theme], - ) -> tuple[list[SubTheme], dict[tuple[str, str], SubTheme]]: - sub_theme_keys = list(dict.fromkeys(sub_theme_rows)) - existing_sub_themes = SubTheme.objects.select_related("theme").filter( - theme__name__in=theme_names, - name__in={name for name, _ in sub_theme_keys}, - ) - sub_themes_by_key = { - (sub_theme.name, sub_theme.theme.name): sub_theme - for sub_theme in existing_sub_themes - } - missing_sub_theme_keys = [ - (sub_theme_name, theme_name) - for sub_theme_name, theme_name in sub_theme_keys - if (sub_theme_name, theme_name) not in sub_themes_by_key - ] - if missing_sub_theme_keys: - cls._bulk_create( - SubTheme, - [ - SubTheme(name=sub_theme_name, theme=themes_by_name[theme_name]) - for sub_theme_name, theme_name in missing_sub_theme_keys - ], - ) - sub_themes_by_key.update( - { - (sub_theme.name, sub_theme.theme.name): sub_theme - for sub_theme in SubTheme.objects.select_related("theme").filter( - theme__name__in={ - theme_name for _, theme_name in missing_sub_theme_keys - }, - name__in={ - sub_theme_name - for sub_theme_name, _ in missing_sub_theme_keys - }, - ) - } - ) - return [sub_themes_by_key[key] for key in sub_theme_keys], sub_themes_by_key - - @classmethod - def _upsert_topics( - cls, - *, - topic_rows: list[tuple[str, str, str]], - sub_themes_by_key: dict[tuple[str, str], SubTheme], - ) -> list[Topic]: - topic_keys = list(dict.fromkeys(topic_rows)) - sub_themes_by_id_key = { - (sub_theme_name, theme_name): sub_themes_by_key[ - (sub_theme_name, theme_name) - ] - for _, sub_theme_name, theme_name in topic_keys - } - candidate_sub_theme_ids = [ - sub_theme.id for sub_theme in sub_themes_by_id_key.values() - ] - existing_topics = Topic.objects.filter( - sub_theme_id__in=candidate_sub_theme_ids, - name__in={topic_name for topic_name, _, _ in topic_keys}, - ) - topics_by_key = { - (topic.name, topic.sub_theme_id): topic for topic in existing_topics - } - missing_topic_keys = [ - topic_key - for topic_key in topic_keys - if ( - topic_key[0], - sub_themes_by_id_key[(topic_key[1], topic_key[2])].id, - ) - not in topics_by_key - ] - if missing_topic_keys: - cls._bulk_create( - Topic, - [ - Topic( - name=topic_name, - sub_theme=sub_themes_by_id_key[(sub_theme_name, theme_name)], - ) - for topic_name, sub_theme_name, theme_name in missing_topic_keys - ], - ) - topics_by_key.update( - { - (topic.name, topic.sub_theme_id): topic - for topic in Topic.objects.filter( - sub_theme_id__in=[ - sub_themes_by_id_key[(sub_theme_name, theme_name)].id - for _, sub_theme_name, theme_name in missing_topic_keys - ], - name__in={ - topic_name for topic_name, _, _ in missing_topic_keys - }, - ) - } - ) - return [ - topics_by_key[ - ( - topic_name, - sub_themes_by_id_key[(sub_theme_name, theme_name)].id, - ) - ] - for topic_name, sub_theme_name, theme_name in topic_keys - ] - - @classmethod - def _seed_geographies(cls, *, count: int) -> list[Geography]: - geography_seed_values = cls._build_geography_seed_values(count=count) - geography_type_names = { - record["geography_type"] for record in geography_seed_values - } - geography_type_names = sorted(geography_type_names) - geography_types_by_name = { - geography_type.name: geography_type - for geography_type in GeographyType.objects.filter( - name__in=geography_type_names - ) - } - missing_geography_type_names = [ - name for name in geography_type_names if name not in geography_types_by_name - ] - if missing_geography_type_names: - cls._bulk_create( - GeographyType, - [GeographyType(name=name) for name in missing_geography_type_names], - ) - geography_types_by_name.update( - { - geography_type.name: geography_type - for geography_type in GeographyType.objects.filter( - name__in=missing_geography_type_names - ) - } - ) - geography_types_by_name = { - name: geography_types_by_name[name] for name in geography_type_names - } - - geography_keys = list( - dict.fromkeys( - (record["name"], record["geography_type"], record["geography_code"]) - for record in geography_seed_values - ) - ) - existing_geographies = Geography.objects.select_related( - "geography_type" - ).filter( - name__in={name for name, _, _ in geography_keys}, - geography_type__name__in={ - geography_type for _, geography_type, _ in geography_keys - }, - ) - geographies_by_key = { - (geography.name, geography.geography_type.name): geography - for geography in existing_geographies - } - missing_geography_keys = [ - (name, geography_type, geography_code) - for name, geography_type, geography_code in geography_keys - if (name, geography_type) not in geographies_by_key - ] - if missing_geography_keys: - cls._bulk_create( - Geography, - [ - Geography( - name=name, - geography_code=geography_code, - geography_type=geography_types_by_name[geography_type], - ) - for name, geography_type, geography_code in missing_geography_keys - ], - ) - geographies_by_key.update( - { - (geography.name, geography.geography_type.name): geography - for geography in Geography.objects.select_related( - "geography_type" - ).filter( - name__in={name for name, _, _ in missing_geography_keys}, - geography_type__name__in={ - geography_type - for _, geography_type, _ in missing_geography_keys - }, - ) - } - ) - - return [ - geographies_by_key[(name, geography_type)] - for name, geography_type, _ in geography_keys - ] - - @classmethod - def _build_time_series_rows_for_metric( - cls, - *, - metric: Metric, - geographies: list[Geography], - stratum: Stratum, - age: Age, - days: int, - is_public: bool, - start_date: date, - frequency: str, - ) -> Iterable[tuple[CoreTimeSeries, APITimeSeries]]: - topic = metric.topic - sub_theme = topic.sub_theme - theme = sub_theme.theme +@classmethod +def _build_timeseries_ingestion_payloads( + cls, + *, + scale_config: dict[str, int], + is_public: bool, +) -> list[dict[str, object]]: + _, _, topic_rows = cls._build_theme_hierarchy_records() + geographies = cls._build_geography_seed_values(count=scale_config["geographies"]) + refresh_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + start_date = date.today() - timedelta(days=scale_config["days"] - 1) + payloads: list[dict[str, object]] = [] + + for metric_index in range(scale_config["metrics"]): + topic_name, sub_theme_name, theme_name = topic_rows[ + metric_index % len(topic_rows) + ] + metric_name = f"{topic_name}_cases_randomByDay_{metric_index + 1}" for geography in geographies: - for day_offset in range(days): + time_series_rows: list[dict[str, object]] = [] + + for day_offset in range(scale_config["days"]): current_date = start_date + timedelta(days=day_offset) - base_value = random.uniform(5.0, 250.0) # noqa: S311 # nosec B311 + metric_value = round( - base_value - + random.uniform(-10.0, 10.0), # noqa: S311 # nosec B311 + random.uniform(5.0, 250.0), # noqa: S311 2, ) - sex = random.choice(SEED_RANDOM_SEX_OPTIONS) # noqa: S311 # nosec B311 - epidemiological_week = current_date.isocalendar().week - - yield ( - CoreTimeSeries( - metric=metric, - metric_frequency=frequency, - geography=geography, - stratum=stratum, - age=age, - sex=sex, - year=current_date.year, - month=current_date.month, - epiweek=epidemiological_week, - date=current_date, - metric_value=Decimal(str(metric_value)), - is_public=is_public, - ), - APITimeSeries( - metric_frequency=frequency, - age=age.name, - month=current_date.month, - geography_code=geography.geography_code, - metric_group=None, - theme=theme.name, - sub_theme=sub_theme.name, - topic=topic.name, - geography_type=geography.geography_type.name, - geography=geography.name, - metric=metric.name, - stratum=stratum.name, - sex=sex, - year=current_date.year, - epiweek=epidemiological_week, - date=current_date, - metric_value=float(metric_value), - is_public=is_public, - ), - ) - - @classmethod - def _build_timeseries_ingestion_payloads( - cls, - *, - scale_config: dict[str, int], - is_public: bool, - ) -> list[dict[str, object]]: - _, _, topic_rows = cls._build_theme_hierarchy_records() - geographies = cls._build_geography_seed_values( - count=scale_config["geographies"] - ) - refresh_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - start_date = date.today() - timedelta(days=scale_config["days"] - 1) - payloads: list[dict[str, object]] = [] - for metric_index in range(scale_config["metrics"]): - topic_name, sub_theme_name, theme_name = topic_rows[ - metric_index % len(topic_rows) - ] - metric_name = f"{topic_name}_cases_randomByDay_{metric_index + 1}" - for geography in geographies: - time_series_rows: list[dict[str, object]] = [] - for day_offset in range(scale_config["days"]): - current_date = start_date + timedelta(days=day_offset) - metric_value = round( - random.uniform(5.0, 250.0), # noqa: S311 # nosec B311 - 2, - ) - time_series_rows.append( - { - "epiweek": current_date.isocalendar().week, - "date": current_date.isoformat(), - "metric_value": metric_value, - "embargo": None, - "is_public": is_public, - } - ) - - payloads.append( + time_series_rows.append( { - "parent_theme": theme_name, - "child_theme": sub_theme_name, - "topic": topic_name, - "metric_group": "cases", - "metric": metric_name, - "metric_frequency": TimePeriod.Weekly.value, - "geography_type": geography["geography_type"], - "geography": geography["name"], - "geography_code": geography["geography_code"], - "age": "all", - "sex": random.choice( - SEED_RANDOM_SEX_OPTIONS - ), # noqa: S311 # nosec B311 - "stratum": "default", - "refresh_date": refresh_date, - "time_series": time_series_rows, + "epiweek": current_date.isocalendar().week, + "date": current_date.isoformat(), + "metric_value": metric_value, + "embargo": None, + "is_public": is_public, } ) - return payloads - - @classmethod - def _build_s3_object_key( - cls, - *, - payload: dict[str, object], - payload_index: int, - ) -> str: - topic_name = str(payload["topic"]) - metric_name = str(payload["metric"]) - geography_code = str(payload["geography_code"]) - age = str(payload["age"]) - sex = str(payload["sex"]) - stratum = str(payload["stratum"]) - safe_topic = cls._normalise_key(topic_name) - safe_metric = cls._normalise_key(metric_name) - return f"in/{safe_topic}_cases_{safe_metric}_{geography_code}_{age}_{sex}_{stratum}_{payload_index}.json" - - @staticmethod - def _bulk_create(model: type[TModel], records: Iterable[TModel]) -> list[TModel]: - """Materialise and bulk insert a sequence of model instances.""" - return model.objects.bulk_create(list(records)) - - @staticmethod - def _get_next_random_metric_index() -> int: - max_metric_index = 0 - for metric_name in Metric.objects.filter( - name__startswith="Random Metric " - ).values_list( - "name", - flat=True, - ): - match = re.fullmatch(r"Random Metric (\d+)", metric_name) - if match is None: - continue - max_metric_index = max(max_metric_index, int(match.group(1))) - return max_metric_index + 1 - - @classmethod - def _build_theme_hierarchy_records( - cls, - ) -> tuple[list[str], list[tuple[str, str]], list[tuple[str, str, str]]]: - child_to_parent: dict[str, str] = {} - normalised_to_child: dict[str, str] = {} - parent_by_name = validation_enums.ParentTheme.__members__ + # ✅ FIX: move random.choice OUTSIDE dict + sex_value = random.choice(SEED_RANDOM_SEX_OPTIONS) # noqa: S311 - for child_theme_group in validation_enums.ChildTheme: - resolved_parent = ( - parent_by_name[child_theme_group.name].value - if child_theme_group.name in parent_by_name - else validation_enums.ParentTheme.INFECTIOUS_DISEASE.value - ) - for sub_theme_name in child_theme_group.return_list(): - child_to_parent[sub_theme_name] = resolved_parent - normalised_to_child[cls._normalise_key(sub_theme_name)] = sub_theme_name - - topic_rows: list[tuple[str, str, str]] = [] - sub_theme_pairs: set[tuple[str, str]] = set() - for topic_group in validation_enums.Topic: - normalised_topic_group = cls._normalise_key(topic_group.name) - sub_theme_name = normalised_to_child.get(normalised_topic_group) - if sub_theme_name is None: - continue - - parent_theme_name = child_to_parent[sub_theme_name] - sub_theme_pairs.add((sub_theme_name, parent_theme_name)) - topic_rows.extend( - (topic_value, sub_theme_name, parent_theme_name) - for topic_value in topic_group.return_list() + payloads.append( + { + "parent_theme": theme_name, + "child_theme": sub_theme_name, + "topic": topic_name, + "metric_group": "cases", + "metric": metric_name, + "metric_frequency": TimePeriod.Weekly.value, + "geography_type": geography["geography_type"], + "geography": geography["name"], + "geography_code": geography["geography_code"], + "age": "all", + "sex": sex_value, + "stratum": "default", + "refresh_date": refresh_date, + "time_series": time_series_rows, + } ) - theme_names = sorted({parent_name for _, parent_name in sub_theme_pairs}) - sub_theme_rows = sorted( - sub_theme_pairs, - key=itemgetter(1, 0), - ) - return theme_names, sub_theme_rows, topic_rows - - @classmethod - def _build_geography_seed_values(cls, *, count: int) -> list[dict[str, str]]: - geographies: list[dict[str, str]] = [ - { - "name": "United Kingdom", - "geography_code": UNITED_KINGDOM_GEOGRAPHY_CODE, - "geography_type": (validation_enums.GeographyType.UNITED_KINGDOM.value), - } - ] - - geographies.extend( - { - "name": name, - "geography_code": code, - "geography_type": validation_enums.GeographyType.NATION.value, - } - for name, code in NATION_GEOGRAPHY_CODES.items() - ) - - if len(geographies) >= count: - return geographies[:count] - - extra_required = count - len(geographies) - geographies.extend( - { - "name": cls._format_enum_name(ltla.name), - "geography_code": ltla.value, - "geography_type": ( - validation_enums.GeographyType.LOWER_TIER_LOCAL_AUTHORITY.value - ), - } - for ltla in list(validation_enums.LTLAs)[:extra_required] - ) - return geographies[:count] - - @staticmethod - def _normalise_key(value: str) -> str: - return value.lower().replace("-", "_") - - @staticmethod - def _format_enum_name(value: str) -> str: - return value.replace("_", " ").title() - - def _print_summary( - self, - *, - dataset: str, - scale: str, - seed: int, - counts: dict[str, int], - runtime_seconds: float, - ) -> None: - self.stdout.write("") - self.stdout.write("Seed random summary:") - self.stdout.write(f" dataset: {dataset}") - self.stdout.write(f" scale: {scale}") - self.stdout.write(f" seed used: {seed}") - self.stdout.write(f" Theme: {counts['Theme']}") - self.stdout.write(f" SubTheme: {counts['SubTheme']}") - self.stdout.write(f" Topic: {counts['Topic']}") - self.stdout.write(f" Metric: {counts['Metric']}") - self.stdout.write(f" Geography: {counts['Geography']}") - self.stdout.write(f" CoreTimeSeries: {counts['CoreTimeSeries']}") - self.stdout.write(f" APITimeSeries: {counts['APITimeSeries']}") - self.stdout.write(f" runtime seconds: {runtime_seconds:.2f}") + return payloads From cf8793413a62c377483141ab75c83633afc20770 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Fri, 27 Mar 2026 13:45:10 +0000 Subject: [PATCH 24/36] Supress bandit B311 --- metrics/interfaces/management/commands/seed_random.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index 00c7a0d32..82879ff91 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -24,7 +24,7 @@ def _build_timeseries_ingestion_payloads( current_date = start_date + timedelta(days=day_offset) metric_value = round( - random.uniform(5.0, 250.0), # noqa: S311 + random.uniform(5.0, 250.0), # noqa: B311 2, ) @@ -38,7 +38,6 @@ def _build_timeseries_ingestion_payloads( } ) - # ✅ FIX: move random.choice OUTSIDE dict sex_value = random.choice(SEED_RANDOM_SEX_OPTIONS) # noqa: S311 payloads.append( From f1702d569058f6ea4939e64afca8e9c32b835313 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Fri, 27 Mar 2026 13:49:39 +0000 Subject: [PATCH 25/36] Revert changes --- .../management/commands/seed_random.py | 932 +++++++++++++++++- 1 file changed, 884 insertions(+), 48 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index 82879ff91..44ba5f2eb 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -1,62 +1,898 @@ -@classmethod -def _build_timeseries_ingestion_payloads( - cls, - *, - scale_config: dict[str, int], - is_public: bool, -) -> list[dict[str, object]]: - _, _, topic_rows = cls._build_theme_hierarchy_records() - geographies = cls._build_geography_seed_values(count=scale_config["geographies"]) - refresh_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S") - start_date = date.today() - timedelta(days=scale_config["days"] - 1) - payloads: list[dict[str, object]] = [] - - for metric_index in range(scale_config["metrics"]): - topic_name, sub_theme_name, theme_name = topic_rows[ - metric_index % len(topic_rows) - ] - metric_name = f"{topic_name}_cases_randomByDay_{metric_index + 1}" +import random +import re +import time +from collections.abc import Callable, Iterable +from datetime import date, datetime, timedelta +from decimal import Decimal +from operator import itemgetter +from typing import TypeVar, cast, override - for geography in geographies: - time_series_rows: list[dict[str, object]] = [] +from django.core.management import CommandParser, call_command +from django.core.management.base import BaseCommand +from django.db import transaction +from django.db.models import Model - for day_offset in range(scale_config["days"]): - current_date = start_date + timedelta(days=day_offset) +from ingestion.aws_client import AWSClient +from metrics.data.enums import TimePeriod +from metrics.data.models.api_models import APITimeSeries +from metrics.data.models.core_models.supporting import ( + Age, + Geography, + GeographyType, + Metric, + Stratum, + SubTheme, + Theme, + Topic, +) +from metrics.data.models.core_models.timeseries import CoreTimeSeries +from validation import enums as validation_enums +from validation.geography_code import ( + NATION_GEOGRAPHY_CODES, + UNITED_KINGDOM_GEOGRAPHY_CODE, +) + +SCALE_CONFIGS = { + # Approximate time-series row counts generated by scale: + # small ~1,500, medium ~180,000, large ~7,300,000. + "small": {"geographies": 5, "metrics": 10, "days": 30}, + "medium": {"geographies": 20, "metrics": 50, "days": 180}, + "large": {"geographies": 100, "metrics": 200, "days": 365}, +} +SEED_RANDOM_SEX_OPTIONS = ("all", "f", "m") +TModel = TypeVar("TModel", bound=Model) + + +class Command(BaseCommand): + @override + def add_arguments(self, parser: CommandParser) -> None: + parser.add_argument( + "--dataset", + choices=["cms", "metrics", "both"], + default="both", + help="Which dataset to seed: CMS, metrics, or both.", + ) + parser.add_argument( + "--scale", + choices=["small", "medium", "large"], + default="small", + help="Size of the random metrics dataset to generate.", + ) + parser.add_argument( + "--seed", + type=int, + required=False, + default=None, + help="Optional random seed for reproducible metric values.", + ) + parser.add_argument( + "--truncate-first", + action="store_true", + default=False, + help="Clear existing metrics tables before seeding to avoid duplicates.", + ) + parser.add_argument( + "--delivery", + choices=["db", "s3"], + default="db", + help="Delivery mode for metrics dataset: database insert or s3 ingestion files.", + ) + parser.add_argument( + "--non-public", + action="store_true", + default=False, + help="Mark generated metric points as non-public (`is_public=False`).", + ) + + def handle(self, *args, **options) -> None: + started_at = time.perf_counter() + dataset: str = options["dataset"] + scale: str = options["scale"] + truncate_first: bool = options["truncate_first"] + delivery: str = options["delivery"] + is_public: bool = not options["non_public"] + + selected_seed = ( + options["seed"] if options["seed"] is not None else int(time.time()) + ) + random.seed(selected_seed) # nosec B311 + self.stdout.write(f"Seed used: {selected_seed}") + + should_seed_cms = dataset in {"cms", "both"} + should_seed_metrics = dataset in {"metrics", "both"} + + counts: dict[str, int] = { + "Theme": 0, + "SubTheme": 0, + "Topic": 0, + "Metric": 0, + "Geography": 0, + "CoreTimeSeries": 0, + "APITimeSeries": 0, + } + + if should_seed_metrics: + scale_config = SCALE_CONFIGS[scale] + self.stderr.write("Seeding metrics dataset...") + if delivery == "s3": + counts = self._seed_metrics_data_to_s3( + scale_config=scale_config, + is_public=is_public, + progress_callback=self.stderr.write, + ) + else: + counts = self._seed_metrics_data( + scale_config=scale_config, + truncate_first=truncate_first, + is_public=is_public, + progress_callback=self.stderr.write, + ) + self.stderr.write("Metrics dataset seeding complete.") + + if should_seed_cms: + self.stderr.write("Building CMS site data...") + call_command("build_cms_site") + self.stderr.write("CMS site build complete.") + + runtime_seconds = time.perf_counter() - started_at + self._print_summary( + dataset=dataset, + scale=scale, + seed=selected_seed, + counts=counts, + runtime_seconds=runtime_seconds, + ) + + @classmethod + def _seed_metrics_data( + cls, + *, + scale_config: dict[str, int], + truncate_first: bool, + is_public: bool, + progress_callback: Callable[[str], None] | None = None, + ) -> dict[str, int]: + """Seed supporting metric models and time series rows for the selected scale. + + Args: + scale_config: Scale-specific object counts for generated records. + truncate_first: Whether to clear existing metrics-related tables before seeding. + is_public: Whether generated metric rows should be marked as public. + progress_callback: Optional callback used to report progress updates. + + Returns: + Count of created records keyed by model or dataset name. + """ + if progress_callback is not None: + progress_callback("Preparing metric taxonomy and geography records...") + + with transaction.atomic(): + if truncate_first: + cls._truncate_metrics_data() + + themes, sub_themes, topics = cls._seed_theme_hierarchy() + metric_start_index = cls._get_next_random_metric_index() + + metrics = cls._bulk_create( + Metric, + [ + Metric( + name=f"Random Metric {metric_start_index + index}", + topic=topics[index % len(topics)], + ) + for index in range(scale_config["metrics"]) + ], + ) + + geographies = cls._seed_geographies(count=scale_config["geographies"]) + + stratum_record, _ = Stratum.objects.get_or_create(name="All") + age_record, _ = Age.objects.get_or_create(name="All ages") + stratum = cast(Stratum, stratum_record) + age = cast(Age, age_record) + + if progress_callback is not None: + progress_callback("Generating Core/API time series rows...") + core_count, api_count = cls._seed_time_series_rows( + metrics=metrics, + geographies=geographies, + stratum=stratum, + age=age, + days=scale_config["days"], + is_public=is_public, + progress_callback=progress_callback, + ) + + return { + "Theme": len(themes), + "SubTheme": len(sub_themes), + "Topic": len(topics), + "Metric": len(metrics), + "Geography": len(geographies), + "CoreTimeSeries": core_count, + "APITimeSeries": api_count, + } + + @classmethod + def _seed_metrics_data_to_s3( + cls, + *, + scale_config: dict[str, int], + is_public: bool, + progress_callback: Callable[[str], None] | None = None, + ) -> dict[str, int]: + if progress_callback is not None: + progress_callback("Generating ingestion payloads for S3 upload...") + + payloads = cls._build_timeseries_ingestion_payloads( + scale_config=scale_config, + is_public=is_public, + ) + client = AWSClient() + uploaded_files = 0 + for payload_index, payload in enumerate(payloads, start=1): + key = cls._build_s3_object_key(payload=payload, payload_index=payload_index) + client.upload_json_to_inbound(key=key, payload=payload) + uploaded_files += 1 + + if progress_callback is not None: + progress_callback( + f"Uploaded {uploaded_files:,} files to ingest bucket in/." + ) + + topic_rows = cls._build_theme_hierarchy_records()[2] + theme_count = len({theme_name for _, _, theme_name in topic_rows}) + sub_theme_count = len( + { + (sub_theme_name, theme_name) + for _, sub_theme_name, theme_name in topic_rows + } + ) + topic_count = len( + { + (topic_name, sub_theme_name, theme_name) + for topic_name, sub_theme_name, theme_name in topic_rows + } + ) + geography_count = len( + cls._build_geography_seed_values(count=scale_config["geographies"]) + ) + row_count = scale_config["metrics"] * geography_count * scale_config["days"] + + return { + "Theme": theme_count, + "SubTheme": sub_theme_count, + "Topic": topic_count, + "Metric": scale_config["metrics"], + "Geography": geography_count, + "CoreTimeSeries": row_count, + "APITimeSeries": row_count, + } + + @classmethod + def _truncate_metrics_data(cls) -> None: + """Delete all seeded metrics-related rows in dependency-safe order.""" + APITimeSeries.objects.all().delete() + CoreTimeSeries.objects.all().delete() + Metric.objects.all().delete() + Topic.objects.all().delete() + SubTheme.objects.all().delete() + Theme.objects.all().delete() + Geography.objects.all().delete() + GeographyType.objects.all().delete() + Age.objects.all().delete() + Stratum.objects.all().delete() + + @classmethod + def _seed_time_series_rows( + cls, + *, + metrics: list[Metric], + geographies: list[Geography], + stratum: Stratum, + age: Age, + days: int, + is_public: bool, + progress_callback: Callable[[str], None] | None = None, + ) -> tuple[int, int]: + frequency = TimePeriod.Weekly.value + start_date = date.today() - timedelta(days=days - 1) + batch_size = 5000 + core_rows: list[CoreTimeSeries] = [] + api_rows: list[APITimeSeries] = [] + core_count = 0 + api_count = 0 + total_metrics = len(metrics) + total_row_count = total_metrics * len(geographies) * days + log_interval = max(1, total_metrics // 10) if total_metrics else 1 + + for metric_index, metric in enumerate(metrics, start=1): + for core_row, api_row in cls._build_time_series_rows_for_metric( + metric=metric, + geographies=geographies, + stratum=stratum, + age=age, + days=days, + is_public=is_public, + start_date=start_date, + frequency=frequency, + ): + core_rows.append(core_row) + core_rows, core_count = cls._flush_batch( + model=CoreTimeSeries, + rows=core_rows, + batch_size=batch_size, + current_count=core_count, + ) + + api_rows.append(api_row) + api_rows, api_count = cls._flush_batch( + model=APITimeSeries, + rows=api_rows, + batch_size=batch_size, + current_count=api_count, + ) + + if progress_callback is not None and ( + metric_index == total_metrics or metric_index % log_interval == 0 + ): + processed_row_count = metric_index * len(geographies) * days + progress_callback( + f"Processed {metric_index}/{total_metrics} metrics " + f"({processed_row_count:,}/{total_row_count:,} row groups)." + ) + + core_count = cls._flush_remaining( + model=CoreTimeSeries, + rows=core_rows, + batch_size=batch_size, + current_count=core_count, + ) + api_count = cls._flush_remaining( + model=APITimeSeries, + rows=api_rows, + batch_size=batch_size, + current_count=api_count, + ) + + if progress_callback is not None: + progress_callback( + f"Inserted {core_count:,} CoreTimeSeries rows and {api_count:,} APITimeSeries rows." + ) + + return core_count, api_count + + @staticmethod + def _flush_batch( + *, + model: type[TModel], + rows: list[TModel], + batch_size: int, + current_count: int, + ) -> tuple[list[TModel], int]: + if len(rows) < batch_size: + return rows, current_count + + model.objects.bulk_create(rows, batch_size=batch_size) + return [], current_count + len(rows) + + @staticmethod + def _flush_remaining( + *, + model: type[TModel], + rows: list[TModel], + batch_size: int, + current_count: int, + ) -> int: + if not rows: + return current_count + + model.objects.bulk_create(rows, batch_size=batch_size) + return current_count + len(rows) + + @classmethod + def _seed_theme_hierarchy(cls) -> tuple[list[Theme], list[SubTheme], list[Topic]]: + theme_names, sub_theme_rows, topic_rows = cls._build_theme_hierarchy_records() + themes, themes_by_name = cls._upsert_themes(theme_names=theme_names) + sub_themes, sub_themes_by_key = cls._upsert_sub_themes( + theme_names=theme_names, + sub_theme_rows=sub_theme_rows, + themes_by_name=themes_by_name, + ) + topics = cls._upsert_topics( + topic_rows=topic_rows, + sub_themes_by_key=sub_themes_by_key, + ) + return themes, sub_themes, topics + + @classmethod + def _upsert_themes( + cls, + *, + theme_names: list[str], + ) -> tuple[list[Theme], dict[str, Theme]]: + themes_by_name = { + theme.name: theme for theme in Theme.objects.filter(name__in=theme_names) + } + missing_theme_names = [ + name for name in theme_names if name not in themes_by_name + ] + if missing_theme_names: + cls._bulk_create(Theme, [Theme(name=name) for name in missing_theme_names]) + themes_by_name.update( + { + theme.name: theme + for theme in Theme.objects.filter(name__in=missing_theme_names) + } + ) + return [themes_by_name[name] for name in theme_names], themes_by_name + + @classmethod + def _upsert_sub_themes( + cls, + *, + theme_names: list[str], + sub_theme_rows: list[tuple[str, str]], + themes_by_name: dict[str, Theme], + ) -> tuple[list[SubTheme], dict[tuple[str, str], SubTheme]]: + sub_theme_keys = list(dict.fromkeys(sub_theme_rows)) + existing_sub_themes = SubTheme.objects.select_related("theme").filter( + theme__name__in=theme_names, + name__in={name for name, _ in sub_theme_keys}, + ) + sub_themes_by_key = { + (sub_theme.name, sub_theme.theme.name): sub_theme + for sub_theme in existing_sub_themes + } + missing_sub_theme_keys = [ + (sub_theme_name, theme_name) + for sub_theme_name, theme_name in sub_theme_keys + if (sub_theme_name, theme_name) not in sub_themes_by_key + ] + if missing_sub_theme_keys: + cls._bulk_create( + SubTheme, + [ + SubTheme(name=sub_theme_name, theme=themes_by_name[theme_name]) + for sub_theme_name, theme_name in missing_sub_theme_keys + ], + ) + sub_themes_by_key.update( + { + (sub_theme.name, sub_theme.theme.name): sub_theme + for sub_theme in SubTheme.objects.select_related("theme").filter( + theme__name__in={ + theme_name for _, theme_name in missing_sub_theme_keys + }, + name__in={ + sub_theme_name + for sub_theme_name, _ in missing_sub_theme_keys + }, + ) + } + ) + return [sub_themes_by_key[key] for key in sub_theme_keys], sub_themes_by_key + + @classmethod + def _upsert_topics( + cls, + *, + topic_rows: list[tuple[str, str, str]], + sub_themes_by_key: dict[tuple[str, str], SubTheme], + ) -> list[Topic]: + topic_keys = list(dict.fromkeys(topic_rows)) + sub_themes_by_id_key = { + (sub_theme_name, theme_name): sub_themes_by_key[ + (sub_theme_name, theme_name) + ] + for _, sub_theme_name, theme_name in topic_keys + } + candidate_sub_theme_ids = [ + sub_theme.id for sub_theme in sub_themes_by_id_key.values() + ] + existing_topics = Topic.objects.filter( + sub_theme_id__in=candidate_sub_theme_ids, + name__in={topic_name for topic_name, _, _ in topic_keys}, + ) + topics_by_key = { + (topic.name, topic.sub_theme_id): topic for topic in existing_topics + } + missing_topic_keys = [ + topic_key + for topic_key in topic_keys + if ( + topic_key[0], + sub_themes_by_id_key[(topic_key[1], topic_key[2])].id, + ) + not in topics_by_key + ] + if missing_topic_keys: + cls._bulk_create( + Topic, + [ + Topic( + name=topic_name, + sub_theme=sub_themes_by_id_key[(sub_theme_name, theme_name)], + ) + for topic_name, sub_theme_name, theme_name in missing_topic_keys + ], + ) + topics_by_key.update( + { + (topic.name, topic.sub_theme_id): topic + for topic in Topic.objects.filter( + sub_theme_id__in=[ + sub_themes_by_id_key[(sub_theme_name, theme_name)].id + for _, sub_theme_name, theme_name in missing_topic_keys + ], + name__in={ + topic_name for topic_name, _, _ in missing_topic_keys + }, + ) + } + ) + return [ + topics_by_key[ + ( + topic_name, + sub_themes_by_id_key[(sub_theme_name, theme_name)].id, + ) + ] + for topic_name, sub_theme_name, theme_name in topic_keys + ] + + @classmethod + def _seed_geographies(cls, *, count: int) -> list[Geography]: + geography_seed_values = cls._build_geography_seed_values(count=count) + geography_type_names = { + record["geography_type"] for record in geography_seed_values + } + geography_type_names = sorted(geography_type_names) + geography_types_by_name = { + geography_type.name: geography_type + for geography_type in GeographyType.objects.filter( + name__in=geography_type_names + ) + } + missing_geography_type_names = [ + name for name in geography_type_names if name not in geography_types_by_name + ] + if missing_geography_type_names: + cls._bulk_create( + GeographyType, + [GeographyType(name=name) for name in missing_geography_type_names], + ) + geography_types_by_name.update( + { + geography_type.name: geography_type + for geography_type in GeographyType.objects.filter( + name__in=missing_geography_type_names + ) + } + ) + geography_types_by_name = { + name: geography_types_by_name[name] for name in geography_type_names + } + geography_keys = list( + dict.fromkeys( + (record["name"], record["geography_type"], record["geography_code"]) + for record in geography_seed_values + ) + ) + existing_geographies = Geography.objects.select_related( + "geography_type" + ).filter( + name__in={name for name, _, _ in geography_keys}, + geography_type__name__in={ + geography_type for _, geography_type, _ in geography_keys + }, + ) + geographies_by_key = { + (geography.name, geography.geography_type.name): geography + for geography in existing_geographies + } + missing_geography_keys = [ + (name, geography_type, geography_code) + for name, geography_type, geography_code in geography_keys + if (name, geography_type) not in geographies_by_key + ] + if missing_geography_keys: + cls._bulk_create( + Geography, + [ + Geography( + name=name, + geography_code=geography_code, + geography_type=geography_types_by_name[geography_type], + ) + for name, geography_type, geography_code in missing_geography_keys + ], + ) + geographies_by_key.update( + { + (geography.name, geography.geography_type.name): geography + for geography in Geography.objects.select_related( + "geography_type" + ).filter( + name__in={name for name, _, _ in missing_geography_keys}, + geography_type__name__in={ + geography_type + for _, geography_type, _ in missing_geography_keys + }, + ) + } + ) + + return [ + geographies_by_key[(name, geography_type)] + for name, geography_type, _ in geography_keys + ] + + @classmethod + def _build_time_series_rows_for_metric( + cls, + *, + metric: Metric, + geographies: list[Geography], + stratum: Stratum, + age: Age, + days: int, + is_public: bool, + start_date: date, + frequency: str, + ) -> Iterable[tuple[CoreTimeSeries, APITimeSeries]]: + topic = metric.topic + sub_theme = topic.sub_theme + theme = sub_theme.theme + + for geography in geographies: + for day_offset in range(days): + current_date = start_date + timedelta(days=day_offset) + base_value = random.uniform(5.0, 250.0) # noqa: S311 # nosec B311 metric_value = round( - random.uniform(5.0, 250.0), # noqa: B311 + base_value + + random.uniform(-10.0, 10.0), # noqa: S311 # nosec B311 2, ) + sex = random.choice(SEED_RANDOM_SEX_OPTIONS) # noqa: S311 # nosec B311 + epidemiological_week = current_date.isocalendar().week + + yield ( + CoreTimeSeries( + metric=metric, + metric_frequency=frequency, + geography=geography, + stratum=stratum, + age=age, + sex=sex, + year=current_date.year, + month=current_date.month, + epiweek=epidemiological_week, + date=current_date, + metric_value=Decimal(str(metric_value)), + is_public=is_public, + ), + APITimeSeries( + metric_frequency=frequency, + age=age.name, + month=current_date.month, + geography_code=geography.geography_code, + metric_group=None, + theme=theme.name, + sub_theme=sub_theme.name, + topic=topic.name, + geography_type=geography.geography_type.name, + geography=geography.name, + metric=metric.name, + stratum=stratum.name, + sex=sex, + year=current_date.year, + epiweek=epidemiological_week, + date=current_date, + metric_value=float(metric_value), + is_public=is_public, + ), + ) + + @classmethod + def _build_timeseries_ingestion_payloads( + cls, + *, + scale_config: dict[str, int], + is_public: bool, + ) -> list[dict[str, object]]: + _, _, topic_rows = cls._build_theme_hierarchy_records() + geographies = cls._build_geography_seed_values( + count=scale_config["geographies"] + ) + refresh_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + start_date = date.today() - timedelta(days=scale_config["days"] - 1) + payloads: list[dict[str, object]] = [] - time_series_rows.append( + for metric_index in range(scale_config["metrics"]): + topic_name, sub_theme_name, theme_name = topic_rows[ + metric_index % len(topic_rows) + ] + metric_name = f"{topic_name}_cases_randomByDay_{metric_index + 1}" + for geography in geographies: + time_series_rows: list[dict[str, object]] = [] + for day_offset in range(scale_config["days"]): + current_date = start_date + timedelta(days=day_offset) + metric_value = round( + random.uniform(5.0, 250.0), # noqa: S311 # nosec B311 + 2, + ) + time_series_rows.append( + { + "epiweek": current_date.isocalendar().week, + "date": current_date.isoformat(), + "metric_value": metric_value, + "embargo": None, + "is_public": is_public, + } + ) + + payloads.append( { - "epiweek": current_date.isocalendar().week, - "date": current_date.isoformat(), - "metric_value": metric_value, - "embargo": None, - "is_public": is_public, + "parent_theme": theme_name, + "child_theme": sub_theme_name, + "topic": topic_name, + "metric_group": "cases", + "metric": metric_name, + "metric_frequency": TimePeriod.Weekly.value, + "geography_type": geography["geography_type"], + "geography": geography["name"], + "geography_code": geography["geography_code"], + "age": "all", + "sex": random.choice(SEED_RANDOM_SEX_OPTIONS), # noqa: S311 # nosec B311 + "stratum": "default", + "refresh_date": refresh_date, + "time_series": time_series_rows, } ) - sex_value = random.choice(SEED_RANDOM_SEX_OPTIONS) # noqa: S311 + return payloads - payloads.append( - { - "parent_theme": theme_name, - "child_theme": sub_theme_name, - "topic": topic_name, - "metric_group": "cases", - "metric": metric_name, - "metric_frequency": TimePeriod.Weekly.value, - "geography_type": geography["geography_type"], - "geography": geography["name"], - "geography_code": geography["geography_code"], - "age": "all", - "sex": sex_value, - "stratum": "default", - "refresh_date": refresh_date, - "time_series": time_series_rows, - } + @classmethod + def _build_s3_object_key( + cls, + *, + payload: dict[str, object], + payload_index: int, + ) -> str: + topic_name = str(payload["topic"]) + metric_name = str(payload["metric"]) + geography_code = str(payload["geography_code"]) + age = str(payload["age"]) + sex = str(payload["sex"]) + stratum = str(payload["stratum"]) + safe_topic = cls._normalise_key(topic_name) + safe_metric = cls._normalise_key(metric_name) + return f"in/{safe_topic}_cases_{safe_metric}_{geography_code}_{age}_{sex}_{stratum}_{payload_index}.json" + + @staticmethod + def _bulk_create(model: type[TModel], records: Iterable[TModel]) -> list[TModel]: + """Materialise and bulk insert a sequence of model instances.""" + return model.objects.bulk_create(list(records)) + + @staticmethod + def _get_next_random_metric_index() -> int: + max_metric_index = 0 + for metric_name in Metric.objects.filter( + name__startswith="Random Metric " + ).values_list( + "name", + flat=True, + ): + match = re.fullmatch(r"Random Metric (\d+)", metric_name) + if match is None: + continue + max_metric_index = max(max_metric_index, int(match.group(1))) + return max_metric_index + 1 + + @classmethod + def _build_theme_hierarchy_records( + cls, + ) -> tuple[list[str], list[tuple[str, str]], list[tuple[str, str, str]]]: + child_to_parent: dict[str, str] = {} + normalised_to_child: dict[str, str] = {} + parent_by_name = validation_enums.ParentTheme.__members__ + + for child_theme_group in validation_enums.ChildTheme: + resolved_parent = ( + parent_by_name[child_theme_group.name].value + if child_theme_group.name in parent_by_name + else validation_enums.ParentTheme.INFECTIOUS_DISEASE.value ) + for sub_theme_name in child_theme_group.return_list(): + child_to_parent[sub_theme_name] = resolved_parent + normalised_to_child[cls._normalise_key(sub_theme_name)] = sub_theme_name + + topic_rows: list[tuple[str, str, str]] = [] + sub_theme_pairs: set[tuple[str, str]] = set() + for topic_group in validation_enums.Topic: + normalised_topic_group = cls._normalise_key(topic_group.name) + sub_theme_name = normalised_to_child.get(normalised_topic_group) + if sub_theme_name is None: + continue + + parent_theme_name = child_to_parent[sub_theme_name] + sub_theme_pairs.add((sub_theme_name, parent_theme_name)) + topic_rows.extend( + (topic_value, sub_theme_name, parent_theme_name) + for topic_value in topic_group.return_list() + ) + + theme_names = sorted({parent_name for _, parent_name in sub_theme_pairs}) + sub_theme_rows = sorted( + sub_theme_pairs, + key=itemgetter(1, 0), + ) + return theme_names, sub_theme_rows, topic_rows + + @classmethod + def _build_geography_seed_values(cls, *, count: int) -> list[dict[str, str]]: + geographies: list[dict[str, str]] = [ + { + "name": "United Kingdom", + "geography_code": UNITED_KINGDOM_GEOGRAPHY_CODE, + "geography_type": (validation_enums.GeographyType.UNITED_KINGDOM.value), + } + ] + + geographies.extend( + { + "name": name, + "geography_code": code, + "geography_type": validation_enums.GeographyType.NATION.value, + } + for name, code in NATION_GEOGRAPHY_CODES.items() + ) + + if len(geographies) >= count: + return geographies[:count] + + extra_required = count - len(geographies) + geographies.extend( + { + "name": cls._format_enum_name(ltla.name), + "geography_code": ltla.value, + "geography_type": ( + validation_enums.GeographyType.LOWER_TIER_LOCAL_AUTHORITY.value + ), + } + for ltla in list(validation_enums.LTLAs)[:extra_required] + ) + return geographies[:count] + + @staticmethod + def _normalise_key(value: str) -> str: + return value.lower().replace("-", "_") + + @staticmethod + def _format_enum_name(value: str) -> str: + return value.replace("_", " ").title() - return payloads + def _print_summary( + self, + *, + dataset: str, + scale: str, + seed: int, + counts: dict[str, int], + runtime_seconds: float, + ) -> None: + self.stdout.write("") + self.stdout.write("Seed random summary:") + self.stdout.write(f" dataset: {dataset}") + self.stdout.write(f" scale: {scale}") + self.stdout.write(f" seed used: {seed}") + self.stdout.write(f" Theme: {counts['Theme']}") + self.stdout.write(f" SubTheme: {counts['SubTheme']}") + self.stdout.write(f" Topic: {counts['Topic']}") + self.stdout.write(f" Metric: {counts['Metric']}") + self.stdout.write(f" Geography: {counts['Geography']}") + self.stdout.write(f" CoreTimeSeries: {counts['CoreTimeSeries']}") + self.stdout.write(f" APITimeSeries: {counts['APITimeSeries']}") + self.stdout.write(f" runtime seconds: {runtime_seconds:.2f}") \ No newline at end of file From 3a7013596dfbe53edd4c1f32ed72a14d2ac8c459 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Fri, 27 Mar 2026 13:56:32 +0000 Subject: [PATCH 26/36] Apply formatting changes from CI --- metrics/interfaces/management/commands/seed_random.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index 44ba5f2eb..902f7a2b1 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -745,7 +745,9 @@ def _build_timeseries_ingestion_payloads( "geography": geography["name"], "geography_code": geography["geography_code"], "age": "all", - "sex": random.choice(SEED_RANDOM_SEX_OPTIONS), # noqa: S311 # nosec B311 + "sex": random.choice( + SEED_RANDOM_SEX_OPTIONS + ), # noqa: S311 # nosec B311 "stratum": "default", "refresh_date": refresh_date, "time_series": time_series_rows, @@ -895,4 +897,4 @@ def _print_summary( self.stdout.write(f" Geography: {counts['Geography']}") self.stdout.write(f" CoreTimeSeries: {counts['CoreTimeSeries']}") self.stdout.write(f" APITimeSeries: {counts['APITimeSeries']}") - self.stdout.write(f" runtime seconds: {runtime_seconds:.2f}") \ No newline at end of file + self.stdout.write(f" runtime seconds: {runtime_seconds:.2f}") From a31766027016fe17b6afa854fa483f41f2f8fc2f Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Fri, 27 Mar 2026 13:59:51 +0000 Subject: [PATCH 27/36] resolve s311 lint for random.choice inline --- metrics/interfaces/management/commands/seed_random.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index 902f7a2b1..6543a6130 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -745,9 +745,7 @@ def _build_timeseries_ingestion_payloads( "geography": geography["name"], "geography_code": geography["geography_code"], "age": "all", - "sex": random.choice( - SEED_RANDOM_SEX_OPTIONS - ), # noqa: S311 # nosec B311 + "sex": random.choice(SEED_RANDOM_SEX_OPTIONS), # noqa: S311 "stratum": "default", "refresh_date": refresh_date, "time_series": time_series_rows, From 5d68451f33fdbf4ab256c157228682df0a5fca35 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Fri, 27 Mar 2026 14:05:59 +0000 Subject: [PATCH 28/36] suppress bandit B311 using nosec --- metrics/interfaces/management/commands/seed_random.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index 6543a6130..ce023c8c3 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -649,13 +649,13 @@ def _build_time_series_rows_for_metric( for geography in geographies: for day_offset in range(days): current_date = start_date + timedelta(days=day_offset) - base_value = random.uniform(5.0, 250.0) # noqa: S311 # nosec B311 + base_value = random.uniform(5.0, 250.0) # nosec metric_value = round( base_value - + random.uniform(-10.0, 10.0), # noqa: S311 # nosec B311 + + random.uniform(-10.0, 10.0), # nosec 2, ) - sex = random.choice(SEED_RANDOM_SEX_OPTIONS) # noqa: S311 # nosec B311 + sex = random.choice(SEED_RANDOM_SEX_OPTIONS) # nosec epidemiological_week = current_date.isocalendar().week yield ( @@ -720,7 +720,7 @@ def _build_timeseries_ingestion_payloads( for day_offset in range(scale_config["days"]): current_date = start_date + timedelta(days=day_offset) metric_value = round( - random.uniform(5.0, 250.0), # noqa: S311 # nosec B311 + random.uniform(5.0, 250.0), # nosec 2, ) time_series_rows.append( @@ -745,7 +745,7 @@ def _build_timeseries_ingestion_payloads( "geography": geography["name"], "geography_code": geography["geography_code"], "age": "all", - "sex": random.choice(SEED_RANDOM_SEX_OPTIONS), # noqa: S311 + "sex": random.choice(SEED_RANDOM_SEX_OPTIONS), # nosec "stratum": "default", "refresh_date": refresh_date, "time_series": time_series_rows, From 88e9bd9b524eca051f708dd335b71f2a96b9d5b0 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Fri, 27 Mar 2026 14:10:07 +0000 Subject: [PATCH 29/36] satisfy both ruff and bandit for random usage --- .../interfaces/management/commands/seed_random.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index ce023c8c3..4447bdd05 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -95,7 +95,7 @@ def handle(self, *args, **options) -> None: selected_seed = ( options["seed"] if options["seed"] is not None else int(time.time()) ) - random.seed(selected_seed) # nosec B311 + random.seed(selected_seed) # noqa: S311 # nosec self.stdout.write(f"Seed used: {selected_seed}") should_seed_cms = dataset in {"cms", "both"} @@ -649,13 +649,13 @@ def _build_time_series_rows_for_metric( for geography in geographies: for day_offset in range(days): current_date = start_date + timedelta(days=day_offset) - base_value = random.uniform(5.0, 250.0) # nosec + base_value = random.uniform(5.0, 250.0) # noqa: S311 # nosec metric_value = round( base_value - + random.uniform(-10.0, 10.0), # nosec + + random.uniform(-10.0, 10.0), # noqa: S311 # nosec 2, ) - sex = random.choice(SEED_RANDOM_SEX_OPTIONS) # nosec + sex = random.choice(SEED_RANDOM_SEX_OPTIONS) # noqa: S311 # nosec epidemiological_week = current_date.isocalendar().week yield ( @@ -720,7 +720,7 @@ def _build_timeseries_ingestion_payloads( for day_offset in range(scale_config["days"]): current_date = start_date + timedelta(days=day_offset) metric_value = round( - random.uniform(5.0, 250.0), # nosec + random.uniform(5.0, 250.0), # noqa: S311 # nosec 2, ) time_series_rows.append( @@ -745,7 +745,7 @@ def _build_timeseries_ingestion_payloads( "geography": geography["name"], "geography_code": geography["geography_code"], "age": "all", - "sex": random.choice(SEED_RANDOM_SEX_OPTIONS), # nosec + "sex": random.choice(SEED_RANDOM_SEX_OPTIONS), # noqa: S311 # nosec "stratum": "default", "refresh_date": refresh_date, "time_series": time_series_rows, From 9de1ebd63e458937f255f9cb5222de66b9577298 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Fri, 27 Mar 2026 14:20:30 +0000 Subject: [PATCH 30/36] resolve s311 lint issues properly --- .../interfaces/management/commands/seed_random.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index 4447bdd05..664311b8d 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -95,7 +95,7 @@ def handle(self, *args, **options) -> None: selected_seed = ( options["seed"] if options["seed"] is not None else int(time.time()) ) - random.seed(selected_seed) # noqa: S311 # nosec + random.seed(selected_seed) # noqa: S311 # nosec B311 self.stdout.write(f"Seed used: {selected_seed}") should_seed_cms = dataset in {"cms", "both"} @@ -649,13 +649,13 @@ def _build_time_series_rows_for_metric( for geography in geographies: for day_offset in range(days): current_date = start_date + timedelta(days=day_offset) - base_value = random.uniform(5.0, 250.0) # noqa: S311 # nosec + base_value = random.uniform(5.0, 250.0) # noqa: S311 # nosec B311 metric_value = round( base_value - + random.uniform(-10.0, 10.0), # noqa: S311 # nosec + + random.uniform(-10.0, 10.0), # noqa: S311 # nosec B311 2, ) - sex = random.choice(SEED_RANDOM_SEX_OPTIONS) # noqa: S311 # nosec + sex = random.choice(SEED_RANDOM_SEX_OPTIONS) # noqa: S311 # nosec B311 epidemiological_week = current_date.isocalendar().week yield ( @@ -720,7 +720,7 @@ def _build_timeseries_ingestion_payloads( for day_offset in range(scale_config["days"]): current_date = start_date + timedelta(days=day_offset) metric_value = round( - random.uniform(5.0, 250.0), # noqa: S311 # nosec + random.uniform(5.0, 250.0), # noqa: S311 # nosec B311 2, ) time_series_rows.append( @@ -745,7 +745,9 @@ def _build_timeseries_ingestion_payloads( "geography": geography["name"], "geography_code": geography["geography_code"], "age": "all", - "sex": random.choice(SEED_RANDOM_SEX_OPTIONS), # noqa: S311 # nosec + "sex": random.choice( + SEED_RANDOM_SEX_OPTIONS + ), # noqa: S311 # nosec B311 "stratum": "default", "refresh_date": refresh_date, "time_series": time_series_rows, From 339c1b30c1d941aa66b5b1c8291e08c71f9b5ae5 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Fri, 27 Mar 2026 14:35:05 +0000 Subject: [PATCH 31/36] Move random.choice to variable for lin stability --- metrics/interfaces/management/commands/seed_random.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index 664311b8d..d37132c06 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -733,6 +733,9 @@ def _build_timeseries_ingestion_payloads( } ) + sex_value = random.choice( + SEED_RANDOM_SEX_OPTIONS + ) # noqa: S311 # nosec B311 payloads.append( { "parent_theme": theme_name, @@ -745,9 +748,7 @@ def _build_timeseries_ingestion_payloads( "geography": geography["name"], "geography_code": geography["geography_code"], "age": "all", - "sex": random.choice( - SEED_RANDOM_SEX_OPTIONS - ), # noqa: S311 # nosec B311 + "sex": sex_value, "stratum": "default", "refresh_date": refresh_date, "time_series": time_series_rows, From f11303f98f9c7bf3dd919f93435a4829a309d762 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Fri, 27 Mar 2026 14:41:40 +0000 Subject: [PATCH 32/36] Single line to satisfy ruff --- metrics/interfaces/management/commands/seed_random.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index d37132c06..eb904911c 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -733,9 +733,7 @@ def _build_timeseries_ingestion_payloads( } ) - sex_value = random.choice( - SEED_RANDOM_SEX_OPTIONS - ) # noqa: S311 # nosec B311 + sex_value = random.choice(SEED_RANDOM_SEX_OPTIONS) # noqa: S311 # nosec B311 payloads.append( { "parent_theme": theme_name, From 02bdc9feec9305ca405a242c4d42d9b91f347d36 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Mon, 30 Mar 2026 13:06:58 +0100 Subject: [PATCH 33/36] Rudd and black fixed --- metrics/interfaces/management/commands/seed_random.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index eb904911c..fe537d84d 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -733,7 +733,9 @@ def _build_timeseries_ingestion_payloads( } ) - sex_value = random.choice(SEED_RANDOM_SEX_OPTIONS) # noqa: S311 # nosec B311 + sex_value = random.choice( # noqa: S311 # nosec B311 + SEED_RANDOM_SEX_OPTIONS + ) payloads.append( { "parent_theme": theme_name, From 87952d2b7bee0d6728cfeb6f159ccb06b187c1bf Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Mon, 30 Mar 2026 13:12:28 +0100 Subject: [PATCH 34/36] Ttest pipeline run --- metrics/interfaces/management/commands/seed_random.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index fe537d84d..eb0ae645c 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -655,7 +655,7 @@ def _build_time_series_rows_for_metric( + random.uniform(-10.0, 10.0), # noqa: S311 # nosec B311 2, ) - sex = random.choice(SEED_RANDOM_SEX_OPTIONS) # noqa: S311 # nosec B311 + sex = random.choice(SEED_RANDOM_SEX_OPTIONS) # noqa: S311 # nosec B311 epidemiological_week = current_date.isocalendar().week yield ( From dbdef3359b00d88cf6948847b5e0133646b5795e Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Mon, 30 Mar 2026 13:18:55 +0100 Subject: [PATCH 35/36] Test pipeline run --- metrics/interfaces/management/commands/seed_random.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/interfaces/management/commands/seed_random.py b/metrics/interfaces/management/commands/seed_random.py index eb0ae645c..fe537d84d 100644 --- a/metrics/interfaces/management/commands/seed_random.py +++ b/metrics/interfaces/management/commands/seed_random.py @@ -655,7 +655,7 @@ def _build_time_series_rows_for_metric( + random.uniform(-10.0, 10.0), # noqa: S311 # nosec B311 2, ) - sex = random.choice(SEED_RANDOM_SEX_OPTIONS) # noqa: S311 # nosec B311 + sex = random.choice(SEED_RANDOM_SEX_OPTIONS) # noqa: S311 # nosec B311 epidemiological_week = current_date.isocalendar().week yield ( From 34dff22036a8ef4ec70bd42f8dda6a1e7906dce3 Mon Sep 17 00:00:00 2001 From: abdihakim92x1 Date: Wed, 1 Apr 2026 16:40:35 +0100 Subject: [PATCH 36/36] fixing system test issue --- tests/system/test_seed_random.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/system/test_seed_random.py b/tests/system/test_seed_random.py index 1c5a22423..1bac11bcd 100644 --- a/tests/system/test_seed_random.py +++ b/tests/system/test_seed_random.py @@ -55,7 +55,7 @@ def test_command_seeds_metrics_dataset_and_data_is_queryable_via_api(self): f"topics/{quote(sample_row.topic, safe='')}/" f"geography_types/{quote(sample_row.geography_type, safe='')}/" f"geographies/{quote(sample_row.geography, safe='')}/" - "metrics/" + "metrics" ) response = api_client.get( path=path, @@ -64,5 +64,6 @@ def test_command_seeds_metrics_dataset_and_data_is_queryable_via_api(self): ) assert response.status_code == HTTP_OK - assert "metrics" in response.data - assert sample_row.metric in response.data["metrics"] + assert isinstance(response.data, list) + metric_names = [item["name"] for item in response.data] + assert sample_row.metric in metric_names