diff --git a/.env.example b/.env.example
index 48ec26f..14b2ec2 100644
--- a/.env.example
+++ b/.env.example
@@ -1,5 +1,5 @@
-PROJECT_ID=ons-sdx-bob
-ENV=development
-DATASET_BUCKET_NAME="ons-sdx-bob-datasets"
+PROJECT_ID=ons-sds-sandbox
+PROFILE=dev
+DATASET_BUCKET_NAME="ons-sds-sandbox-datasets"
 FIRESTORE_DATABASE="test"
 PUBLISH_DATASET_TOPIC_ID="projects/ons-sds-sandbox/topics/publish-dataset"
diff --git a/Makefile b/Makefile
index 093f299..bb9e5b6 100644
--- a/Makefile
+++ b/Makefile
@@ -2,11 +2,6 @@ SHELL := bash
 .ONESHELL:
 
 
-PHONY: install
-install: ## Install dependencies
-	uv sync
-
-
 .PHONY: lint
 lint:
 	@echo "Running Ruff linter..."
@@ -25,10 +20,9 @@ test:
 	uv run --dev pytest -v --disable-warnings tests/
 
 
-.PHONY: test-parallel
-test-parallel:
-	@echo "Running Local Tests..."
-	uv run --dev pytest -n auto -v --disable-warnings tests/
+.PHONY: install
+install: ## Install dependencies
+	uv sync
 
 
 .PHONY: dev
diff --git a/README.md b/README.md
index 1824386..1561d0d 100644
--- a/README.md
+++ b/README.md
@@ -59,4 +59,53 @@ make test
 docker build -t sds-loader .
 ```
 
+## Profiles
+
+sds-loader uses "profiles" to determine the concrete implementation of the abstracted services it uses.
+
+For example when running locally, you may just want to use fake repositories and test the business logic of the application, whereas in production you will want to use a Firestore database, GCP etc.
+
+Profiles are determined by the `PROFILE` environment variable. This will default to `prod` if not set. The following profiles are available...
+
+- `prod`: This profile will use the real implementations of all services. This is the default profile.
+- `dev`: This profile will use fake repositories and services that do not connect to any real services
+- `local_storage_firestore` This will use fake repositories for all services except the `DatasetStorageRepositoryInterface` which will use Firestore. To set up a local Firestore read the instructions below...
+
+## Firestore emulator
+
+In order to use Firestore locally, you will need to set up the Firestore emulator. You can do this using Docker. Run the following command to start the Firestore emulator:
+
+You will need to set the envronment variable FIRESTORE_EMULATOR_HOST to instruct the application to connect to the emulator instead of the real Firestore service...
+
+```bash
+export FIRESTORE_EMULATOR_HOST=localhost:8080
+```
+
+Ensure the profile for the application is set to `local_storage_firestore` to use the Firestore emulator...
+
+```bash
+export PROFILE=local_storage_firestore
+
+# Or add it to the .env file
+
+PROFILE=local_storage_firestore
+```
+
+Then run the following command to start the Firestore emulator in Docker. Note it takes a few seconds to start up, so you may want to run this command before starting the application...
+
+```
+docker run \
+		--rm \
+		-p=9000:9000 \
+		-p=8080:8080 \
+		-p=4000:4000 \
+		-p=9099:9099 \
+		-p=8085:8085 \
+		-p=5001:5001 \
+		-p=9199:9199 \
+		--env "GCP_PROJECT=${PROJECT_ID}" \
+		--env "ENABLE_UI=true" \
+		spine3/firebase-emulator &
+```
+
 
diff --git a/app/broadcasters/fake_broadcaster.py b/app/broadcasters/fake_broadcaster.py
index 6788179..55110c6 100644
--- a/app/broadcasters/fake_broadcaster.py
+++ b/app/broadcasters/fake_broadcaster.py
@@ -6,6 +6,7 @@ class FakeBroadcaster(DatasetBroadcastInterface):
     """
     A fake broadcaster that doesn't actually broadcast the data
     """
+
     def __init__(self):
         self.broadcasted = []
 
diff --git a/app/broadcasters/pubsub_broadcaster.py b/app/broadcasters/pubsub_broadcaster.py
index 7f3f583..61068fa 100644
--- a/app/broadcasters/pubsub_broadcaster.py
+++ b/app/broadcasters/pubsub_broadcaster.py
@@ -16,6 +16,7 @@ class PubsubBroadcaster(DatasetBroadcastInterface):
     A broadcaster that will
     broadcast to pubsub
     """
+
     def __init__(
         self,
         settings: PubsubBroadcastSettings,
@@ -24,9 +25,4 @@ def __init__(
         self.pubsub_client = PubsubService()
 
     def broadcast(self, dataset_metadata: DatasetMetadata) -> None:
-
-        self.pubsub_client.publish_message(
-            self.settings.publish_dataset_topic_id,
-            json.dumps(dataset_metadata),
-            {}
-        )
+        self.pubsub_client.publish_message(self.settings.publish_dataset_topic_id, json.dumps(dataset_metadata), {})
diff --git a/app/dependencies.py b/app/dependencies.py
index 7ca6bf7..2b37da6 100644
--- a/app/dependencies.py
+++ b/app/dependencies.py
@@ -1,26 +1,13 @@
-from lagom import Singleton, dependency_definition
+from lagom import Singleton
 from lagom.container import Container
-from sds_common.publishers.gcs_schema_publisher import GcsSchemaPublisher
-from sds_common.publishers.github_schema_publisher import GithubSchemaPublisher
-from sdx_base.services.storage import StorageService
-
-from app.broadcasters.fake_broadcaster import FakeBroadcaster
-from app.broadcasters.pubsub_broadcaster import PubsubBroadcaster
-from app.interfaces.dataset_broadcast_interface import DatasetBroadcastInterface
-from app.interfaces.dataset_deletion_repository_interface import DatasetDeletionRepositoryInterface
-from app.interfaces.dataset_source_repository_interface import DatasetSourceRepositoryInterface
-from app.interfaces.dataset_storage_repository_interface import DatasetStorageRepositoryInterface
-from app.repositories.dataset_deletion.fake_dataset_deletion_repository import FakeDatasetDeletionRepository
-from app.repositories.dataset_deletion.firestore_dataset_deletion_repository import FirestoreDatasetDeletionRepository
-from app.repositories.dataset_source.bucket_dataset_source_repository import BucketDatasetSourceRepository
-from app.repositories.dataset_source.fake_dataset_source_repository import FakeDatasetSourceRepository
-from app.repositories.dataset_storage.fake_dataset_storage_repository import FakeDatasetStorageRepository
-from app.repositories.dataset_storage.firestore_dataset_storage_repository import FirestoreDatasetStorageRepository
-from app.services.dataset_service import DatasetService, DatasetSettings
-from app.services.schema_service import SchemaService
 
+from app import get_logger
+from app.profiles import PROFILES
+from app.services.dataset_service import DatasetService, DatasetSettings
 from app.settings import Settings, get_instance, QuickSettings
 
+logger = get_logger()
+
 
 class FakePublisher:
     def __init__(self, name: str):
@@ -41,101 +28,30 @@ def build_container() -> Container:
     # Create the DI container
     container = Container()
 
-    # Determine environment
-    is_prod = QuickSettings().is_production()
-
     # -----------------------------
     # Core / shared dependencies
     # -----------------------------
     container[Settings] = lambda: get_instance()
 
     # -----------------------------
-    # DatasetSourceRepositoryInterface
-    # -----------------------------
-
-    if is_prod:
-
-        @dependency_definition(container)
-        def build_bucket_dataset_source_repository() -> BucketDatasetSourceRepository:
-            return BucketDatasetSourceRepository(
-                bucket_reader=StorageService(),
-                settings=container[Settings]
-            )
-
-        container[DatasetSourceRepositoryInterface] = BucketDatasetSourceRepository
-
-    else:
-        container[DatasetSourceRepositoryInterface] = FakeDatasetSourceRepository
-
-    # -----------------------------
-    # DatasetStorageRepositoryInterface
+    # Apply profile
     # -----------------------------
+    profile = QuickSettings().get_profile()
 
-    if is_prod:
+    try:
+        profile_fn = PROFILES[profile]
+    except KeyError:
+        raise ValueError(f"Unknown profile '{profile}'. Available: {list(PROFILES.keys())}")
+    logger.info(f"Using profile {profile}")
 
-        @dependency_definition(container)
-        def build_firestore_dataset_storage_repository() -> FirestoreDatasetStorageRepository:
-            return FirestoreDatasetStorageRepository(
-                settings=container[Settings]
-            )
-
-        container[DatasetStorageRepositoryInterface] = FirestoreDatasetStorageRepository
-    else:
-        container[DatasetStorageRepositoryInterface] = FakeDatasetStorageRepository
+    # Apply profile
+    profile_fn(container)
 
     # -----------------------------
-    # DatasetDeletionRepositoryInterface
+    # Static Services
     # -----------------------------
 
-    if is_prod:
-
-        @dependency_definition(container)
-        def build_firestore_dataset_deletion_repository() -> FirestoreDatasetDeletionRepository:
-            return FirestoreDatasetDeletionRepository(
-                settings=container[Settings]
-            )
-
-        container[DatasetDeletionRepositoryInterface] = FirestoreDatasetDeletionRepository
-    else:
-        container[DatasetDeletionRepositoryInterface] = FakeDatasetDeletionRepository
-
-    # -----------------------------
-    # DatasetBroadcastInterface
-    # -----------------------------
-
-    if is_prod:
-
-        @dependency_definition(container)
-        def build_pubsub_broadcaster() -> PubsubBroadcaster:
-            return PubsubBroadcaster(
-                settings=container[Settings]
-            )
-
-        container[DatasetBroadcastInterface] = PubsubBroadcaster
-    else:
-        container[DatasetBroadcastInterface] = FakeBroadcaster
-
-    # Settings
     container[DatasetSettings] = lambda: get_instance()
-
-    # -----------------------------
-    # Services
-    # -----------------------------
-
-    # Schema Service
-
-    if is_prod:
-        container[SchemaService] = SchemaService(
-            bucket_publisher=GcsSchemaPublisher,
-            repository_publisher=GithubSchemaPublisher,
-        )
-    else:
-        container[SchemaService] = SchemaService(
-            bucket_publisher=FakePublisher(name="Fake bucket publisher"),
-            repository_publisher=FakePublisher(name="Fake github publisher"),
-        )
-
-    # Dataset service
     container[DatasetService] = Singleton(DatasetService)
 
     return container
diff --git a/app/exceptions/__init__.py b/app/exceptions/__init__.py
index 938d408..908cc4c 100644
--- a/app/exceptions/__init__.py
+++ b/app/exceptions/__init__.py
@@ -1,6 +1,4 @@
-
-class NonCriticalException(Exception):
-    ...
+class NonCriticalException(Exception): ...
 
 
 class DatasetException(Exception):
diff --git a/app/exceptions/dataset_deletion_empty_exception.py b/app/exceptions/dataset_deletion_empty_exception.py
index 359e693..20088fa 100644
--- a/app/exceptions/dataset_deletion_empty_exception.py
+++ b/app/exceptions/dataset_deletion_empty_exception.py
@@ -1,5 +1,4 @@
 from app.exceptions import NonCriticalException
 
 
-class DatasetDeletionEmptyException(NonCriticalException):
-    ...
+class DatasetDeletionEmptyException(NonCriticalException): ...
diff --git a/app/exceptions/dataset_deletion_exception.py b/app/exceptions/dataset_deletion_exception.py
index 086b2b9..75285f0 100644
--- a/app/exceptions/dataset_deletion_exception.py
+++ b/app/exceptions/dataset_deletion_exception.py
@@ -1,5 +1,4 @@
 from app.exceptions import DatasetException
 
 
-class DatasetDeletionException(DatasetException):
-    ...
+class DatasetDeletionException(DatasetException): ...
diff --git a/app/exceptions/dataset_deletion_mark_exception.py b/app/exceptions/dataset_deletion_mark_exception.py
index d3c1a66..78aab29 100644
--- a/app/exceptions/dataset_deletion_mark_exception.py
+++ b/app/exceptions/dataset_deletion_mark_exception.py
@@ -1,5 +1,4 @@
 from app.exceptions import DatasetException
 
 
-class DatasetDeletionMarkException(DatasetException):
-    ...
+class DatasetDeletionMarkException(DatasetException): ...
diff --git a/app/exceptions/dataset_invalid_filename_exception.py b/app/exceptions/dataset_invalid_filename_exception.py
index f612bb9..a0e959f 100644
--- a/app/exceptions/dataset_invalid_filename_exception.py
+++ b/app/exceptions/dataset_invalid_filename_exception.py
@@ -1,5 +1,4 @@
 from app.exceptions import DatasetException
 
 
-class DatasetInvalidFilenameException(DatasetException):
-    ...
+class DatasetInvalidFilenameException(DatasetException): ...
diff --git a/app/exceptions/dataset_metadata_retrival_exception.py b/app/exceptions/dataset_metadata_retrival_exception.py
index b42daf0..3be4b09 100644
--- a/app/exceptions/dataset_metadata_retrival_exception.py
+++ b/app/exceptions/dataset_metadata_retrival_exception.py
@@ -1,5 +1,4 @@
 from app.exceptions import DatasetException
 
 
-class DatasetMetadataRetrivalException(DatasetException):
-    ...
+class DatasetMetadataRetrivalException(DatasetException): ...
diff --git a/app/exceptions/dataset_not_found_exception.py b/app/exceptions/dataset_not_found_exception.py
index f7a6e68..96c5753 100644
--- a/app/exceptions/dataset_not_found_exception.py
+++ b/app/exceptions/dataset_not_found_exception.py
@@ -1,5 +1,4 @@
 from app.exceptions import DatasetException
 
 
-class DatasetNotFoundException(DatasetException):
-    ...
+class DatasetNotFoundException(DatasetException): ...
diff --git a/app/exceptions/dataset_source_empty_exception.py b/app/exceptions/dataset_source_empty_exception.py
index 8c6aefb..b7bb7c2 100644
--- a/app/exceptions/dataset_source_empty_exception.py
+++ b/app/exceptions/dataset_source_empty_exception.py
@@ -1,5 +1,4 @@
 from app.exceptions import NonCriticalException
 
 
-class DatasetSourceEmptyException(NonCriticalException):
-    ...
+class DatasetSourceEmptyException(NonCriticalException): ...
diff --git a/app/exceptions/dataset_storing_exception.py b/app/exceptions/dataset_storing_exception.py
index ee87181..ecbefd9 100644
--- a/app/exceptions/dataset_storing_exception.py
+++ b/app/exceptions/dataset_storing_exception.py
@@ -1,5 +1,4 @@
 from app.exceptions import DatasetException
 
 
-class DatasetStoringException(DatasetException):
-    ...
+class DatasetStoringException(DatasetException): ...
diff --git a/app/exceptions/dataset_validation_exception.py b/app/exceptions/dataset_validation_exception.py
index d876030..60327d7 100644
--- a/app/exceptions/dataset_validation_exception.py
+++ b/app/exceptions/dataset_validation_exception.py
@@ -1,5 +1,4 @@
 from app.exceptions import DatasetException
 
 
-class DatasetValidationException(DatasetException):
-    ...
+class DatasetValidationException(DatasetException): ...
diff --git a/app/exceptions/schema_source_invalid_exception.py b/app/exceptions/schema_source_invalid_exception.py
index 0cc94dd..392331b 100644
--- a/app/exceptions/schema_source_invalid_exception.py
+++ b/app/exceptions/schema_source_invalid_exception.py
@@ -1,6 +1,4 @@
-
 from app.exceptions import SchemaException
 
 
-class SchemaSourceInvalidException(SchemaException):
-    ...
+class SchemaSourceInvalidException(SchemaException): ...
diff --git a/app/interfaces/dataset_storage_repository_interface.py b/app/interfaces/dataset_storage_repository_interface.py
index 5eaeac3..cec14e7 100644
--- a/app/interfaces/dataset_storage_repository_interface.py
+++ b/app/interfaces/dataset_storage_repository_interface.py
@@ -13,11 +13,7 @@ class DatasetStorageRepositoryInterface(ABC):
     """
 
     @abstractmethod
-    def get_latest_dataset_metadata(
-        self,
-        survey_id: str,
-        period_id: str
-    ) -> DatasetMetadataWithoutId | None:
+    def get_latest_dataset_metadata(self, survey_id: str, period_id: str) -> DatasetMetadataWithoutId | None:
         """
         Gets the latest dataset for a given survey and period id
 
@@ -49,12 +45,7 @@ def store_dataset(
         ...
 
     @abstractmethod
-    def delete_dataset_version(
-        self,
-        survey_id: str,
-        period_id: str,
-        version: int
-    ):
+    def delete_dataset_version(self, survey_id: str, period_id: str, version: int):
         """
         Delete a specific version of the dataset from the repository
 
diff --git a/app/middleware/timing.py b/app/middleware/timing.py
index 4f29444..3650ce8 100644
--- a/app/middleware/timing.py
+++ b/app/middleware/timing.py
@@ -27,14 +27,8 @@ async def dispatch(self, request: Request, call_next):
 
         response = await call_next(request)
 
-        process_time = round(
-            time.perf_counter() - start_time,
-            4
-        )
-
-        logger.info(
-            f"{request.method} {path} "
-            f"took {process_time}s"
-        )
+        process_time = round(time.perf_counter() - start_time, 4)
+
+        logger.info(f"{request.method} {path} took {process_time}s")
 
         return response
diff --git a/app/models/__init__.py b/app/models/__init__.py
index 68be02c..6773454 100644
--- a/app/models/__init__.py
+++ b/app/models/__init__.py
@@ -2,8 +2,8 @@
 
 
 class StrictBase(BaseModel):
-    model_config = ConfigDict(extra='forbid', use_enum_values=True)
+    model_config = ConfigDict(extra="forbid", use_enum_values=True)
 
 
 class AllowExtraBase(BaseModel):
-    model_config = ConfigDict(extra='allow', use_enum_values=True)
+    model_config = ConfigDict(extra="allow", use_enum_values=True)
diff --git a/app/models/dataset.py b/app/models/dataset.py
index 67f4486..2d50e3a 100644
--- a/app/models/dataset.py
+++ b/app/models/dataset.py
@@ -8,7 +8,8 @@ class RawDatasetDataItem(AllowExtraBase):
     """
     Represents an item in the "data" block of the raw dataset
     """
-    unit_data: list | dict
+
+    unit_data: str
     identifier: str
 
 
@@ -17,6 +18,7 @@ class RawDataset(AllowExtraBase):
     Represents a raw dataset JSON found in the source repository
     (i.e a Bucket)
     """
+
     survey_id: str
     period_id: str
     form_types: list[str]
@@ -29,11 +31,12 @@ class UnitDataset(AllowExtraBase):
     Represents a unit from the datasets "data" block
     along with metadata from the parent dataset
     """
+
     dataset_id: Guid
     survey_id: str
     period_id: str
     form_types: list[str]
-    data: dict | list  # The actual data
+    data: str
 
 
 class DatasetMetadataWithoutId(AllowExtraBase):
@@ -41,6 +44,7 @@ class DatasetMetadataWithoutId(AllowExtraBase):
     Represents metadata about a dataset
     without the dataset_id (i.e. Guid)
     """
+
     survey_id: str
     period_id: str
     form_types: list[str]
@@ -55,6 +59,7 @@ class DatasetMetadata(AllowExtraBase):
     """
     Represents metadata about a dataset
     """
+
     dataset_id: Guid
     survey_id: str
     period_id: str
diff --git a/app/models/guid.py b/app/models/guid.py
index 293cef5..2a02b22 100644
--- a/app/models/guid.py
+++ b/app/models/guid.py
@@ -1,2 +1 @@
-
 Guid = str
diff --git a/app/profiles.py b/app/profiles.py
new file mode 100644
index 0000000..cd48ef4
--- /dev/null
+++ b/app/profiles.py
@@ -0,0 +1,134 @@
+from lagom import Container, dependency_definition
+
+from sds_common.publishers.gcs_schema_publisher import GcsSchemaPublisher
+from sds_common.publishers.github_schema_publisher import GithubSchemaPublisher
+from sdx_base.services.storage import StorageService
+
+from app.broadcasters.fake_broadcaster import FakeBroadcaster
+from app.broadcasters.pubsub_broadcaster import PubsubBroadcaster
+
+from app.interfaces.dataset_broadcast_interface import DatasetBroadcastInterface
+from app.interfaces.dataset_deletion_repository_interface import DatasetDeletionRepositoryInterface
+from app.interfaces.dataset_source_repository_interface import DatasetSourceRepositoryInterface
+from app.interfaces.dataset_storage_repository_interface import DatasetStorageRepositoryInterface
+
+from app.repositories.dataset_deletion.fake_dataset_deletion_repository import FakeDatasetDeletionRepository
+from app.repositories.dataset_deletion.firestore_dataset_deletion_repository import FirestoreDatasetDeletionRepository
+
+from app.repositories.dataset_source.bucket_dataset_source_repository import BucketDatasetSourceRepository
+from app.repositories.dataset_source.fake_dataset_source_repository import FakeDatasetSourceRepository
+
+from app.repositories.dataset_storage.fake_dataset_storage_repository import FakeDatasetStorageRepository
+from app.repositories.dataset_storage.firestore_dataset_storage_repository import FirestoreDatasetStorageRepository
+
+from app.services.schema_service import SchemaService
+from app.settings import Settings
+
+
+class FakePublisher:
+    def __init__(self, name: str):
+        self._name = name
+
+    def publish_schema(self, file_name: str):
+        print(f"Published: {file_name} to {self._name}")
+
+
+# -------------------------
+# DEV PROFILE
+# Everything fake
+# -------------------------
+
+
+def dev(container: Container):
+    container[DatasetSourceRepositoryInterface] = FakeDatasetSourceRepository
+
+    container[DatasetStorageRepositoryInterface] = FakeDatasetStorageRepository
+
+    container[DatasetDeletionRepositoryInterface] = FakeDatasetDeletionRepository
+
+    container[DatasetBroadcastInterface] = FakeBroadcaster
+
+    container[SchemaService] = SchemaService(
+        bucket_publisher=FakePublisher(name="Fake bucket publisher"),
+        repository_publisher=FakePublisher(name="Fake github publisher"),
+    )
+
+
+# -------------------------
+# PRODUCTION PROFILE
+# Everything real
+# -------------------------
+
+
+def production(container: Container):
+    @dependency_definition(container)
+    def build_bucket_dataset_source_repository() -> BucketDatasetSourceRepository:
+        return BucketDatasetSourceRepository(
+            bucket_reader=StorageService(),
+            settings=container[Settings],
+        )
+
+    @dependency_definition(container)
+    def build_firestore_dataset_storage_repository() -> FirestoreDatasetStorageRepository:
+        return FirestoreDatasetStorageRepository(
+            settings=container[Settings],
+        )
+
+    @dependency_definition(container)
+    def build_firestore_dataset_deletion_repository() -> FirestoreDatasetDeletionRepository:
+        return FirestoreDatasetDeletionRepository(
+            settings=container[Settings],
+        )
+
+    @dependency_definition(container)
+    def build_pubsub_broadcaster() -> PubsubBroadcaster:
+        return PubsubBroadcaster(
+            settings=container[Settings],
+        )
+
+    container[DatasetSourceRepositoryInterface] = BucketDatasetSourceRepository
+    container[DatasetStorageRepositoryInterface] = FirestoreDatasetStorageRepository
+    container[DatasetDeletionRepositoryInterface] = FirestoreDatasetDeletionRepository
+    container[DatasetBroadcastInterface] = PubsubBroadcaster
+    container[SchemaService] = SchemaService(
+        bucket_publisher=GcsSchemaPublisher,
+        repository_publisher=GithubSchemaPublisher,
+    )
+
+
+# -------------------------
+# local_storage_firestore
+# Everything fake except FirestoreDatasetStorageRepository
+# -------------------------
+
+
+def local_storage_firestore(container: Container):
+    @dependency_definition(container)
+    def build_firestore_dataset_storage_repository() -> FirestoreDatasetStorageRepository:
+        return FirestoreDatasetStorageRepository(
+            settings=container[Settings],
+        )
+
+    container[DatasetSourceRepositoryInterface] = FakeDatasetSourceRepository
+
+    container[DatasetStorageRepositoryInterface] = FirestoreDatasetStorageRepository
+
+    container[DatasetDeletionRepositoryInterface] = FakeDatasetDeletionRepository
+
+    container[DatasetBroadcastInterface] = FakeBroadcaster
+
+    container[SchemaService] = SchemaService(
+        bucket_publisher=FakePublisher(name="Fake bucket publisher"),
+        repository_publisher=FakePublisher(name="Fake github publisher"),
+    )
+
+
+# -------------------------
+# Profile registry
+# -------------------------
+
+PROFILES = {
+    "prod": production,  # Everything using production implementations
+    "dev": dev,  # Everything using fake implementations
+    "local_storage_firestore": local_storage_firestore,  # Everything fake except FirestoreDatasetStorageRepository
+}
diff --git a/app/repositories/dataset_deletion/fake_dataset_deletion_repository.py b/app/repositories/dataset_deletion/fake_dataset_deletion_repository.py
index cde40e1..9f13584 100644
--- a/app/repositories/dataset_deletion/fake_dataset_deletion_repository.py
+++ b/app/repositories/dataset_deletion/fake_dataset_deletion_repository.py
@@ -9,7 +9,6 @@ class FakeDatasetDeletionRepository(DatasetDeletionRepositoryInterface):
     """
 
     def __init__(self):
-
         # Key = guid, value = status
         self.delete_records = {}
 
diff --git a/app/repositories/dataset_deletion/firestore_dataset_deletion_repository.py b/app/repositories/dataset_deletion/firestore_dataset_deletion_repository.py
index 4da766e..ccc4c66 100644
--- a/app/repositories/dataset_deletion/firestore_dataset_deletion_repository.py
+++ b/app/repositories/dataset_deletion/firestore_dataset_deletion_repository.py
@@ -31,10 +31,7 @@ def __init__(
         self.settings = settings
 
         # Create a firestore client
-        self.client = firestore.Client(
-            project=self.settings.project_id,
-            database=self.settings.firestore_database
-        )
+        self.client = firestore.Client(project=self.settings.project_id, database=self.settings.firestore_database)
 
         # Initialize Firestore collections
         self.mark_deletion_collection = self.client.collection("marked_for_deletion")
@@ -43,28 +40,23 @@ def __init__(
         self.document_references = {}
 
     def mark_record_status(self, guid: Guid, status: DeleteStatus) -> None:
-
         # Look for the document reference in the cache
         doc_id = self.document_references.get(guid)
 
         # If for some reason the guid is not in the cache, query firestore for it
         if not doc_id:
-
             logger.warning(f"Marking record with guid that is not in repository cache: {guid}, looking in firestore...")
 
-            results = (
-                self.mark_deletion_collection
-                .where("dataset_guid", "==", guid)
-                .limit(1)
-                .stream()
-            )
+            results = self.mark_deletion_collection.where("dataset_guid", "==", guid).limit(1).stream()
 
             results_list = list(results)
 
             if len(results_list) > 0:
                 doc_id = results_list[0].id
             else:
-                raise DatasetDeletionMarkException(f"Could not mark record with guid {guid} as a record with this guid could not be found")
+                raise DatasetDeletionMarkException(
+                    f"Could not mark record with guid {guid} as a record with this guid could not be found"
+                )
 
         try:
             # Update the status of this record in firestore
@@ -73,7 +65,6 @@ def mark_record_status(self, guid: Guid, status: DeleteStatus) -> None:
 
             # If the status is deleted, then mark a timestamp
             if status == DeleteStatus.DELETED:
-
                 utc_dt = datetime.now(timezone.utc)  # UTC time
                 dt = utc_dt.astimezone()  # local time
                 timestamp = dt.strftime("%Y-%m-%dT%H:%M:%SZ")
@@ -82,21 +73,14 @@ def mark_record_status(self, guid: Guid, status: DeleteStatus) -> None:
             raise DatasetDeletionMarkException from e
 
     def get_dataset_to_delete(self) -> Guid | None:
-
         # First, try to fetch a PROCESSING dataset
-        processing = (
-            self.mark_deletion_collection
-            .where("status", "==", DeleteStatus.PROCESSING)
-            .limit(1)
-            .stream()
-        )
+        processing = self.mark_deletion_collection.where("status", "==", DeleteStatus.PROCESSING).limit(1).stream()
 
         processing_list = list(processing)
 
         # If a result is found that is "Processing"
 
         if len(processing_list) > 0:
-
             # Pick the first record
             doc = processing_list[0]
 
@@ -104,7 +88,7 @@ def get_dataset_to_delete(self) -> Guid | None:
             dataset = doc.to_dict()
 
             # Extract the fields we need
-            guid = dataset.get('dataset_guid')
+            guid = dataset.get("dataset_guid")
             doc_id = doc.id
 
             # Store in cache
@@ -114,12 +98,7 @@ def get_dataset_to_delete(self) -> Guid | None:
             return guid
 
         # If no "Processing" results found, fetch a PENDING dataset
-        pending = (
-            self.mark_deletion_collection
-            .where("status", "==", DeleteStatus.PENDING)
-            .limit(1)
-            .stream()
-        )
+        pending = self.mark_deletion_collection.where("status", "==", DeleteStatus.PENDING).limit(1).stream()
 
         pending_list = list(pending)
 
@@ -130,7 +109,7 @@ def get_dataset_to_delete(self) -> Guid | None:
             dataset = doc.to_dict()
 
             # Extract the fields we need
-            guid = dataset.get('dataset_guid')
+            guid = dataset.get("dataset_guid")
             doc_id = doc.id
 
             # Store in cache
diff --git a/app/repositories/dataset_source/bucket_dataset_source_repository.py b/app/repositories/dataset_source/bucket_dataset_source_repository.py
index d324438..bc3cf98 100644
--- a/app/repositories/dataset_source/bucket_dataset_source_repository.py
+++ b/app/repositories/dataset_source/bucket_dataset_source_repository.py
@@ -8,23 +8,17 @@
 
 
 class BucketReader(Protocol):
-
-    def get_blobs(self,
-                  bucket_name: str,
-                  project_id: Optional[str] = None,
-                  directory: Optional[str] = None) -> list[Blob]:
-        ...
+    def get_blobs(
+        self, bucket_name: str, project_id: Optional[str] = None, directory: Optional[str] = None
+    ) -> list[Blob]: ...
 
     def read(
         self, filename: str, bucket_name: str, sub_dir: Optional[str] = None, project_id: Optional[str] = None
     ) -> bytes: ...
 
-    def delete(self,
-               filename: str,
-               bucket_name: str,
-               sub_dir: Optional[str] = None,
-               project_id: Optional[str] = None) -> bool:
-        ...
+    def delete(
+        self, filename: str, bucket_name: str, sub_dir: Optional[str] = None, project_id: Optional[str] = None
+    ) -> bool: ...
 
 
 class BucketSettings(Protocol):
@@ -37,22 +31,14 @@ class BucketDatasetSourceRepository(DatasetSourceRepositoryInterface):
     uses GCP buckets
     """
 
-    def __init__(
-        self,
-        bucket_reader: BucketReader,
-        settings: BucketSettings
-    ) -> None:
+    def __init__(self, bucket_reader: BucketReader, settings: BucketSettings) -> None:
         self._bucket_reader = bucket_reader
         self._settings = settings
 
     def get_oldest_filename(self) -> str | None:
         blobs = self._bucket_reader.get_blobs(self._settings.dataset_bucket_name)
 
-        valid_blobs = (
-            blob
-            for blob in blobs
-            if blob.updated is not None and blob.name is not None
-        )
+        valid_blobs = (blob for blob in blobs if blob.updated is not None and blob.name is not None)
 
         oldest_blob = min(
             valid_blobs,
@@ -63,12 +49,8 @@ def get_oldest_filename(self) -> str | None:
         return oldest_blob.name if oldest_blob else None
 
     def get_raw_data(self, file_name: str) -> RawDataset | None:
-
         # Read the raw data from the bucket
-        data_bytes = self._bucket_reader.read(
-            file_name,
-            bucket_name=self._settings.dataset_bucket_name
-        )
+        data_bytes = self._bucket_reader.read(file_name, bucket_name=self._settings.dataset_bucket_name)
 
         # If not found, return None
         if data_bytes is None:
@@ -81,9 +63,5 @@ def get_raw_data(self, file_name: str) -> RawDataset | None:
         return RawDataset.model_validate(json_content)
 
     def delete_raw_data(self, file_name: str) -> None:
-
         # Delete from Bucket
-        self._bucket_reader.delete(
-            file_name,
-            bucket_name=self._settings.dataset_bucket_name
-        )
+        self._bucket_reader.delete(file_name, bucket_name=self._settings.dataset_bucket_name)
diff --git a/app/repositories/dataset_source/fake_dataset_source_repository.py b/app/repositories/dataset_source/fake_dataset_source_repository.py
index 406a88b..7c84609 100644
--- a/app/repositories/dataset_source/fake_dataset_source_repository.py
+++ b/app/repositories/dataset_source/fake_dataset_source_repository.py
@@ -1,4 +1,3 @@
-
 from app.factories.dataset_factories import RawDatasetFactory
 from app.interfaces.dataset_source_repository_interface import DatasetSourceRepositoryInterface
 from app.models.dataset import RawDataset
@@ -11,12 +10,8 @@ class FakeDatasetSourceRepository(DatasetSourceRepositoryInterface):
     """
 
     def __init__(self):
-
         # Store fake datasets
-        self.datasets = {
-            f"dataset_{x}.json": RawDatasetFactory.build()
-            for x in range(3)
-        }
+        self.datasets = {f"dataset_{x}.json": RawDatasetFactory.build() for x in range(3)}
 
     def get_oldest_filename(self) -> str | None:
         if not self.datasets:
@@ -26,7 +21,6 @@ def get_oldest_filename(self) -> str | None:
         return next(iter(self.datasets.keys()))
 
     def get_raw_data(self, file_name: str) -> RawDataset | None:
-
         if file_name in self.datasets:
             return self.datasets[file_name]
         return None
diff --git a/app/repositories/dataset_storage/fake_dataset_storage_repository.py b/app/repositories/dataset_storage/fake_dataset_storage_repository.py
index f16e249..4e8b18a 100644
--- a/app/repositories/dataset_storage/fake_dataset_storage_repository.py
+++ b/app/repositories/dataset_storage/fake_dataset_storage_repository.py
@@ -10,17 +10,12 @@ class FakeDatasetStorageRepository(DatasetStorageRepositoryInterface):
     """
 
     def __init__(self):
-
         # Store datasets in a dictionary
         # key is a tuple of (survey_id, period_id) and value is a 2D list of...
         # [dataset_id, dataset_metadata, unit_data_collection_with_metadata, unit_data_identifiers, marked_for_deletion?]
         self.datasets = {}
 
-    def get_latest_dataset_metadata(
-        self,
-        survey_id: str,
-        period_id: str
-    ) -> DatasetMetadataWithoutId | None:
+    def get_latest_dataset_metadata(self, survey_id: str, period_id: str) -> DatasetMetadataWithoutId | None:
         dataset = self.datasets.get((survey_id, period_id))
         if dataset is None:
             return None
@@ -33,7 +28,7 @@ def store_dataset(
         dataset_id: Guid,
         dataset_metadata: DatasetMetadataWithoutId,
         unit_data_collection_with_metadata: list[UnitDataset],
-        unit_data_identifiers: list[str]
+        unit_data_identifiers: list[str],
     ):
         dataset_key = (dataset_metadata.survey_id, dataset_metadata.period_id)
         self.datasets[dataset_key] = [
@@ -41,15 +36,10 @@ def store_dataset(
             dataset_metadata,
             unit_data_collection_with_metadata,
             unit_data_identifiers,
-            False  # marked for deletion?
+            False,  # marked for deletion?
         ]
 
-    def delete_dataset_version(
-        self,
-        survey_id: str,
-        period_id: str,
-        version: int
-    ):
+    def delete_dataset_version(self, survey_id: str, period_id: str, version: int):
         dataset_key = (survey_id, period_id)
         datasets = self.datasets.get(dataset_key)
 
diff --git a/app/repositories/dataset_storage/firestore_dataset_storage_repository.py b/app/repositories/dataset_storage/firestore_dataset_storage_repository.py
index 8e75cb1..aebc40f 100644
--- a/app/repositories/dataset_storage/firestore_dataset_storage_repository.py
+++ b/app/repositories/dataset_storage/firestore_dataset_storage_repository.py
@@ -22,16 +22,17 @@ class FirestoreDatasetStorageRepository(DatasetStorageRepositoryInterface):
     that uses firestore
     """
 
-    def __init__(
-        self,
-        settings: FirestoreSettings
-    ):
+    def __init__(self, settings: FirestoreSettings):
         self.settings = settings
 
         # Create a firestore client
         self.client = firestore.Client(
             project=self.settings.project_id,
-            database=self.settings.firestore_database
+            database=self.settings.firestore_database,
+        )
+
+        logger.info(
+            f"Connected to Firestore with project_id: {settings.project_id} and database: {settings.firestore_database}"
         )
 
         # Max size in bytes we can upload in one batch
@@ -41,15 +42,9 @@ def __init__(
         # Initialize Firestore collections
         self.dataset_collection = self.client.collection("datasets")
 
-    def get_latest_dataset_metadata(
-        self,
-        survey_id: str,
-        period_id: str
-    ) -> DatasetMetadataWithoutId | None:
-
+    def get_latest_dataset_metadata(self, survey_id: str, period_id: str) -> DatasetMetadataWithoutId | None:
         latest_dataset = (
-            self.dataset_collection
-            .where("survey_id", "==", survey_id)
+            self.dataset_collection.where("survey_id", "==", survey_id)
             .where("period_id", "==", period_id)
             .order_by("sds_dataset_version", direction=firestore.Query.DESCENDING)
             .limit(1)
@@ -64,16 +59,9 @@ def get_latest_dataset_metadata(
 
         return DatasetMetadataWithoutId.model_validate(datasets[0])
 
-    def _get_dataset_metadata(
-        self,
-        survey_id: str,
-        period_id: str,
-        version: int
-    ) -> DatasetMetadata | None:
-
+    def _get_dataset_metadata(self, survey_id: str, period_id: str, version: int) -> DatasetMetadata | None:
         latest_dataset = (
-            self.dataset_collection
-            .where("survey_id", "==", survey_id)
+            self.dataset_collection.where("survey_id", "==", survey_id)
             .where("period_id", "==", period_id)
             .where("version", "==", version)
             .limit(1)
@@ -93,24 +81,25 @@ def store_dataset(
         dataset_id: Guid,
         dataset_metadata: DatasetMetadataWithoutId,
         unit_data_collection_with_metadata: list[UnitDataset],
-        unit_data_identifiers: list[str]
+        unit_data_identifiers: list[str],
     ):
-
         # Write to firebase in batches or not depends on the settings
 
         if self.settings.should_batch:
+            logger.info("Writing to Firestore in BATCH mode")
             self._store_dataset_with_batching(
                 dataset_id=dataset_id,
                 dataset_metadata=dataset_metadata,
                 unit_data_collection_with_metadata=unit_data_collection_with_metadata,
-                unit_data_identifiers=unit_data_identifiers
+                unit_data_identifiers=unit_data_identifiers,
             )
         else:
+            logger.info("Writing to Firestore in NORMAL mode")
             self._store_dataset_without_batching(
                 dataset_id=dataset_id,
                 dataset_metadata=dataset_metadata,
                 unit_data_collection_with_metadata=unit_data_collection_with_metadata,
-                unit_data_identifiers=unit_data_identifiers
+                unit_data_identifiers=unit_data_identifiers,
             )
 
     def _store_dataset_without_batching(
@@ -118,7 +107,7 @@ def _store_dataset_without_batching(
         dataset_id: Guid,
         dataset_metadata: DatasetMetadataWithoutId,
         unit_data_collection_with_metadata: list[UnitDataset],
-        unit_data_identifiers: list[str]
+        unit_data_identifiers: list[str],
     ):
         """
         Write the data to firestore without batching
@@ -133,7 +122,7 @@ def _store_dataset_without_batching(
         units_collection = new_dataset_document.collection("units")
 
         # Go through unit data
-        for (unit_data, unit_identifier) in zip(unit_data_collection_with_metadata, unit_data_identifiers):
+        for unit_data, unit_identifier in zip(unit_data_collection_with_metadata, unit_data_identifiers):
             # Create and save the unit data as a new sub document
             units_collection.document(unit_identifier).set(unit_data.model_dump())
 
@@ -142,7 +131,7 @@ def _store_dataset_with_batching(
         dataset_id: Guid,
         dataset_metadata: DatasetMetadataWithoutId,
         unit_data_collection_with_metadata: list[UnitDataset],
-        unit_data_identifiers: list[str]
+        unit_data_identifiers: list[str],
     ):
         """
         Use batches to write the data to firestore
@@ -163,7 +152,7 @@ def _store_dataset_with_batching(
         batch_num_records = 0
 
         # Go through unit data
-        for (unit_data, unit_identifier) in zip(unit_data_collection_with_metadata, unit_data_identifiers):
+        for unit_data, unit_identifier in zip(unit_data_collection_with_metadata, unit_data_identifiers):
             """
             Add this unit to the current batch if ...
 
@@ -172,10 +161,12 @@ def _store_dataset_with_batching(
             """
 
             # Work out the size of this unit
-            unit_size = len(unit_data.model_dump().encode('utf-8'))
+            unit_size = len(unit_data.model_dump_json().encode("utf-8"))
 
             # Work out if the current batch is too big already
-            if (batch_size_bytes + unit_size >= self.MAX_BATCH_SIZE_BYTES) or (batch_num_records + 1 >= self.MAX_NUMBER_OF_WRITES_PER_BATCH):
+            if (batch_size_bytes + unit_size >= self.MAX_BATCH_SIZE_BYTES) or (
+                batch_num_records + 1 >= self.MAX_NUMBER_OF_WRITES_PER_BATCH
+            ):
                 # Commit the current batch
                 batch.commit()
 
@@ -194,13 +185,7 @@ def _store_dataset_with_batching(
         if batch_size_bytes > 0:
             batch.commit()
 
-    def delete_dataset_version(
-        self,
-        survey_id: str,
-        period_id: str,
-        version: int
-    ):
-
+    def delete_dataset_version(self, survey_id: str, period_id: str, version: int):
         dataset_metadata = self._get_dataset_metadata(survey_id, period_id, version)
 
         if not dataset_metadata:
@@ -211,7 +196,6 @@ def delete_dataset_version(
         self.delete_dataset_by_guid(dataset_metadata.dataset_id)
 
     def delete_dataset_by_guid(self, guid: Guid):
-
         # Get all the collections for this dataset
         collections = self.dataset_collection.document(guid).collections()
 
diff --git a/app/routes.py b/app/routes.py
index afa66a4..da55fc5 100644
--- a/app/routes.py
+++ b/app/routes.py
@@ -47,12 +47,9 @@ async def version():
 async def publish_schemas(
     request: Request,
     source: Annotated[
-        Literal["github", "bucket"],
-        Query(
-            description="The source of the files specified in this request."
-        )
+        Literal["github", "bucket"], Query(description="The source of the files specified in this request.")
     ] = "github",
-    schema_service: SchemaService = DEPS.depends(SchemaService)
+    schema_service: SchemaService = DEPS.depends(SchemaService),
 ):
     """
     This endpoint handles a publishing schemas from a given
@@ -70,12 +67,8 @@ async def publish_schemas(
 
     try:
         # Publish the new schemas
-        schema_service.publish_new_schemas(
-            source=source,
-            file_list=get_data(message).split("\n")
-        )
+        schema_service.publish_new_schemas(source=source, file_list=get_data(message).split("\n"))
     except NonCriticalException as e:
-
         # Return a status 200 (non-critical exception)
         return JSONResponse(
             status_code=200,
@@ -83,7 +76,6 @@ async def publish_schemas(
         )
 
     except (SchemaException, Exception) as e:
-
         return JSONResponse(
             status_code=500,
             content={"success": False, "message": "Exception publishing schema: " + str(e)},
@@ -96,9 +88,7 @@ async def publish_schemas(
 
 
 @router.get("/events/dataset/create")
-async def create_dataset(
-    dataset_service: DatasetService = DEPS.depends(DatasetService)
-):
+async def create_dataset(dataset_service: DatasetService = DEPS.depends(DatasetService)):
     """
     This endpoint handles creating a dataset
 
@@ -112,7 +102,6 @@ async def create_dataset(
     try:
         dataset_service.create_dataset()
     except NonCriticalException as e:
-
         # Return a status 200 (non-critical exception)
         return JSONResponse(
             status_code=200,
@@ -120,6 +109,7 @@ async def create_dataset(
         )
 
     except (DatasetException, Exception) as e:
+        logger.exception("Exception creating dataset")
 
         return JSONResponse(
             status_code=500,
@@ -133,9 +123,7 @@ async def create_dataset(
 
 
 @router.get("/events/dataset/delete")
-async def delete_dataset(
-    dataset_service: DatasetService = DEPS.depends(DatasetService)
-):
+async def delete_dataset(dataset_service: DatasetService = DEPS.depends(DatasetService)):
     """
     This endpoint deletes a dataset
 
@@ -149,7 +137,6 @@ async def delete_dataset(
     try:
         dataset_service.delete_dataset()
     except NonCriticalException as e:
-
         # Return a status 200 (non critical exception)
         return JSONResponse(
             status_code=200,
@@ -157,7 +144,6 @@ async def delete_dataset(
         )
 
     except (DatasetException, Exception) as e:
-
         return JSONResponse(
             status_code=500,
             content={"success": False, "message": "Exception deleting dataset: " + str(e)},
diff --git a/app/services/dataset_service.py b/app/services/dataset_service.py
index 141bd73..dd46bc3 100644
--- a/app/services/dataset_service.py
+++ b/app/services/dataset_service.py
@@ -42,7 +42,7 @@ def __init__(
         dataset_storage_repo: DatasetStorageRepositoryInterface,
         dataset_deletion_repo: DatasetDeletionRepositoryInterface,
         broadcaster: DatasetBroadcastInterface,
-        settings: DatasetSettings
+        settings: DatasetSettings,
     ):
         self.dataset_source_repo = dataset_source_repo
         self.dataset_storage_repo = dataset_storage_repo
@@ -160,11 +160,13 @@ def create_dataset(self):
         # Determine next dataset version based on the latest dataset version
         if current_dataset:
             logger.info(
-                f"Found previous dataset version: {current_dataset.sds_dataset_version} for survey {raw_dataset.survey_id}, period {raw_dataset.period_id}, incrementing version for new dataset")
+                f"Found previous dataset version: {current_dataset.sds_dataset_version} for survey {raw_dataset.survey_id}, period {raw_dataset.period_id}, incrementing version for new dataset"
+            )
             new_version = current_dataset.sds_dataset_version + 1
         else:
             logger.info(
-                f"Could not find a previous dataset version for survey {raw_dataset.survey_id}, period {raw_dataset.period_id}, setting version to 1")
+                f"Could not find a previous dataset version for survey {raw_dataset.survey_id}, period {raw_dataset.period_id}, setting version to 1"
+            )
             new_version = 1
 
         # Create a new dataset_metadata object to store
@@ -194,14 +196,11 @@ def create_dataset(self):
                 form_types=new_dataset_metadata.form_types,
                 data=item.unit_data,
             )
-
             for item in raw_dataset.data
         ]
 
         # Fetch a list of the identifiers for the unit data in the dataset
-        unit_data_identifiers = [
-            item.identifier for item in raw_dataset.data
-        ]
+        unit_data_identifiers = [item.identifier for item in raw_dataset.data]
 
         # Write the new dataset to storage (firestore)
 
@@ -215,7 +214,6 @@ def create_dataset(self):
                 unit_data_identifiers=unit_data_identifiers,
             )
         except Exception as e:
-
             logger.error(f"Failed to save new dataset to storage repository, cleaning up: {e}")
             # If an error occurs, ensure this is fully deleted
             self.dataset_storage_repo.delete_dataset_by_guid(dataset_id)
@@ -225,10 +223,7 @@ def create_dataset(self):
         logger.info(f"Dataset saved to storage successfully: {dataset_id}")
 
         # Create a DatasetMetadata object
-        dataset_metadata = DatasetMetadata(
-            dataset_id=dataset_id,
-            **new_dataset_metadata.model_dump()
-        )
+        dataset_metadata = DatasetMetadata(dataset_id=dataset_id, **new_dataset_metadata.model_dump())
 
         # Broadcast the dataset has been created (pubsub)
         self.broadcaster.broadcast(dataset_metadata)
@@ -248,13 +243,7 @@ def create_dataset(self):
 
             logger.info(f"Dataset creation process completed: {dataset_id}")
 
-    def _cleanup(
-        self,
-        survey_id: str,
-        period_id: str,
-        new_version: int,
-        older_version: int
-    ):
+    def _cleanup(self, survey_id: str, period_id: str, new_version: int, older_version: int):
         """
         Determine if the other versions of the dataset should be deleted
 
@@ -268,17 +257,16 @@ def _cleanup(
 
         # Delete the previous version if the retention flag is false and this is not v1
         if not self.settings.retain_old_datasets and new_version > 1:
-
             logger.info("Deleting previous version of dataset...")
 
             # Delete the old version
             self.dataset_storage_repo.delete_dataset_version(
-                survey_id=survey_id,
-                period_id=period_id,
-                version=older_version
+                survey_id=survey_id, period_id=period_id, version=older_version
             )
 
-            logger.info(f"Older Dataset deleted successfully: survey: {survey_id}, period: {period_id}, version: {older_version}")
+            logger.info(
+                f"Older Dataset deleted successfully: survey: {survey_id}, period: {period_id}, version: {older_version}"
+            )
         else:
             logger.info("Nothing to cleanup")
 
@@ -304,9 +292,7 @@ def delete_dataset(self):
 
         if not dataset_guid_to_delete:
             logger.info("No datasets marked for deletion (skipping process)")
-            raise DatasetDeletionEmptyException(
-                "No datasets marked for deletion in the storage repository"
-            )
+            raise DatasetDeletionEmptyException("No datasets marked for deletion in the storage repository")
 
         logger.info(f"Selected dataset to delete: {dataset_guid_to_delete}")
 
@@ -321,7 +307,6 @@ def delete_dataset(self):
             self.dataset_storage_repo.delete_dataset_by_guid(dataset_guid_to_delete)
 
         except (DatasetDeletionException, Exception) as e:
-
             logger.error("Error deleting dataset, updating delete record status to ERROR")
 
             # If an error occurred, update the status for the delete record
diff --git a/app/services/schema_service.py b/app/services/schema_service.py
index b92d1f3..18d45f1 100644
--- a/app/services/schema_service.py
+++ b/app/services/schema_service.py
@@ -13,8 +13,7 @@ class PublisherProtocol(Protocol):
     which is responsible for publishing schema files.
     """
 
-    def publish_schema(self, file_name: str):
-        ...
+    def publish_schema(self, file_name: str): ...
 
 
 class SchemaService:
@@ -30,7 +29,7 @@ def __init__(
         self.bucket_publisher = bucket_publisher
         self.repository_publisher = repository_publisher
 
-    def _publish_single_file(self, file_name: str, publisher: PublisherProtocol): # noqa
+    def _publish_single_file(self, file_name: str, publisher: PublisherProtocol):  # noqa
         """
         Publish a single file using a given publisher
         :param file_name: name of the file to be published
@@ -42,18 +41,14 @@ def _publish_single_file(self, file_name: str, publisher: PublisherProtocol): #
         except Exception as e:
             logger.error(f"Failed to publish schema {file_name}: {e}")
 
-    def _filter_github_files(self, files: list[str]) -> list[str]: # noqa
+    def _filter_github_files(self, files: list[str]) -> list[str]:  # noqa
         """
         Filter a list of file names to publish by the regex
         """
         pattern = re.compile(r"^schemas/[^/]+/[^/]+\.json$")
         return [f for f in files if pattern.match(f)]
 
-    def publish_new_schemas(
-        self,
-        source: str,
-        file_list: list[str]
-    ):
+    def publish_new_schemas(self, source: str, file_list: list[str]):
         """
         Take a list of file names and publish each one using a SchemaPublisher that is identified using publisher
         If any schema fails to publish, it will print an error message but continue processing the remaining files.
diff --git a/app/settings.py b/app/settings.py
index 61eac3e..445f586 100644
--- a/app/settings.py
+++ b/app/settings.py
@@ -11,11 +11,12 @@ class QuickSettings(BaseSettings):
     Quick settings are settings that are needed before
     SDX Base populates the AppSettings class.
     """
-    model_config = SettingsConfigDict(env_file='.env', extra='ignore')
-    env: str = 'production'
 
-    def is_production(self) -> bool:
-        return self.env.lower() in ('production', 'prod')
+    model_config = SettingsConfigDict(env_file=".env", extra="ignore")
+    profile: str = "prod"
+
+    def get_profile(self) -> str:
+        return self.profile.lower()
 
 
 class Settings(AppSettings):
@@ -27,8 +28,11 @@ class Settings(AppSettings):
     dataset_bucket_name: The bucket name to pick up datasets from
     firestore_database: The Firestore database to publish datasets to
     publish_dataset_topic_id: The Pub/Sub topic ID to publish dataset updates to
+
+    # Note this will get overridden by any duplicate entries in bash profile
     """
-    project_id: str
+
+    project_id: str = "ons-sds-sandbox"
     autodelete_dataset: bool = True
     retain_old_dataset: bool = True
     should_batch: bool = True
diff --git a/pyproject.toml b/pyproject.toml
index 02cfd0f..58e2784 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "sds-loader"
-version = "0.1.1"
-description = "A service for loading new sds schemas and datasets"
+version = "0.1.2"
+description = "A service for loading new SDS schemas and datasets"
 requires-python = "==3.13.*"
 dependencies = [
     "sdx-base>=0.2.9",
diff --git a/run.py b/run.py
index 2d405a3..b093785 100644
--- a/run.py
+++ b/run.py
@@ -45,18 +45,11 @@ async def smart_txid(request: Request) -> str:
 
 
 # Basic router configuration
-router_1 = RouterConfig(
-    router, tx_id_getter=smart_txid
-)
+router_1 = RouterConfig(router, tx_id_getter=smart_txid)
 
 
 # Initialize the FastAPI app with the specified settings, routers, and project root.
-app: FastAPI = initialise(
-    settings=Settings,
-    routers=[router_1],
-    middleware=[TimingMiddleware],
-    proj_root=ROOT
-)
+app: FastAPI = initialise(settings=Settings, routers=[router_1], middleware=[TimingMiddleware], proj_root=ROOT)
 
 # Fetch the populated settings
 settings = get_instance()
@@ -68,7 +61,4 @@ async def smart_txid(request: Request) -> str:
 if __name__ == "__main__":
     print(load_startup_banner())
 
-    default_server(
-        app,
-        port=settings.port
-    )
+    default_server(app, port=settings.port)
diff --git a/tests/conftest.py b/tests/conftest.py
index 2abd3dd..964879f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -37,6 +37,7 @@ class MockPublisher:
     Mock Publisher class, allows
     tracking of published schemas and side effects for testing purposes.
     """
+
     def __init__(self, label: str):
         self.label = label
         self.side_effects = {}
@@ -55,6 +56,7 @@ def publish_schema(self, file_name: str):
 
         self.published_schemas.append(file_name)
 
+
 # ------------------------
 # Testing classes for dataset_service
 # ------------------------
@@ -79,9 +81,7 @@ def broadcast(self, dataset_metadata: DatasetMetadata) -> None:
 
 @pytest.fixture
 def mock_repo_publisher() -> MockPublisher:
-    return MockPublisher(
-        label="repo publisher"
-    )
+    return MockPublisher(label="repo publisher")
 
 
 @pytest.fixture
@@ -90,6 +90,7 @@ def mock_bucket_publisher() -> MockPublisher:
         label="bucket publisher",
     )
 
+
 # ------------------------
 # Fixtures for dataset_service
 # ------------------------
@@ -119,6 +120,7 @@ def mock_broadcaster() -> DatasetBroadcastInterface:
 # App fixture
 # ------------------------
 
+
 @pytest.fixture
 def test_app() -> FastAPI:
     """
@@ -138,11 +140,7 @@ def get_secret(self, project_id: str, secret_id: str) -> str:
 
     return initialise(
         settings=FakeSettings,
-        routers=[
-            RouterConfig(
-                router, tx_id_getter=txid_not_applicable
-            )
-        ],
+        routers=[RouterConfig(router, tx_id_getter=txid_not_applicable)],
         proj_root=ROOT,
         secret_reader=MockSecretReader,
     )
diff --git a/tests/integration/test_create_dataset_endpoint.py b/tests/integration/test_create_dataset_endpoint.py
index c01e2b2..f00c66d 100644
--- a/tests/integration/test_create_dataset_endpoint.py
+++ b/tests/integration/test_create_dataset_endpoint.py
@@ -8,7 +8,6 @@
 
 
 class TestCreateDatasetEndpoint:
-
     def test_create_dataset_first_version(
         self,
         test_app: FastAPI,
@@ -16,7 +15,7 @@ def test_create_dataset_first_version(
         mock_dataset_storage_repo,
         mock_dataset_deletion_repo,
         mock_broadcaster: MockBroadcaster,
-        raw_dataset_factory: RawDatasetFactory
+        raw_dataset_factory: RawDatasetFactory,
     ):
         """
         Test a happy path for creating a dataset endpoint.
@@ -50,7 +49,6 @@ def test_create_dataset_first_version(
         mock_dataset_storage_repo.get_latest_dataset_metadata.return_value = None
 
         with DEPS.override_for_test() as test_container:
-
             # For this test we set autodelete to True
             class MockSettings:
                 autodelete_dataset: bool = True
@@ -62,7 +60,7 @@ class MockSettings:
                 dataset_storage_repo=mock_dataset_storage_repo,
                 dataset_deletion_repo=mock_dataset_deletion_repo,
                 broadcaster=mock_broadcaster,
-                settings=MockSettings()
+                settings=MockSettings(),
             )
 
             # Create a TestClient instance
@@ -99,7 +97,6 @@ def test_create_dataset_returns_200_if_source_repository_empty(
         mock_dataset_source_repo.get_oldest_filename.return_value = None
 
         with DEPS.override_for_test() as test_container:
-
             # For this test we set autodelete to True
             class MockSettings:
                 autodelete_dataset: bool = True
@@ -111,7 +108,7 @@ class MockSettings:
                 dataset_storage_repo=mock_dataset_storage_repo,
                 dataset_deletion_repo=mock_dataset_deletion_repo,
                 broadcaster=mock_broadcaster,
-                settings=MockSettings()
+                settings=MockSettings(),
             )
 
             # Create a TestClient instance
@@ -148,7 +145,6 @@ def test_create_dataset_returns_500_if_the_filename_is_invalid(
         mock_dataset_source_repo.get_oldest_filename.return_value = "invalid"
 
         with DEPS.override_for_test() as test_container:
-
             # For this test we set autodelete to True
             class MockSettings:
                 autodelete_dataset: bool = True
@@ -160,7 +156,7 @@ class MockSettings:
                 dataset_storage_repo=mock_dataset_storage_repo,
                 dataset_deletion_repo=mock_dataset_deletion_repo,
                 broadcaster=mock_broadcaster,
-                settings=MockSettings()
+                settings=MockSettings(),
             )
 
             # Create a TestClient instance
diff --git a/tests/integration/test_delete_dataset_endpoint.py b/tests/integration/test_delete_dataset_endpoint.py
index b4e87ff..9e40277 100644
--- a/tests/integration/test_delete_dataset_endpoint.py
+++ b/tests/integration/test_delete_dataset_endpoint.py
@@ -7,7 +7,6 @@
 
 
 class TestDeleteDatasetEndpoint:
-
     def test_returns_200_when_no_datasets_are_to_be_deleted(
         self,
         test_app: FastAPI,
@@ -29,7 +28,6 @@ def test_returns_200_when_no_datasets_are_to_be_deleted(
         mock_dataset_deletion_repo.get_dataset_to_delete.return_value = None
 
         with DEPS.override_for_test() as test_container:
-
             # For this test we set autodelete to True
             class MockSettings:
                 autodelete_dataset: bool = False
@@ -41,7 +39,7 @@ class MockSettings:
                 dataset_storage_repo=mock_dataset_storage_repo,
                 dataset_deletion_repo=mock_dataset_deletion_repo,
                 broadcaster=mock_broadcaster,
-                settings=MockSettings()
+                settings=MockSettings(),
             )
 
             # Create a TestClient instance
@@ -86,7 +84,6 @@ def error():
         mock_dataset_storage_repo.delete_dataset_by_guid.side_effect = error
 
         with DEPS.override_for_test() as test_container:
-
             # For this test we set autodelete to True
             class MockSettings:
                 autodelete_dataset: bool = False
@@ -98,7 +95,7 @@ class MockSettings:
                 dataset_storage_repo=mock_dataset_storage_repo,
                 dataset_deletion_repo=mock_dataset_deletion_repo,
                 broadcaster=mock_broadcaster,
-                settings=MockSettings()
+                settings=MockSettings(),
             )
 
             # Create a TestClient instance
diff --git a/tests/integration/test_publish_schemas_endpoint.py b/tests/integration/test_publish_schemas_endpoint.py
index b48f14c..eb79314 100644
--- a/tests/integration/test_publish_schemas_endpoint.py
+++ b/tests/integration/test_publish_schemas_endpoint.py
@@ -10,16 +10,13 @@
 
 
 class TestPublishSchemasEndpoint:
-
     def _encode_filenames(self, filenames: str) -> Envelope:
         """
         Helper method to encode a string of filenames into the format expected by our endpoint
         """
 
         # Encode the message
-        encoded_data = base64.b64encode(
-            filenames.encode("utf-8")
-        ).decode("utf-8")
+        encoded_data = base64.b64encode(filenames.encode("utf-8")).decode("utf-8")
 
         # Create a fake Message object to simulate a pubsub object
         message: Message = {
@@ -33,10 +30,7 @@ def _encode_filenames(self, filenames: str) -> Envelope:
         return {"message": message, "subscription": ""}
 
     def test_publish_schemas_to_github_with_all_valid_schemas(
-        self,
-        test_app: FastAPI,
-        mock_repo_publisher: MockPublisher,
-        mock_bucket_publisher: MockPublisher
+        self, test_app: FastAPI, mock_repo_publisher: MockPublisher, mock_bucket_publisher: MockPublisher
     ):
         """
         Test our publish schemas endpoint (to GitHub)
@@ -48,7 +42,6 @@ def test_publish_schemas_to_github_with_all_valid_schemas(
         """
 
         with DEPS.override_for_test() as test_container:
-
             # Create our own schema service to use in this app
             test_container[SchemaService] = SchemaService(
                 repository_publisher=mock_repo_publisher,
@@ -63,8 +56,7 @@ def test_publish_schemas_to_github_with_all_valid_schemas(
 
             # Make a POST request to the /events/schema/publish endpoint and specify "source" to be GitHub
             response = client.post(
-                "/events/schema/publish?source=github",
-                json=self._encode_filenames(received_filenames)
+                "/events/schema/publish?source=github", json=self._encode_filenames(received_filenames)
             )
 
             # Assert a 200 status code
@@ -79,10 +71,7 @@ def test_publish_schemas_to_github_with_all_valid_schemas(
             assert len(mock_bucket_publisher.published_schemas) == 0
 
     def test_publish_schemas_to_bucket_with_all_valid_schemas(
-        self,
-        test_app: FastAPI,
-        mock_repo_publisher: MockPublisher,
-        mock_bucket_publisher: MockPublisher
+        self, test_app: FastAPI, mock_repo_publisher: MockPublisher, mock_bucket_publisher: MockPublisher
     ):
         """
         Test our publish schemas endpoint with a single
@@ -93,7 +82,6 @@ def test_publish_schemas_to_bucket_with_all_valid_schemas(
         """
 
         with DEPS.override_for_test() as test_container:
-
             # Create our own schema service to use in this app
             test_container[SchemaService] = SchemaService(
                 repository_publisher=mock_repo_publisher,
@@ -108,8 +96,7 @@ def test_publish_schemas_to_bucket_with_all_valid_schemas(
 
             # Make a POST request to the /events/schema/publish endpoint and specify "source" to be bucket
             response = client.post(
-                "/events/schema/publish?source=bucket",
-                json=self._encode_filenames(received_filenames)
+                "/events/schema/publish?source=bucket", json=self._encode_filenames(received_filenames)
             )
 
             # Assert a 200 status code
@@ -122,10 +109,7 @@ def test_publish_schemas_to_bucket_with_all_valid_schemas(
             assert len(mock_repo_publisher.published_schemas) == 0
 
     def test_publish_schemas_to_github_with_some_invalid_filenames(
-        self,
-        test_app: FastAPI,
-        mock_repo_publisher: MockPublisher,
-        mock_bucket_publisher: MockPublisher
+        self, test_app: FastAPI, mock_repo_publisher: MockPublisher, mock_bucket_publisher: MockPublisher
     ):
         """
         Test our publish schemas endpoint (to GitHub)
@@ -141,7 +125,6 @@ def test_publish_schemas_to_github_with_some_invalid_filenames(
         """
 
         with DEPS.override_for_test() as test_container:
-
             # Create our own schema service to use in this app
             test_container[SchemaService] = SchemaService(
                 repository_publisher=mock_repo_publisher,
@@ -149,15 +132,16 @@ def test_publish_schemas_to_github_with_some_invalid_filenames(
             )
 
             # Create fake files to simulate new added schemas sent to loader
-            received_filenames = "schemas/abc/v1.json\nother/foo/v2.json\nschemas/abc/v3.json\nother/foo/v2_template.json"
+            received_filenames = (
+                "schemas/abc/v1.json\nother/foo/v2.json\nschemas/abc/v3.json\nother/foo/v2_template.json"
+            )
 
             # Create a TestClient instance
             client = TestClient(test_app)
 
             # Make a POST request to the /events/schema/publish endpoint and specify "source" to be GitHub
             response = client.post(
-                "/events/schema/publish?source=github",
-                json=self._encode_filenames(received_filenames)
+                "/events/schema/publish?source=github", json=self._encode_filenames(received_filenames)
             )
 
             # Assert a 200 status code
diff --git a/tests/unit/services/test_dataset_service.py b/tests/unit/services/test_dataset_service.py
index 9923701..6b8cc2a 100644
--- a/tests/unit/services/test_dataset_service.py
+++ b/tests/unit/services/test_dataset_service.py
@@ -1,4 +1,3 @@
-
 import pytest
 
 from app.enums.delete_status import DeleteStatus
@@ -19,13 +18,12 @@
 
 
 class TestCreateDataset:
-
     def test_raises_exception_if_source_is_empty(
         self,
         mock_dataset_source_repo: DatasetSourceRepositoryInterface,
         mock_dataset_storage_repo: DatasetStorageRepositoryInterface,
         mock_dataset_deletion_repo: DatasetDeletionRepositoryInterface,
-        mock_broadcaster
+        mock_broadcaster,
     ):
         """
         Test that when the dataset source repository where datasets are picked up from is empty
@@ -51,7 +49,6 @@ class MockSettings:
 
         # Call create_dataset and assert that it raises the expected exception
         with pytest.raises(DatasetSourceEmptyException):
-
             service.create_dataset()
 
     def test_raises_exception_if_filename_invalid(
@@ -59,7 +56,7 @@ def test_raises_exception_if_filename_invalid(
         mock_dataset_source_repo: DatasetSourceRepositoryInterface,
         mock_dataset_storage_repo: DatasetStorageRepositoryInterface,
         mock_dataset_deletion_repo: DatasetDeletionRepositoryInterface,
-        mock_broadcaster
+        mock_broadcaster,
     ):
         """
         Test that the filename in the dataset source repository is invalid
@@ -95,7 +92,7 @@ def test_raises_exception_and_autodeletes_dataset_if_filename_invalid(
         mock_dataset_source_repo: DatasetSourceRepositoryInterface,
         mock_dataset_storage_repo: DatasetStorageRepositoryInterface,
         mock_dataset_deletion_repo: DatasetDeletionRepositoryInterface,
-        mock_broadcaster
+        mock_broadcaster,
     ):
         """
         Test that the filename in the dataset source repository is invalid
@@ -133,7 +130,7 @@ def test_dataset_is_fully_deleted_if_exception_occurs_during_creation(
         mock_dataset_deletion_repo: DatasetDeletionRepositoryInterface,
         mock_broadcaster,
         raw_dataset_factory: RawDatasetFactory,
-        dataset_metadata_without_id_factory: DatasetMetadataWithoutIdFactory
+        dataset_metadata_without_id_factory: DatasetMetadataWithoutIdFactory,
     ):
         """
         Test that if an exception occurs when saving the dataset to the storage repository,
@@ -186,7 +183,7 @@ def test_raises_exception_when_file_contents_are_invalid(
         mock_dataset_source_repo: DatasetSourceRepositoryInterface,
         mock_dataset_storage_repo: DatasetStorageRepositoryInterface,
         mock_dataset_deletion_repo: DatasetDeletionRepositoryInterface,
-        mock_broadcaster
+        mock_broadcaster,
     ):
         """
         Test that when the dataset is read in from the source repository
@@ -225,7 +222,7 @@ def test_raises_exception_and_autodeletes_when_file_contents_are_invalid(
         mock_dataset_source_repo: DatasetSourceRepositoryInterface,
         mock_dataset_storage_repo: DatasetStorageRepositoryInterface,
         mock_dataset_deletion_repo: DatasetDeletionRepositoryInterface,
-        mock_broadcaster
+        mock_broadcaster,
     ):
         """
         Test that when the dataset is read in from the source repository
@@ -264,7 +261,7 @@ def test_raises_exception_if_cannot_find_dataset_in_source_repo(
         mock_dataset_source_repo: DatasetSourceRepositoryInterface,
         mock_dataset_storage_repo: DatasetStorageRepositoryInterface,
         mock_dataset_deletion_repo: DatasetDeletionRepositoryInterface,
-        mock_broadcaster
+        mock_broadcaster,
     ):
         """
         Test that if the specified filename for a dataset cannot be found in the source repository
@@ -302,7 +299,7 @@ def test_autodelete_after_successfully_reading_dataset(
         mock_dataset_deletion_repo: DatasetDeletionRepositoryInterface,
         mock_broadcaster,
         raw_dataset_factory: RawDatasetFactory,
-        dataset_metadata_without_id_factory: DatasetMetadataWithoutIdFactory
+        dataset_metadata_without_id_factory: DatasetMetadataWithoutIdFactory,
     ):
         """
         Test that when autodelete is set to True the dataset is automatically deleted
@@ -354,7 +351,7 @@ def test_does_not_autodelete_after_successfully_reading_dataset_if_autodelete_fa
         mock_dataset_deletion_repo: DatasetDeletionRepositoryInterface,
         mock_broadcaster,
         raw_dataset_factory: RawDatasetFactory,
-        dataset_metadata_without_id_factory: DatasetMetadataWithoutIdFactory
+        dataset_metadata_without_id_factory: DatasetMetadataWithoutIdFactory,
     ):
         """
         Test that when autodelete is set to False the dataset should not be autodeleted
@@ -405,7 +402,7 @@ def test_increments_dataset_version(
         mock_dataset_deletion_repo: DatasetDeletionRepositoryInterface,
         mock_broadcaster,
         raw_dataset_factory: RawDatasetFactory,
-        dataset_metadata_without_id_factory: DatasetMetadataWithoutIdFactory
+        dataset_metadata_without_id_factory: DatasetMetadataWithoutIdFactory,
     ):
         """
         Test that if a dataset exists for the current survey_id and period
@@ -465,7 +462,7 @@ class MockSettings:
         args, kwargs = mock_dataset_storage_repo.store_dataset.call_args
 
         # Extract just the dataset_metadata argument
-        dataset_metadata: DatasetMetadataWithoutId = kwargs['dataset_metadata']
+        dataset_metadata: DatasetMetadataWithoutId = kwargs["dataset_metadata"]
 
         # Assert the metadata contains correct data
         assert dataset_metadata.survey_id == survey_id
@@ -534,7 +531,7 @@ class MockSettings:
         args, kwargs = mock_dataset_storage_repo.store_dataset.call_args
 
         # Extract just the dataset_metadata argument
-        dataset_metadata: DatasetMetadataWithoutId = kwargs['dataset_metadata']
+        dataset_metadata: DatasetMetadataWithoutId = kwargs["dataset_metadata"]
 
         # Assert the metadata contains correct data
         assert dataset_metadata.survey_id == survey_id
@@ -547,7 +544,7 @@ def test_stored_successfully_when_created(
         mock_dataset_storage_repo: DatasetStorageRepositoryInterface,
         mock_dataset_deletion_repo: DatasetDeletionRepositoryInterface,
         mock_broadcaster,
-        raw_dataset_factory: RawDatasetFactory
+        raw_dataset_factory: RawDatasetFactory,
     ):
         """
         Test the dataset_storage_repo is called correctly when a valid
@@ -573,17 +570,11 @@ def test_stored_successfully_when_created(
             data=[
                 {
                     "identifier": "abc",
-                    "unit_data": ["hello", "world"],
+                    "unit_data": "hello",
                 },
-                {
-                    "identifier": "def",
-                    "unit_data": []
-                },
-                {
-                    "identifier": "ghi",
-                    "unit_data": ["test"]
-                }
-            ]
+                {"identifier": "def", "unit_data": "world"},
+                {"identifier": "ghi", "unit_data": "test"},
+            ],
         )
 
         # ------------------------
@@ -619,11 +610,9 @@ class MockSettings:
         # Check unit_data_identifiers
         # ------------------------
 
-        unit_data_identifiers = kwargs['unit_data_identifiers']
+        unit_data_identifiers = kwargs["unit_data_identifiers"]
 
-        expected_identifiers = [
-            "abc", "def", "ghi"
-        ]
+        expected_identifiers = ["abc", "def", "ghi"]
 
         assert unit_data_identifiers == expected_identifiers
 
@@ -631,7 +620,7 @@ class MockSettings:
         # Check unit_data_collection_with_metadata
         # ------------------------
 
-        unit_data_collection_with_metadata = kwargs['unit_data_collection_with_metadata']
+        unit_data_collection_with_metadata = kwargs["unit_data_collection_with_metadata"]
 
         # There should be 3 sets of unit_data
         assert len(unit_data_collection_with_metadata) == 3
@@ -641,15 +630,15 @@ class MockSettings:
 
         assert unit_data_collection_with_metadata[0].survey_id == survey_id
         assert unit_data_collection_with_metadata[0].period_id == period_id
-        assert unit_data_collection_with_metadata[0].data == ["hello", "world"]
+        assert unit_data_collection_with_metadata[0].data == "hello"
 
         assert unit_data_collection_with_metadata[1].survey_id == survey_id
         assert unit_data_collection_with_metadata[1].period_id == period_id
-        assert unit_data_collection_with_metadata[1].data == []
+        assert unit_data_collection_with_metadata[1].data == "world"
 
         assert unit_data_collection_with_metadata[2].survey_id == survey_id
         assert unit_data_collection_with_metadata[2].period_id == period_id
-        assert unit_data_collection_with_metadata[2].data == ["test"]
+        assert unit_data_collection_with_metadata[2].data == "test"
 
     def test_broadcasts_dataset_when_created_successfully(
         self,
@@ -724,7 +713,7 @@ def test_cleanup_old_dataset_version_when_created_successfully(
         mock_dataset_deletion_repo: DatasetDeletionRepositoryInterface,
         mock_broadcaster,
         raw_dataset_factory: RawDatasetFactory,
-        dataset_metadata_without_id_factory: DatasetMetadataWithoutIdFactory
+        dataset_metadata_without_id_factory: DatasetMetadataWithoutIdFactory,
     ):
         """
         Test that if a new version of the dataset is created successfully
@@ -796,7 +785,7 @@ def test_cleanup_old_dataset_version_does_not_delete_when_retain_flag_is_true(
         mock_dataset_deletion_repo: DatasetDeletionRepositoryInterface,
         mock_broadcaster,
         raw_dataset_factory: RawDatasetFactory,
-        dataset_metadata_without_id_factory: DatasetMetadataWithoutIdFactory
+        dataset_metadata_without_id_factory: DatasetMetadataWithoutIdFactory,
     ):
         """
         Test that if a new version of the dataset is created successfully
@@ -854,7 +843,6 @@ class MockSettings:
 
 
 class TestDeleteDataset:
-
     def test_raises_exception_if_no_datasets_to_delete(
         self,
         mock_dataset_source_repo: DatasetSourceRepositoryInterface,
diff --git a/tests/unit/services/test_schema_service.py b/tests/unit/services/test_schema_service.py
index 418a0fb..c59a75b 100644
--- a/tests/unit/services/test_schema_service.py
+++ b/tests/unit/services/test_schema_service.py
@@ -12,13 +12,11 @@ def raise_schema_error():
 
 
 class TestPublishNewSchemas:
-
     def test_publish_new_schemas_publishes_after_exception(
         self,
         mock_repo_publisher: MockPublisher,
         mock_bucket_publisher: MockPublisher,
     ):
-
         # Define input filenames
         filenames = [
             "schemas/abc/v1.json",
@@ -37,10 +35,7 @@ def test_publish_new_schemas_publishes_after_exception(
         )
 
         # Publish schemas from GitHub
-        service.publish_new_schemas(
-            "github",
-            filenames
-        )
+        service.publish_new_schemas("github", filenames)
 
         should_be_published = [
             "schemas/abc/v1.json",
@@ -63,7 +58,6 @@ def test_publish_all_files_successfully(
         mock_repo_publisher: MockPublisher,
         mock_bucket_publisher: MockPublisher,
     ):
-
         # Define input filenames
         repo_filenames = [
             "schemas/abc/v1.json",
@@ -85,16 +79,10 @@ def test_publish_all_files_successfully(
         )
 
         # Publish the repository filenames
-        service.publish_new_schemas(
-            "github",
-            repo_filenames
-        )
+        service.publish_new_schemas("github", repo_filenames)
 
         # Publish the bucket filenames
-        service.publish_new_schemas(
-            "bucket",
-            bucket_filenames
-        )
+        service.publish_new_schemas("bucket", bucket_filenames)
 
         # Assert all the GitHub files are published
         assert len(mock_repo_publisher.published_schemas) == len(repo_filenames)
@@ -118,13 +106,13 @@ def test_publish_github_filter(
         filenames = {
             "schemas/a/b.json": True,
             "schemas/abc/def.json": True,
-            "schemas/a/b.JSON": False,        # case-sensitive
-            "schemas/a/b": False,             # no extension
+            "schemas/a/b.JSON": False,  # case-sensitive
+            "schemas/a/b": False,  # no extension
             "schemas/a/b.json/extra": False,  # too deep
-            "schemas/a//b.json": False,       # empty segment => doesn't match [^/]+
-            "schemas//b.json": False,         # empty segment
-            "schemas/a/": False,              # missing filename
-            "Schemas/a/b.json": False,        # wrong case in prefix
+            "schemas/a//b.json": False,  # empty segment => doesn't match [^/]+
+            "schemas//b.json": False,  # empty segment
+            "schemas/a/": False,  # missing filename
+            "Schemas/a/b.json": False,  # wrong case in prefix
             "other/schemas/a/b.json": False,  # doesn't start with schemas/
         }
 
@@ -133,10 +121,7 @@ def test_publish_github_filter(
             bucket_publisher=mock_bucket_publisher,
         )
 
-        service.publish_new_schemas(
-            "github",
-            list(filenames.keys())
-        )
+        service.publish_new_schemas("github", list(filenames.keys()))
 
         # Assert only the valid filenames are published
         assert len(mock_repo_publisher.published_schemas) == 2
diff --git a/uv.lock b/uv.lock
index 5143665..dd4f85f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1328,7 +1328,7 @@ wheels = [
 
 [[package]]
 name = "sds-loader"
-version = "0.1.1"
+version = "0.1.2"
 source = { virtual = "." }
 dependencies = [
     { name = "fastapi" },