
from __future__ import annotations

- __all__ = ["UpdatesManager"]
+ __all__ = ["UpdatesManager", "UpdatesManagerError"]

import logging
import posixpath
_LOG = logging.getLogger(__name__)


+ class UpdatesManagerError(Exception):
+     """Base exception for errors related to the updates process."""
+
+
class UpdatesManager:
    """Class responsible for managing the process of applying updates to the
    PPDB database, including expanding them into a generic format from JSON and
@@ -62,17 +66,13 @@ class UpdatesManager:
    ----------
    config : `PpdbBigQueryConfig`
        Configuration for the PPDB BigQuery interface.
-     mergers : `Sequence` [`UpdatesMerger`], optional
-         Sequence of `UpdatesMerger` instances to use for merging updates into
-         target tables. If not provided, a default set of mergers will be used.
    table_name_format : `str`, optional
        Optional format string for the target table names used by the mergers.
    """

    def __init__(
        self,
        config: PpdbBigQueryConfig,
-         mergers: Sequence[UpdatesMerger] | None = None,
        table_name_format: str | None = None,
    ) -> None:
        # Get some necessary setup information from the config
@@ -81,12 +81,7 @@ def __init__(
        bucket_name = config.bucket_name

        # Merger instances for handling each target table
-         if mergers and table_name_format:
-             raise ValueError("Cannot specify both 'mergers' and 'table_name_format'")
-         if mergers:
-             self._mergers = mergers
-         else:
-             self._mergers = tuple(cls(table_name_format=table_name_format) for cls in _DEFAULT_MERGER_CLASSES)
+         self._mergers = tuple(cls(table_name_format=table_name_format) for cls in _DEFAULT_MERGER_CLASSES)

        # Setup the updates table interface
        self._bq_client = bigquery.Client()
@@ -111,24 +106,44 @@ def apply_updates(self, replica_chunks: Sequence[PpdbReplicaChunkExtended]) -> N
        ----------
        replica_chunks: `Sequence` [`PpdbReplicaChunkExtended`]
            The replica chunks with the update records.
+
+         Raises
+         ------
+         UpdatesManagerError
+             If any step of the updates process fails.
        """
        # Create the updates table, first dropping if it already exists
-         self._updates_table.recreate()
+         try:
+             self._updates_table.recreate()
+         except Exception as e:
+             raise UpdatesManagerError("Failed to recreate updates table") from e

        # Process the replica chunks to build the expanded updates table
-         self._process_chunks(replica_chunks)
+         try:
+             self._process_chunks(replica_chunks)
+         except Exception as e:
+             raise UpdatesManagerError("Failed to process replica chunks") from e

        # Get a fresh reference to the updates table to check if there were
        # any update records generated from the processed replica chunks
-         bq_updates_table = self._bq_client.get_table(self._updates_table.table_fqn)
+         try:
+             bq_updates_table = self._bq_client.get_table(self._updates_table.table_fqn)
+         except Exception as e:
+             raise UpdatesManagerError(f"Failed to get updates table '{self._updates_table.table_fqn}'") from e

        # Check if there were any updates in this set of chunks
        if bq_updates_table.num_rows > 0:
            # Select only the latest update records to a new table
-             self._updates_table.create_latest_only()
+             try:
+                 self._updates_table.create_latest_only()
+             except Exception as e:
+                 raise UpdatesManagerError("Failed to create latest-only updates table") from e

            # Merge the latest-only updates into the target tables
-             self._merge_updates(self._updates_table.latest_only_table_fqn)
+             try:
+                 self._merge_updates(self._updates_table.latest_only_table_fqn)
+             except Exception as e:
+                 raise UpdatesManagerError("Failed to merge updates into target tables") from e
        else:
            # No updates were present in the processed replica chunks
            _LOG.info("No update records found when processing replica chunks")
@@ -140,7 +155,7 @@ def apply_updates(self, replica_chunks: Sequence[PpdbReplicaChunkExtended]) -> N
    def _process_chunks(self, chunks: Sequence[PpdbReplicaChunkExtended]) -> None:
        for chunk in chunks:
            if chunk.gcs_uri is None:
-                 raise ValueError(f"Replica chunk {chunk.id} does not have a GCS URI")
+                 raise UpdatesManagerError(f"Replica chunk {chunk.id} does not have a GCS URI")

            # Parse the GCS URI
            parsed_uri = urllib.parse.urlparse(chunk.gcs_uri)
@@ -153,28 +168,39 @@ def _process_chunks(self, chunks: Sequence[PpdbReplicaChunkExtended]) -> None:
            chunk_prefix = parsed_uri.path.lstrip("/")

            # Load the manifest file for the chunk from GCS
-             manifest_uri = posixpath.join(chunk_prefix, Manifest.FILE_NAME)
-             manifest_blob = bucket.blob(manifest_uri)
-             manifest_content = manifest_blob.download_as_text()
-             manifest = Manifest.from_json_str(manifest_content)
+             try:
+                 manifest_uri = posixpath.join(chunk_prefix, Manifest.FILE_NAME)
+                 manifest_blob = bucket.blob(manifest_uri)
+                 manifest_content = manifest_blob.download_as_text()
+                 manifest = Manifest.from_json_str(manifest_content)
+             except Exception as e:
+                 raise UpdatesManagerError(f"Failed to load manifest for replica chunk {chunk.id}") from e

            # Read the update records if the chunk was flagged as having them
            if manifest.includes_update_records:
-                 # Get the update records file contents from the bucket
-                 object_name = posixpath.join(parsed_uri.path.lstrip("/"), UpdateRecords.FILE_NAME)
-                 blob = bucket.blob(object_name)
-                 content = blob.download_as_text()
-
-                 # Expand the update records into the appropriate format and
-                 # insert them into the updates table
-                 update_records = UpdateRecords.from_json_string(content)
-                 expanded_update_records = UpdateRecordExpander.expand_updates(update_records)
-                 self._updates_table.insert(expanded_update_records)
+                 try:
+                     # Get the update records file contents from the bucket
+                     object_name = posixpath.join(parsed_uri.path.lstrip("/"), UpdateRecords.FILE_NAME)
+                     blob = bucket.blob(object_name)
+                     content = blob.download_as_text()
+
+                     # Expand the update records into the appropriate format and
+                     # insert them into the updates table
+                     update_records = UpdateRecords.from_json_string(content)
+                     expanded_update_records = UpdateRecordExpander.expand_updates(update_records)
+                     self._updates_table.insert(expanded_update_records)
+                 except Exception as e:
+                     raise UpdatesManagerError(
+                         f"Failed to process update records for replica chunk {chunk.id}"
+                     ) from e

    def _merge_updates(self, target_table_fqn: str) -> None:
        for merger in self._mergers:
-             merger.merge(
-                 client=self._bq_client,
-                 updates_table_fqn=target_table_fqn,
-                 target_dataset_fqn=self._target_dataset_fqn,
-             )
+             try:
+                 merger.merge(
+                     client=self._bq_client,
+                     updates_table_fqn=target_table_fqn,
+                     target_dataset_fqn=self._target_dataset_fqn,
+                 )
+             except Exception as e:
+                 raise UpdatesManagerError(f"Failed to merge updates using {type(merger).__name__}") from e
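
A minimal caller-side sketch of the new error handling, not part of this diff: the import path and the run_updates helper below are hypothetical; only UpdatesManager, UpdatesManagerError, PpdbBigQueryConfig, and apply_updates come from the code above. Because every failure inside apply_updates is re-raised as UpdatesManagerError with "from e", a caller can catch a single exception type while the chained traceback still points at the underlying GCS, BigQuery, or parsing error.

import logging

# Hypothetical import path; adjust to wherever UpdatesManager actually lives.
from ppdb_bigquery import PpdbBigQueryConfig, UpdatesManager, UpdatesManagerError

_LOG = logging.getLogger(__name__)


def run_updates(config: PpdbBigQueryConfig, replica_chunks) -> None:
    """Hypothetical helper: apply updates for a set of replica chunks."""
    manager = UpdatesManager(config)
    try:
        manager.apply_updates(replica_chunks)
    except UpdatesManagerError:
        # The original cause (manifest download, table recreate, merge, ...)
        # is preserved through exception chaining.
        _LOG.exception("Applying PPDB updates failed")
        raise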