From ecf349f73511fd668b60e69b13d6f1682a7b1138 Mon Sep 17 00:00:00 2001 From: shourya116 Date: Fri, 23 Jun 2023 05:28:06 +0000 Subject: [PATCH 1/3] adding metadata to logs --- clouddq/log.py | 3 ++- clouddq/main.py | 30 +++++++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/clouddq/log.py b/clouddq/log.py index da47a64c..afd8ec69 100644 --- a/clouddq/log.py +++ b/clouddq/log.py @@ -64,7 +64,7 @@ def format(self, record): return super().format(record) -def add_cloud_logging_handler(logger: Logger): +def add_cloud_logging_handler(metadata, logger: Logger): client = google.cloud.logging.Client() handler = CloudLoggingHandler( client=client, @@ -72,6 +72,7 @@ def add_cloud_logging_handler(logger: Logger): labels={ "name": APP_NAME, "releaseId": APP_VERSION, + "metadata" : str(metadata), }, ) handler.setFormatter(JSONFormatter()) diff --git a/clouddq/main.py b/clouddq/main.py index 08b5f32a..a158be30 100644 --- a/clouddq/main.py +++ b/clouddq/main.py @@ -22,6 +22,7 @@ import json import logging import logging.config +import os import click import coloredlogs @@ -195,6 +196,12 @@ default=8, type=int, ) +@click.option( + "--add_metadata_to_logs", + help="If True, the metadata results will be logged to cloud and stdout. ", + is_flag=True, + default=False, +) def main( # noqa: C901 rule_binding_ids: str, rule_binding_config_path: str, @@ -216,6 +223,7 @@ def main( # noqa: C901 summary_to_stdout: bool = False, enable_experimental_bigquery_entity_uris: bool = True, enable_experimental_dataplex_gcs_validation: bool = True, + add_metadata_to_logs: bool = False, ) -> None: """Run RULE_BINDING_IDS from a RULE_BINDING_CONFIG_PATH. @@ -270,8 +278,26 @@ def main( # noqa: C901 gcp_service_account_key_path=gcp_service_account_key_path, gcp_impersonation_credentials=gcp_impersonation_credentials, ) + # Load metadata + metadata = json.loads(metadata) # Set-up cloud logging - add_cloud_logging_handler(logger=json_logger) + if add_metadata_to_logs: + dataplex_batch_name = os.environ.get('DATAPLEX_BATCH_NAME') + dataplex_task_name = os.environ.get('DATAPLEX_TASK_NAME') + if dataplex_batch_name: + metadata["dataplex_batch_name"] = dataplex_batch_name + if dataplex_task_name: + metadata["dataplex_task_name"] = dataplex_task_name + add_cloud_logging_handler(logger=json_logger, metadata=metadata) + else: + dataplex_batch_name = os.environ.get('DATAPLEX_BATCH_NAME') + dataplex_task_name = os.environ.get('DATAPLEX_TASK_NAME') + if dataplex_batch_name: + metadata["dataplex_batch_name"] = dataplex_batch_name + if dataplex_task_name: + metadata["dataplex_task_name"] = dataplex_task_name + add_cloud_logging_handler(logger=json_logger, metadata=metadata) + logger.info(f"Metadata: {metadata}") logger.info("Starting CloudDQ run with configs:") json_logger.warning( json.dumps({"clouddq_run_configs": locals()}, cls=JsonEncoderDatetime) @@ -408,8 +434,6 @@ def main( # noqa: C901 "--summary_to_stdout is True but --target_bigquery_summary_table is not set. " "No summary logs will be logged to stdout." ) - # Load metadata - metadata = json.loads(metadata) # Load Rule Bindings configs_path = Path(rule_binding_config_path) logger.debug(f"Loading rule bindings from: {configs_path.absolute()}") From 1f10e0e483db77a84e2c0ca2c260688e05dc39e0 Mon Sep 17 00:00:00 2001 From: shourya116 Date: Fri, 23 Jun 2023 05:57:42 +0000 Subject: [PATCH 2/3] adding metadata to cloud log and stdout --- clouddq/log.py | 2 +- clouddq/main.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/clouddq/log.py b/clouddq/log.py index afd8ec69..3a47ecdf 100644 --- a/clouddq/log.py +++ b/clouddq/log.py @@ -72,7 +72,7 @@ def add_cloud_logging_handler(metadata, logger: Logger): labels={ "name": APP_NAME, "releaseId": APP_VERSION, - "metadata" : str(metadata), + "metadata": str(metadata), }, ) handler.setFormatter(JSONFormatter()) diff --git a/clouddq/main.py b/clouddq/main.py index a158be30..0475adba 100644 --- a/clouddq/main.py +++ b/clouddq/main.py @@ -282,22 +282,22 @@ def main( # noqa: C901 metadata = json.loads(metadata) # Set-up cloud logging if add_metadata_to_logs: - dataplex_batch_name = os.environ.get('DATAPLEX_BATCH_NAME') - dataplex_task_name = os.environ.get('DATAPLEX_TASK_NAME') + dataplex_batch_name = os.environ.get("DATAPLEX_BATCH_NAME") + dataplex_task_name = os.environ.get("DATAPLEX_TASK_NAME") if dataplex_batch_name: metadata["dataplex_batch_name"] = dataplex_batch_name if dataplex_task_name: metadata["dataplex_task_name"] = dataplex_task_name add_cloud_logging_handler(logger=json_logger, metadata=metadata) else: - dataplex_batch_name = os.environ.get('DATAPLEX_BATCH_NAME') - dataplex_task_name = os.environ.get('DATAPLEX_TASK_NAME') + dataplex_batch_name = os.environ.get("DATAPLEX_BATCH_NAME") + dataplex_task_name = os.environ.get("DATAPLEX_TASK_NAME") if dataplex_batch_name: metadata["dataplex_batch_name"] = dataplex_batch_name if dataplex_task_name: metadata["dataplex_task_name"] = dataplex_task_name add_cloud_logging_handler(logger=json_logger, metadata=metadata) - logger.info(f"Metadata: {metadata}") + logger.info(f"metadata: {metadata}") logger.info("Starting CloudDQ run with configs:") json_logger.warning( json.dumps({"clouddq_run_configs": locals()}, cls=JsonEncoderDatetime) From 9ba88d816df9fca77f5e0fd9a29014e606e90d13 Mon Sep 17 00:00:00 2001 From: shourya116 Date: Mon, 26 Jun 2023 05:11:49 +0000 Subject: [PATCH 3/3] updated code for adding metadata to logs --- clouddq/log.py | 4 ++-- clouddq/main.py | 13 ++++++------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/clouddq/log.py b/clouddq/log.py index 3a47ecdf..8e14c3b3 100644 --- a/clouddq/log.py +++ b/clouddq/log.py @@ -64,7 +64,7 @@ def format(self, record): return super().format(record) -def add_cloud_logging_handler(metadata, logger: Logger): +def add_cloud_logging_handler(logger: Logger, metadata: dict): client = google.cloud.logging.Client() handler = CloudLoggingHandler( client=client, @@ -72,7 +72,7 @@ def add_cloud_logging_handler(metadata, logger: Logger): labels={ "name": APP_NAME, "releaseId": APP_VERSION, - "metadata": str(metadata), + "metadata": json.dumps(metadata), }, ) handler.setFormatter(JSONFormatter()) diff --git a/clouddq/main.py b/clouddq/main.py index 0475adba..bff641a8 100644 --- a/clouddq/main.py +++ b/clouddq/main.py @@ -280,23 +280,22 @@ def main( # noqa: C901 ) # Load metadata metadata = json.loads(metadata) + dataplex_batch_name = os.environ.get("DATAPLEX_BATCH_NAME") + dataplex_task_name = os.environ.get("DATAPLEX_TASK_NAME") # Set-up cloud logging if add_metadata_to_logs: - dataplex_batch_name = os.environ.get("DATAPLEX_BATCH_NAME") - dataplex_task_name = os.environ.get("DATAPLEX_TASK_NAME") if dataplex_batch_name: metadata["dataplex_batch_name"] = dataplex_batch_name if dataplex_task_name: metadata["dataplex_task_name"] = dataplex_task_name add_cloud_logging_handler(logger=json_logger, metadata=metadata) else: - dataplex_batch_name = os.environ.get("DATAPLEX_BATCH_NAME") - dataplex_task_name = os.environ.get("DATAPLEX_TASK_NAME") + metadata_dict = {} if dataplex_batch_name: - metadata["dataplex_batch_name"] = dataplex_batch_name + metadata_dict["dataplex_batch_name"] = dataplex_batch_name if dataplex_task_name: - metadata["dataplex_task_name"] = dataplex_task_name - add_cloud_logging_handler(logger=json_logger, metadata=metadata) + metadata_dict["dataplex_task_name"] = dataplex_task_name + add_cloud_logging_handler(logger=json_logger, metadata=metadata_dict) logger.info(f"metadata: {metadata}") logger.info("Starting CloudDQ run with configs:") json_logger.warning(