diff --git a/postgres/changelog.d/23645.added b/postgres/changelog.d/23645.added
new file mode 100644
index 0000000000000..949d76193e287
--- /dev/null
+++ b/postgres/changelog.d/23645.added
@@ -0,0 +1 @@
+Run `diagnose` when the main Postgres check fails then emit any warnings and failures as Agent Health events.
\ No newline at end of file
diff --git a/postgres/datadog_checks/postgres/diagnose.py b/postgres/datadog_checks/postgres/diagnose.py
index 9044799e911f7..eb6c5970fc059 100644
--- a/postgres/datadog_checks/postgres/diagnose.py
+++ b/postgres/datadog_checks/postgres/diagnose.py
@@ -16,8 +16,15 @@
 health event instead.
 """
 
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
 import psycopg
 
+if TYPE_CHECKING:
+    from .postgres import PostgreSql
+
 from .util import (
     DIAGNOSTIC_METADATA,
     DatabaseConfigurationError,
@@ -35,7 +42,7 @@
 class PostgresDiagnose:
     """Explicit pre-flight diagnostics for `datadog-agent diagnose`."""
 
-    def __init__(self, check):
+    def __init__(self, check: PostgreSql):
         self._check = check
         # Codes that have FAIL'd in the current explicit run. Used for cascade skipping so we
         # don't emit downstream-effect FAILs with nonsensical remediations (e.g. "CREATE EXTENSION"
diff --git a/postgres/datadog_checks/postgres/health.py b/postgres/datadog_checks/postgres/health.py
index e900ad9d55032..4c7ef041748eb 100644
--- a/postgres/datadog_checks/postgres/health.py
+++ b/postgres/datadog_checks/postgres/health.py
@@ -5,6 +5,8 @@
 
 from typing import TYPE_CHECKING
 
+from datadog_checks.base.utils.diagnose import Diagnosis
+
 if TYPE_CHECKING:
     from datadog_checks.postgres import PostgreSql
 
@@ -62,3 +64,24 @@ def submit_health_event(
             },
             **kwargs,
         )
+
+    def diagnose(self):
+        """
+        Run the diagnostics for the Postgres check and emit a health event for each warning or failure.
+        """
+        self.check.diagnose.run()
+        for diagnosis in self.check.diagnosis.diagnoses:
+            if diagnosis.result == Diagnosis.DIAGNOSIS_WARNING:
+                status = HealthStatus.WARNING
+            elif diagnosis.result == Diagnosis.DIAGNOSIS_FAIL:
+                status = HealthStatus.ERROR
+            else:
+                # Only warnings and failures are reported as health events.
+                continue
+            self.submit_health_event(
+                name=diagnosis.name,
+                status=status,
+                data={
+                    "diagnosis": diagnosis,
+                },
+            )
diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py
index 113e4402af133..f269ca671a413 100644
--- a/postgres/datadog_checks/postgres/postgres.py
+++ b/postgres/datadog_checks/postgres/postgres.py
@@ -192,7 +192,8 @@ def __init__(self, name, init_config, instances):
         )  # type: TTLCache
 
         # Register explicit pre-flight diagnostics for `datadog-agent diagnose`.
-        PostgresDiagnose(self).register()
+        self.diagnose = PostgresDiagnose(self)
+        self.diagnose.register()
 
     def _submit_initialization_health_event(self):
         try:
@@ -492,6 +493,7 @@ def cancel(self):
         if self.data_observability._job_loop_future:
             self.data_observability._job_loop_future.result()
         self._close_db_pool()
+        self.diagnose = None
 
     def _clean_state(self):
         self.log.debug("Cleaning state")
@@ -1206,6 +1208,9 @@ def check(self, _):
                 hostname=self.reported_hostname,
                 raw=True,
             )
+
+            self.health.diagnose()
+
             raise e
         else:
             self.service_check(