From ec27d222a8054e312a02412454196e58b5be3b55 Mon Sep 17 00:00:00 2001 From: Joel Marcotte Date: Wed, 6 May 2026 13:57:36 -0400 Subject: [PATCH 1/3] Add SQL Server diagnostics --- .../datadog_checks/sqlserver/diagnose.py | 388 ++++++++++++++++++ .../datadog_checks/sqlserver/sqlserver.py | 4 + sqlserver/tests/test_diagnose.py | 283 +++++++++++++ 3 files changed, 675 insertions(+) create mode 100644 sqlserver/datadog_checks/sqlserver/diagnose.py create mode 100644 sqlserver/tests/test_diagnose.py diff --git a/sqlserver/datadog_checks/sqlserver/diagnose.py b/sqlserver/datadog_checks/sqlserver/diagnose.py new file mode 100644 index 0000000000000..7b0467535426a --- /dev/null +++ b/sqlserver/datadog_checks/sqlserver/diagnose.py @@ -0,0 +1,388 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +""" +Explicit pre-flight diagnostics for the SQL Server integration. + +Registered with ``self.diagnosis.register(...)`` in ``SQLServer.__init__`` and +run on-demand when the Agent invokes ``get_diagnoses()``. +""" + +from __future__ import annotations + +from enum import Enum +from typing import Any + +from datadog_checks.base import is_affirmative +from datadog_checks.sqlserver.connection_errors import SQLConnectionError +from datadog_checks.sqlserver.const import ENGINE_EDITION_SQL_DATABASE, STATIC_INFO_ENGINE_EDITION + +CATEGORY_SQLSERVER = "sqlserver" +KEY_PREFIX = "sqlserver-diagnose-" +MIN_SUPPORTED_MAJOR_VERSION = 11 +SQLSERVER_2014_MAJOR_VERSION = 12 + +SQLSERVER_SETUP_DOCS_URL = "https://docs.datadoghq.com/integrations/sql-server/?tab=host#setup" +SQLSERVER_DBM_GRANTS_DOCS_URL = ( + "https://docs.datadoghq.com/database_monitoring/setup_sql_server/selfhosted/" + "?tab=sqlserver2014#grant-the-agent-access" +) + + +class SQLServerConfigurationError(Enum): + """SQL Server diagnostic error codes.""" + + connection_failure = "connection-failure" + sqlserver_version_unsupported = "sqlserver-version-unsupported" + performance_counters_not_readable = "performance-counters-not-readable" + missing_view_server_state = "missing-view-server-state" + missing_connect_any_database = "missing-connect-any-database" + missing_view_any_definition = "missing-view-any-definition" + missing_msdb_select = "missing-msdb-select" + + +DIAGNOSTIC_METADATA = { + SQLServerConfigurationError.connection_failure: { + "description": "Verifies that the Agent can connect to the configured SQL Server database.", + "remediation": "Review the SQL Server host, port, driver, authentication, and TLS settings.", + "docs_url": SQLSERVER_SETUP_DOCS_URL, + }, + SQLServerConfigurationError.sqlserver_version_unsupported: { + "description": "Verifies that SQL Server is a supported version for the integration.", + "remediation": "Use SQL Server 2012 or newer.", + "docs_url": SQLSERVER_SETUP_DOCS_URL, + }, + SQLServerConfigurationError.performance_counters_not_readable: { + "description": "Verifies read access to sys.dm_os_performance_counters.", + "remediation": "Grant SELECT on sys.dm_os_performance_counters to the Datadog login.", + "docs_url": SQLSERVER_SETUP_DOCS_URL, + }, + SQLServerConfigurationError.missing_view_server_state: { + "description": "Verifies VIEW SERVER STATE for server state, query metrics, and query activity collection.", + "remediation": "Grant VIEW SERVER STATE to the Datadog login.", + "docs_url": SQLSERVER_SETUP_DOCS_URL, + }, + SQLServerConfigurationError.missing_connect_any_database: { + "description": "Verifies CONNECT ANY DATABASE when the check can fan out across databases.", + "remediation": "Grant CONNECT ANY DATABASE to the Datadog login.", + "docs_url": SQLSERVER_DBM_GRANTS_DOCS_URL, + }, + SQLServerConfigurationError.missing_view_any_definition: { + "description": "Verifies VIEW ANY DEFINITION for DBM metadata and definition-dependent metrics.", + "remediation": "Grant VIEW ANY DEFINITION to the Datadog login.", + "docs_url": SQLSERVER_DBM_GRANTS_DOCS_URL, + }, + SQLServerConfigurationError.missing_msdb_select: { + "description": "Verifies read access to enabled SQL Server features backed by msdb tables.", + "remediation": "Create the Datadog user in msdb and grant SELECT on the required msdb tables.", + "docs_url": SQLSERVER_SETUP_DOCS_URL, + }, +} + + +def build_remediation(code: SQLServerConfigurationError) -> str: + """Return remediation text with the relevant Datadog docs URL.""" + metadata = DIAGNOSTIC_METADATA[code] + return "{} See {}.".format(metadata["remediation"], metadata["docs_url"]) + + +class SqlserverDiagnose: + """Explicit pre-flight diagnostics for `datadog-agent diagnose`.""" + + def __init__(self, check: Any) -> None: + self._check = check + self._failed: set[str] = set() + self._major_version: int | None = None + self._engine_edition: int | None = None + + def register(self) -> None: + """Register the diagnostic entry point with the check's Diagnosis object.""" + diagnosis = self._check.diagnosis + if getattr(diagnosis, '_sqlserver_diagnostics_registered', False): + return + diagnosis._sqlserver_diagnostics_registered = True + diagnosis.register(self._run) + + def _run(self) -> None: + """Open one probe connection and run enabled diagnostics.""" + self._failed = set() + self._major_version = None + self._engine_edition = None + + try: + with self._check.connection.open_managed_default_connection(KEY_PREFIX): + with self._check.connection.get_managed_cursor(KEY_PREFIX) as cursor: + self._diagnose_connection() + self._diagnose_version(cursor) + self._diagnose_performance_counters(cursor) + self._diagnose_view_server_state(cursor) + if self._needs_connect_any_database(): + self._diagnose_connect_any_database(cursor) + if self._needs_view_any_definition(): + self._diagnose_view_any_definition(cursor) + if self._needs_msdb_select(): + self._diagnose_msdb_select(cursor) + except SQLConnectionError as e: + code = SQLServerConfigurationError.connection_failure + self._fail( + code, + diagnosis="Failed to connect to {} as {}: {}".format(self._database_desc(), self._username_desc(), e), + rawerror=str(e), + ) + + def _diagnose_connection(self) -> None: + code = SQLServerConfigurationError.connection_failure + self._check.diagnosis.success( + name=code.value, + diagnosis="Connected to {} as {}.".format(self._database_desc(), self._username_desc()), + category=CATEGORY_SQLSERVER, + ) + + def _diagnose_version(self, cursor: Any) -> None: + code = SQLServerConfigurationError.sqlserver_version_unsupported + try: + row = _fetchone( + cursor, + "SELECT CAST(SERVERPROPERTY('ProductMajorVersion') AS INT), " + "CAST(SERVERPROPERTY('EngineEdition') AS INT)", + ) + self._major_version = _to_int(row[0]) if row else None + self._engine_edition = _to_int(row[1]) if row and len(row) > 1 else None + except Exception as e: + self._fail(code, diagnosis="Unable to determine SQL Server version: {}".format(e), rawerror=str(e)) + return + + if self._major_version is None: + self._fail(code, diagnosis="Unable to determine SQL Server major version.") + return + + if self._major_version < MIN_SUPPORTED_MAJOR_VERSION: + self._fail( + code, + diagnosis="SQL Server major version {} is below the minimum supported version (11).".format( + self._major_version + ), + ) + return + + self._check.diagnosis.success( + name=code.value, + diagnosis="SQL Server major version {} is supported.".format(self._major_version), + category=CATEGORY_SQLSERVER, + ) + + def _diagnose_performance_counters(self, cursor: Any) -> None: + code = SQLServerConfigurationError.performance_counters_not_readable + try: + _execute_read_probe(cursor, "SELECT TOP 1 object_name FROM sys.dm_os_performance_counters") + except Exception as e: + self._fail( + code, + diagnosis="Unable to read sys.dm_os_performance_counters: {}".format(e), + rawerror=str(e), + ) + return + + self._check.diagnosis.success( + name=code.value, + diagnosis="sys.dm_os_performance_counters is readable.", + category=CATEGORY_SQLSERVER, + ) + + def _diagnose_view_server_state(self, cursor: Any) -> None: + code = SQLServerConfigurationError.missing_view_server_state + try: + if not _has_server_permission(cursor, "VIEW SERVER STATE"): + self._fail(code, diagnosis="The Datadog login does not have VIEW SERVER STATE.") + return + _execute_read_probe(cursor, "SELECT TOP 1 session_id FROM sys.dm_exec_sessions") + except Exception as e: + self._fail( + code, + diagnosis="Unable to validate VIEW SERVER STATE with sys.dm_exec_sessions: {}".format(e), + rawerror=str(e), + ) + return + + self._check.diagnosis.success( + name=code.value, + diagnosis="VIEW SERVER STATE is granted and sys.dm_exec_sessions is readable.", + category=CATEGORY_SQLSERVER, + ) + + def _diagnose_connect_any_database(self, cursor: Any) -> None: + code = SQLServerConfigurationError.missing_connect_any_database + if self._major_version is not None and self._major_version < SQLSERVER_2014_MAJOR_VERSION: + self._check.diagnosis.warning( + name=code.value, + diagnosis=( + "CONNECT ANY DATABASE is unavailable before SQL Server 2014; ensure the Datadog user exists " + "in every monitored database." + ), + category=CATEGORY_SQLSERVER, + description=DIAGNOSTIC_METADATA[code]["description"], + remediation=build_remediation(code), + ) + return + + try: + if not _has_server_permission(cursor, "CONNECT ANY DATABASE"): + self._fail(code, diagnosis="The Datadog login does not have CONNECT ANY DATABASE.") + return + except Exception as e: + self._fail(code, diagnosis="Unable to validate CONNECT ANY DATABASE: {}".format(e), rawerror=str(e)) + return + + self._check.diagnosis.success( + name=code.value, + diagnosis="CONNECT ANY DATABASE is granted.", + category=CATEGORY_SQLSERVER, + ) + + def _diagnose_view_any_definition(self, cursor: Any) -> None: + code = SQLServerConfigurationError.missing_view_any_definition + try: + if not _has_server_permission(cursor, "VIEW ANY DEFINITION"): + self._fail(code, diagnosis="The Datadog login does not have VIEW ANY DEFINITION.") + return + except Exception as e: + self._fail(code, diagnosis="Unable to validate VIEW ANY DEFINITION: {}".format(e), rawerror=str(e)) + return + + self._check.diagnosis.success( + name=code.value, + diagnosis="VIEW ANY DEFINITION is granted.", + category=CATEGORY_SQLSERVER, + ) + + def _diagnose_msdb_select(self, cursor: Any) -> None: + code = SQLServerConfigurationError.missing_msdb_select + failures = [] + probes = self._msdb_probe_queries() + for table, query in probes: + try: + _execute_read_probe(cursor, query) + except Exception as e: + failures.append("{}: {}".format(table, e)) + + if failures: + self._fail( + code, + diagnosis="Unable to read required msdb tables: {}".format("; ".join(failures)), + rawerror="; ".join(failures), + ) + return + + self._check.diagnosis.success( + name=code.value, + diagnosis="Required msdb tables are readable: {}.".format(", ".join(table for table, _ in probes)), + category=CATEGORY_SQLSERVER, + ) + + def _needs_connect_any_database(self) -> bool: + return not self._is_azure_sql_database() and ( + self._check._config.dbm_enabled or self._check._config.autodiscovery + ) + + def _needs_view_any_definition(self) -> bool: + config = self._check._config + database_metrics = config.database_metrics_config + return not self._is_azure_sql_database() and ( + config.dbm_enabled + or database_metrics["ao_metrics"]["enabled"] + or database_metrics["master_files_metrics"]["enabled"] + ) + + def _needs_msdb_select(self) -> bool: + if self._is_azure_sql_database(): + return False + + config = self._check._config + database_metrics = config.database_metrics_config + return ( + database_metrics["db_backup_metrics"]["enabled"] + or database_metrics["primary_log_shipping_metrics"]["enabled"] + or database_metrics["secondary_log_shipping_metrics"]["enabled"] + or self._agent_jobs_enabled() + ) + + def _msdb_probe_queries(self) -> list[tuple[str, str]]: + config = self._check._config + database_metrics = config.database_metrics_config + probes = [] + if database_metrics["db_backup_metrics"]["enabled"]: + probes.append(("msdb.dbo.backupset", "SELECT TOP 1 1 FROM msdb.dbo.backupset")) + if database_metrics["primary_log_shipping_metrics"]["enabled"]: + probes.append( + ( + "msdb.dbo.log_shipping_monitor_primary", + "SELECT TOP 1 1 FROM msdb.dbo.log_shipping_monitor_primary", + ) + ) + if database_metrics["secondary_log_shipping_metrics"]["enabled"]: + probes.append( + ( + "msdb.dbo.log_shipping_monitor_secondary", + "SELECT TOP 1 1 FROM msdb.dbo.log_shipping_monitor_secondary", + ) + ) + if self._agent_jobs_enabled(): + probes.extend( + [ + ("msdb.dbo.sysjobs", "SELECT TOP 1 1 FROM msdb.dbo.sysjobs"), + ("msdb.dbo.sysjobhistory", "SELECT TOP 1 1 FROM msdb.dbo.sysjobhistory"), + ("msdb.dbo.sysjobactivity", "SELECT TOP 1 1 FROM msdb.dbo.sysjobactivity"), + ] + ) + return probes + + def _agent_jobs_enabled(self) -> bool: + config = self._check._config + return config.dbm_enabled and is_affirmative(config.agent_jobs_config.get('enabled', False)) + + def _is_azure_sql_database(self) -> bool: + engine_edition = self._engine_edition or self._check.static_info_cache.get(STATIC_INFO_ENGINE_EDITION) + return engine_edition == ENGINE_EDITION_SQL_DATABASE + + def _fail(self, code: SQLServerConfigurationError, diagnosis: str, rawerror: str | None = None) -> None: + self._check.diagnosis.fail( + name=code.value, + diagnosis=diagnosis, + category=CATEGORY_SQLSERVER, + description=DIAGNOSTIC_METADATA[code]["description"], + remediation=build_remediation(code), + rawerror=rawerror, + ) + self._failed.add(code.value) + + def _database_desc(self) -> str: + connection = self._check.connection + database = self._check.instance.get('database', connection.DEFAULT_DATABASE) + return "{} (database={})".format(connection.get_host_with_port(), database) + + def _username_desc(self) -> str: + return self._check.instance.get('username') or "configured authentication" + + +def _has_server_permission(cursor: Any, permission: str) -> bool: + row = _fetchone(cursor, "SELECT HAS_PERMS_BY_NAME(NULL, NULL, ?)", (permission,)) + return bool(row and row[0] == 1) + + +def _fetchone(cursor: Any, query: str, params: tuple[Any, ...] | None = None) -> Any: + if params is None: + cursor.execute(query) + else: + cursor.execute(query, params) + return cursor.fetchone() + + +def _execute_read_probe(cursor: Any, query: str) -> None: + cursor.execute(query) + cursor.fetchall() + + +def _to_int(value: Any) -> int | None: + if value is None: + return None + return int(value) diff --git a/sqlserver/datadog_checks/sqlserver/sqlserver.py b/sqlserver/datadog_checks/sqlserver/sqlserver.py index 3953b8f1bd3fb..52d7918704fd5 100644 --- a/sqlserver/datadog_checks/sqlserver/sqlserver.py +++ b/sqlserver/datadog_checks/sqlserver/sqlserver.py @@ -103,6 +103,7 @@ VALID_METRIC_TYPES, expected_sys_databases_columns, ) +from datadog_checks.sqlserver.diagnose import SqlserverDiagnose from datadog_checks.sqlserver.metrics import DEFAULT_PERFORMANCE_TABLE, VALID_TABLES from datadog_checks.sqlserver.utils import ( is_azure_sql_database, @@ -196,6 +197,9 @@ def __init__(self, name, init_config, instances): self._database_metrics = None self.sqlserver_incr_fraction_metric_previous_values = {} + # Register explicit pre-flight diagnostics for `datadog-agent diagnose`. + SqlserverDiagnose(self).register() + self._submit_initialization_health_event() def initialize_xe_session_handlers(self): diff --git a/sqlserver/tests/test_diagnose.py b/sqlserver/tests/test_diagnose.py new file mode 100644 index 0000000000000..61a266c32d0e8 --- /dev/null +++ b/sqlserver/tests/test_diagnose.py @@ -0,0 +1,283 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +import json +from contextlib import contextmanager + +import pytest + +from datadog_checks.base.utils.diagnose import Diagnosis +from datadog_checks.sqlserver import SQLServer +from datadog_checks.sqlserver.connection_errors import SQLConnectionError +from datadog_checks.sqlserver.diagnose import SQLServerConfigurationError, SqlserverDiagnose + +from .common import CHECK_NAME + +pytestmark = pytest.mark.unit + + +class FakeCursor: + """Cursor stub that dispatches SQL and params to canned results.""" + + def __init__(self, responses): + self._responses = responses + self._rows = [] + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def execute(self, sql, params=None): + for matcher, result in self._responses: + if callable(matcher): + ok = matcher(sql, params) + else: + ok = matcher in sql + if not ok: + continue + if isinstance(result, Exception): + raise result + self._rows = list(result) + return + raise AssertionError("unexpected query: {!r} params={!r}".format(sql, params)) + + def fetchone(self): + return self._rows[0] if self._rows else None + + def fetchall(self): + return list(self._rows) + + +class FakeConnection: + DEFAULT_DATABASE = "master" + + def __init__(self, responses=None, open_error=None): + self._responses = responses or [] + self._open_error = open_error + self.opened = 0 + self.closed = 0 + + @contextmanager + def open_managed_default_connection(self, key_prefix): + if self._open_error is not None: + raise self._open_error + self.opened += 1 + try: + yield + finally: + self.closed += 1 + + def get_managed_cursor(self, key_prefix): + return FakeCursor(self._responses) + + def get_host_with_port(self): + return "localhost,1433" + + +def _permission(name): + def predicate(sql, params): + return "HAS_PERMS_BY_NAME" in sql and params == (name,) + + predicate.__qualname__ = "_permission({!r})".format(name) + return predicate + + +def _get_diagnoses(check): + check.diagnosis.clear() + return [d._asdict() for d in check.diagnosis.run_explicit()] + + +def _by_name(diagnoses, name): + return [d for d in diagnoses if d['name'] == name] + + +def _check(instance_minimal_defaults, responses, **instance_overrides): + instance = dict(instance_minimal_defaults, **instance_overrides) + check = SQLServer(CHECK_NAME, {}, [instance]) + check._connection = FakeConnection(responses) + return check + + +def _happy_responses(*, major_version=16, engine_edition=2, connect_any_database=True, view_any_definition=True): + return [ + ("SERVERPROPERTY('ProductMajorVersion')", [(major_version, engine_edition)]), + ("sys.dm_os_performance_counters", [(1,)]), + (_permission("VIEW SERVER STATE"), [(1,)]), + ("sys.dm_exec_sessions", [(1,)]), + (_permission("CONNECT ANY DATABASE"), [(1 if connect_any_database else 0,)]), + (_permission("VIEW ANY DEFINITION"), [(1 if view_any_definition else 0,)]), + ("msdb.dbo.backupset", []), + ("msdb.dbo.sysjobs", []), + ("msdb.dbo.sysjobhistory", []), + ("msdb.dbo.sysjobactivity", []), + ] + + +def _assert_result(diagnoses, code, result): + rows = _by_name(diagnoses, code.value) + assert rows, "missing diagnosis for {}".format(code.value) + assert rows[0]['result'] == result, rows + + +def test_register_is_idempotent(instance_minimal_defaults): + check = SQLServer(CHECK_NAME, {}, [instance_minimal_defaults]) + registered = list(check.diagnosis._diagnostics) + + SqlserverDiagnose(check).register() + + assert check.diagnosis._diagnostics == registered + + +def test_standard_diagnostics_success(instance_minimal_defaults): + check = _check(instance_minimal_defaults, _happy_responses()) + + diagnoses = _get_diagnoses(check) + + for code in ( + SQLServerConfigurationError.connection_failure, + SQLServerConfigurationError.sqlserver_version_unsupported, + SQLServerConfigurationError.performance_counters_not_readable, + SQLServerConfigurationError.missing_view_server_state, + SQLServerConfigurationError.missing_msdb_select, + ): + _assert_result(diagnoses, code, Diagnosis.DIAGNOSIS_SUCCESS) + assert not _by_name(diagnoses, SQLServerConfigurationError.missing_connect_any_database.value) + assert not _by_name(diagnoses, SQLServerConfigurationError.missing_view_any_definition.value) + assert check._connection.opened == 1 + assert check._connection.closed == 1 + + +def test_connection_failure(instance_minimal_defaults): + check = SQLServer(CHECK_NAME, {}, [instance_minimal_defaults]) + check._connection = FakeConnection(open_error=SQLConnectionError("login failed for user")) + + diagnoses = _get_diagnoses(check) + + rows = _by_name(diagnoses, SQLServerConfigurationError.connection_failure.value) + assert len(rows) == 1 + assert rows[0]['result'] == Diagnosis.DIAGNOSIS_FAIL + assert "login failed for user" in rows[0]['diagnosis'] + + +def test_unsupported_version_fails(instance_minimal_defaults): + check = _check(instance_minimal_defaults, _happy_responses(major_version=10)) + + diagnoses = _get_diagnoses(check) + + row = _by_name(diagnoses, SQLServerConfigurationError.sqlserver_version_unsupported.value)[0] + assert row['result'] == Diagnosis.DIAGNOSIS_FAIL + assert "below the minimum supported version" in row['diagnosis'] + + +def test_performance_counter_permission_failure(instance_minimal_defaults): + responses = _happy_responses() + responses[1] = ("sys.dm_os_performance_counters", Exception("permission denied")) + check = _check(instance_minimal_defaults, responses) + + diagnoses = _get_diagnoses(check) + + row = _by_name(diagnoses, SQLServerConfigurationError.performance_counters_not_readable.value)[0] + assert row['result'] == Diagnosis.DIAGNOSIS_FAIL + assert "sys.dm_os_performance_counters" in row['diagnosis'] + assert "permission denied" in row['rawerror'] + + +def test_missing_view_server_state_fails(instance_minimal_defaults): + responses = _happy_responses() + responses[2] = (_permission("VIEW SERVER STATE"), [(0,)]) + check = _check(instance_minimal_defaults, responses) + + diagnoses = _get_diagnoses(check) + + row = _by_name(diagnoses, SQLServerConfigurationError.missing_view_server_state.value)[0] + assert row['result'] == Diagnosis.DIAGNOSIS_FAIL + assert "VIEW SERVER STATE" in row['diagnosis'] + + +def test_dbm_diagnostics_success(instance_minimal_defaults): + check = _check(instance_minimal_defaults, _happy_responses(), dbm=True) + + diagnoses = _get_diagnoses(check) + + _assert_result(diagnoses, SQLServerConfigurationError.missing_connect_any_database, Diagnosis.DIAGNOSIS_SUCCESS) + _assert_result(diagnoses, SQLServerConfigurationError.missing_view_any_definition, Diagnosis.DIAGNOSIS_SUCCESS) + + +def test_missing_connect_any_database_fails_for_dbm(instance_minimal_defaults): + check = _check(instance_minimal_defaults, _happy_responses(connect_any_database=False), dbm=True) + + diagnoses = _get_diagnoses(check) + + row = _by_name(diagnoses, SQLServerConfigurationError.missing_connect_any_database.value)[0] + assert row['result'] == Diagnosis.DIAGNOSIS_FAIL + assert "CONNECT ANY DATABASE" in row['diagnosis'] + + +def test_connect_any_database_warns_before_sqlserver_2014(instance_minimal_defaults): + check = _check(instance_minimal_defaults, _happy_responses(major_version=11), dbm=True) + + diagnoses = _get_diagnoses(check) + + row = _by_name(diagnoses, SQLServerConfigurationError.missing_connect_any_database.value)[0] + assert row['result'] == Diagnosis.DIAGNOSIS_WARNING + assert "unavailable before SQL Server 2014" in row['diagnosis'] + + +def test_missing_view_any_definition_fails_for_dbm(instance_minimal_defaults): + check = _check(instance_minimal_defaults, _happy_responses(view_any_definition=False), dbm=True) + + diagnoses = _get_diagnoses(check) + + row = _by_name(diagnoses, SQLServerConfigurationError.missing_view_any_definition.value)[0] + assert row['result'] == Diagnosis.DIAGNOSIS_FAIL + assert "VIEW ANY DEFINITION" in row['diagnosis'] + + +def test_missing_msdb_select_fails_when_enabled_feature_needs_msdb(instance_minimal_defaults): + responses = _happy_responses() + responses[6] = ("msdb.dbo.backupset", Exception("permission denied")) + check = _check(instance_minimal_defaults, responses) + + diagnoses = _get_diagnoses(check) + + row = _by_name(diagnoses, SQLServerConfigurationError.missing_msdb_select.value)[0] + assert row['result'] == Diagnosis.DIAGNOSIS_FAIL + assert "msdb.dbo.backupset" in row['diagnosis'] + assert "permission denied" in row['rawerror'] + + +def test_agent_jobs_adds_msdb_job_table_probes(instance_minimal_defaults): + check = _check( + instance_minimal_defaults, + _happy_responses(), + dbm=True, + agent_jobs={'enabled': True}, + ) + + diagnoses = _get_diagnoses(check) + + row = _by_name(diagnoses, SQLServerConfigurationError.missing_msdb_select.value)[0] + assert row['result'] == Diagnosis.DIAGNOSIS_SUCCESS + assert "msdb.dbo.sysjobs" in row['diagnosis'] + assert "msdb.dbo.sysjobhistory" in row['diagnosis'] + assert "msdb.dbo.sysjobactivity" in row['diagnosis'] + + +def test_azure_sql_database_skips_server_and_msdb_specific_dbm_diagnostics(instance_minimal_defaults): + check = _check(instance_minimal_defaults, _happy_responses(engine_edition=5), dbm=True) + + diagnoses = _get_diagnoses(check) + + assert not _by_name(diagnoses, SQLServerConfigurationError.missing_connect_any_database.value) + assert not _by_name(diagnoses, SQLServerConfigurationError.missing_view_any_definition.value) + assert not _by_name(diagnoses, SQLServerConfigurationError.missing_msdb_select.value) + + +def test_get_diagnoses_returns_json(instance_minimal_defaults): + check = _check(instance_minimal_defaults, _happy_responses()) + + parsed = json.loads(check.get_diagnoses()) + + assert any(d['name'] == SQLServerConfigurationError.connection_failure.value for d in parsed) From 288359bce9647353b8c9b9ae696399f643804a18 Mon Sep 17 00:00:00 2001 From: Joel Marcotte Date: Wed, 6 May 2026 15:17:43 -0400 Subject: [PATCH 2/3] Add SQL Server diagnostics changelog --- sqlserver/changelog.d/23621.added | 1 + 1 file changed, 1 insertion(+) create mode 100644 sqlserver/changelog.d/23621.added diff --git a/sqlserver/changelog.d/23621.added b/sqlserver/changelog.d/23621.added new file mode 100644 index 0000000000000..7e178cb5f4637 --- /dev/null +++ b/sqlserver/changelog.d/23621.added @@ -0,0 +1 @@ +Add explicit diagnostics for SQL Server setup validation. \ No newline at end of file From 9b8b4122a3908986675b5d28e34ab0e7cac66866 Mon Sep 17 00:00:00 2001 From: Joel Marcotte Date: Fri, 8 May 2026 10:26:50 -0400 Subject: [PATCH 3/3] Address Codex review on SQL Server diagnostics - skip server-level VIEW SERVER STATE probe on Azure SQL Database and validate via sys.dm_exec_sessions (VIEW DATABASE STATE) - probe msdb.dbo.syssessions when agent_jobs is enabled and not on RDS Co-Authored-By: Claude Opus 4.7 (1M context) --- .../datadog_checks/sqlserver/diagnose.py | 28 ++++-- sqlserver/tests/test_diagnose.py | 86 ++++++++++++++++++- 2 files changed, 106 insertions(+), 8 deletions(-) diff --git a/sqlserver/datadog_checks/sqlserver/diagnose.py b/sqlserver/datadog_checks/sqlserver/diagnose.py index 7b0467535426a..5bb06622bd262 100644 --- a/sqlserver/datadog_checks/sqlserver/diagnose.py +++ b/sqlserver/datadog_checks/sqlserver/diagnose.py @@ -58,8 +58,11 @@ class SQLServerConfigurationError(Enum): "docs_url": SQLSERVER_SETUP_DOCS_URL, }, SQLServerConfigurationError.missing_view_server_state: { - "description": "Verifies VIEW SERVER STATE for server state, query metrics, and query activity collection.", - "remediation": "Grant VIEW SERVER STATE to the Datadog login.", + "description": ( + "Verifies VIEW SERVER STATE (or VIEW DATABASE STATE on Azure SQL Database) for server state, " + "query metrics, and query activity collection." + ), + "remediation": "Grant VIEW SERVER STATE (or VIEW DATABASE STATE on Azure SQL Database) to the Datadog login.", "docs_url": SQLSERVER_SETUP_DOCS_URL, }, SQLServerConfigurationError.missing_connect_any_database: { @@ -94,6 +97,7 @@ def __init__(self, check: Any) -> None: self._failed: set[str] = set() self._major_version: int | None = None self._engine_edition: int | None = None + self._is_rds: bool | None = None def register(self) -> None: """Register the diagnostic entry point with the check's Diagnosis object.""" @@ -108,6 +112,7 @@ def _run(self) -> None: self._failed = set() self._major_version = None self._engine_edition = None + self._is_rds = None try: with self._check.connection.open_managed_default_connection(KEY_PREFIX): @@ -121,6 +126,7 @@ def _run(self) -> None: if self._needs_view_any_definition(): self._diagnose_view_any_definition(cursor) if self._needs_msdb_select(): + self._detect_rds(cursor) self._diagnose_msdb_select(cursor) except SQLConnectionError as e: code = SQLServerConfigurationError.connection_failure @@ -191,22 +197,24 @@ def _diagnose_performance_counters(self, cursor: Any) -> None: def _diagnose_view_server_state(self, cursor: Any) -> None: code = SQLServerConfigurationError.missing_view_server_state + azure = self._is_azure_sql_database() + permission_label = "VIEW DATABASE STATE" if azure else "VIEW SERVER STATE" try: - if not _has_server_permission(cursor, "VIEW SERVER STATE"): + if not azure and not _has_server_permission(cursor, "VIEW SERVER STATE"): self._fail(code, diagnosis="The Datadog login does not have VIEW SERVER STATE.") return _execute_read_probe(cursor, "SELECT TOP 1 session_id FROM sys.dm_exec_sessions") except Exception as e: self._fail( code, - diagnosis="Unable to validate VIEW SERVER STATE with sys.dm_exec_sessions: {}".format(e), + diagnosis="Unable to validate {} with sys.dm_exec_sessions: {}".format(permission_label, e), rawerror=str(e), ) return self._check.diagnosis.success( name=code.value, - diagnosis="VIEW SERVER STATE is granted and sys.dm_exec_sessions is readable.", + diagnosis="{} is granted and sys.dm_exec_sessions is readable.".format(permission_label), category=CATEGORY_SQLSERVER, ) @@ -334,6 +342,8 @@ def _msdb_probe_queries(self) -> list[tuple[str, str]]: ("msdb.dbo.sysjobactivity", "SELECT TOP 1 1 FROM msdb.dbo.sysjobactivity"), ] ) + if not self._is_rds: + probes.append(("msdb.dbo.syssessions", "SELECT TOP 1 1 FROM msdb.dbo.syssessions")) return probes def _agent_jobs_enabled(self) -> bool: @@ -344,6 +354,14 @@ def _is_azure_sql_database(self) -> bool: engine_edition = self._engine_edition or self._check.static_info_cache.get(STATIC_INFO_ENGINE_EDITION) return engine_edition == ENGINE_EDITION_SQL_DATABASE + def _detect_rds(self, cursor: Any) -> None: + try: + row = _fetchone(cursor, "SELECT name FROM sys.databases WHERE name = 'rdsadmin'") + except Exception: + self._is_rds = False + return + self._is_rds = bool(row) + def _fail(self, code: SQLServerConfigurationError, diagnosis: str, rawerror: str | None = None) -> None: self._check.diagnosis.fail( name=code.value, diff --git a/sqlserver/tests/test_diagnose.py b/sqlserver/tests/test_diagnose.py index 61a266c32d0e8..25c8ed88db78b 100644 --- a/sqlserver/tests/test_diagnose.py +++ b/sqlserver/tests/test_diagnose.py @@ -100,7 +100,14 @@ def _check(instance_minimal_defaults, responses, **instance_overrides): return check -def _happy_responses(*, major_version=16, engine_edition=2, connect_any_database=True, view_any_definition=True): +def _happy_responses( + *, + major_version=16, + engine_edition=2, + connect_any_database=True, + view_any_definition=True, + is_rds=False, +): return [ ("SERVERPROPERTY('ProductMajorVersion')", [(major_version, engine_edition)]), ("sys.dm_os_performance_counters", [(1,)]), @@ -108,10 +115,12 @@ def _happy_responses(*, major_version=16, engine_edition=2, connect_any_database ("sys.dm_exec_sessions", [(1,)]), (_permission("CONNECT ANY DATABASE"), [(1 if connect_any_database else 0,)]), (_permission("VIEW ANY DEFINITION"), [(1 if view_any_definition else 0,)]), + ("'rdsadmin'", [("rdsadmin",)] if is_rds else []), ("msdb.dbo.backupset", []), ("msdb.dbo.sysjobs", []), ("msdb.dbo.sysjobhistory", []), ("msdb.dbo.sysjobactivity", []), + ("msdb.dbo.syssessions", []), ] @@ -121,6 +130,18 @@ def _assert_result(diagnoses, code, result): assert rows[0]['result'] == result, rows +def _replace_response(responses, matcher_key, new_result): + """Return responses with the entry whose matcher matches matcher_key replaced with new_result. + + String matcher_keys compare equal to string matchers; callable matcher_keys compare via __qualname__. + """ + target = getattr(matcher_key, '__qualname__', matcher_key) + return [ + (matcher, new_result) if getattr(matcher, '__qualname__', matcher) == target else (matcher, result) + for matcher, result in responses + ] + + def test_register_is_idempotent(instance_minimal_defaults): check = SQLServer(CHECK_NAME, {}, [instance_minimal_defaults]) registered = list(check.diagnosis._diagnostics) @@ -236,8 +257,7 @@ def test_missing_view_any_definition_fails_for_dbm(instance_minimal_defaults): def test_missing_msdb_select_fails_when_enabled_feature_needs_msdb(instance_minimal_defaults): - responses = _happy_responses() - responses[6] = ("msdb.dbo.backupset", Exception("permission denied")) + responses = _replace_response(_happy_responses(), "msdb.dbo.backupset", Exception("permission denied")) check = _check(instance_minimal_defaults, responses) diagnoses = _get_diagnoses(check) @@ -263,6 +283,34 @@ def test_agent_jobs_adds_msdb_job_table_probes(instance_minimal_defaults): assert "msdb.dbo.sysjobs" in row['diagnosis'] assert "msdb.dbo.sysjobhistory" in row['diagnosis'] assert "msdb.dbo.sysjobactivity" in row['diagnosis'] + assert "msdb.dbo.syssessions" in row['diagnosis'] + + +def test_agent_jobs_skips_syssessions_on_rds(instance_minimal_defaults): + check = _check( + instance_minimal_defaults, + _happy_responses(is_rds=True), + dbm=True, + agent_jobs={'enabled': True}, + ) + + diagnoses = _get_diagnoses(check) + + row = _by_name(diagnoses, SQLServerConfigurationError.missing_msdb_select.value)[0] + assert row['result'] == Diagnosis.DIAGNOSIS_SUCCESS + assert "msdb.dbo.syssessions" not in row['diagnosis'] + + +def test_agent_jobs_missing_syssessions_select_fails(instance_minimal_defaults): + responses = _replace_response(_happy_responses(), "msdb.dbo.syssessions", Exception("permission denied")) + check = _check(instance_minimal_defaults, responses, dbm=True, agent_jobs={'enabled': True}) + + diagnoses = _get_diagnoses(check) + + row = _by_name(diagnoses, SQLServerConfigurationError.missing_msdb_select.value)[0] + assert row['result'] == Diagnosis.DIAGNOSIS_FAIL + assert "msdb.dbo.syssessions" in row['diagnosis'] + assert "permission denied" in row['rawerror'] def test_azure_sql_database_skips_server_and_msdb_specific_dbm_diagnostics(instance_minimal_defaults): @@ -275,6 +323,38 @@ def test_azure_sql_database_skips_server_and_msdb_specific_dbm_diagnostics(insta assert not _by_name(diagnoses, SQLServerConfigurationError.missing_msdb_select.value) +def test_azure_sql_database_uses_view_database_state_probe(instance_minimal_defaults): + responses = _replace_response( + _happy_responses(engine_edition=5), + _permission("VIEW SERVER STATE"), + [(0,)], + ) + check = _check(instance_minimal_defaults, responses) + + diagnoses = _get_diagnoses(check) + + row = _by_name(diagnoses, SQLServerConfigurationError.missing_view_server_state.value)[0] + assert row['result'] == Diagnosis.DIAGNOSIS_SUCCESS + assert "VIEW DATABASE STATE" in row['diagnosis'] + + +def test_azure_sql_database_view_database_state_failure(instance_minimal_defaults): + responses = _replace_response( + _happy_responses(engine_edition=5), + "sys.dm_exec_sessions", + Exception("permission denied"), + ) + check = _check(instance_minimal_defaults, responses) + + diagnoses = _get_diagnoses(check) + + row = _by_name(diagnoses, SQLServerConfigurationError.missing_view_server_state.value)[0] + assert row['result'] == Diagnosis.DIAGNOSIS_FAIL + assert "VIEW DATABASE STATE" in row['diagnosis'] + assert "VIEW SERVER STATE" not in row['diagnosis'] + assert "permission denied" in row['rawerror'] + + def test_get_diagnoses_returns_json(instance_minimal_defaults): check = _check(instance_minimal_defaults, _happy_responses())