Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
f52a367
datadog_checks_base: fix is_prometheus_exposition() to accept NaN and…
vitkyrka May 4, 2026
44af6a6
n8n, kuma, pulsar: add OpenMetrics auto-discovery support
vitkyrka May 4, 2026
fbab85f
26 integrations: add OpenMetrics auto-discovery support
vitkyrka May 4, 2026
7131817
quarkus: add e2e discovery test
vitkyrka May 5, 2026
570103f
temporal: add e2e discovery test
vitkyrka May 5, 2026
3c773f4
ray: add e2e discovery test
vitkyrka May 5, 2026
e8588da
celery: add e2e discovery test
vitkyrka May 5, 2026
e6e8de1
revert: remove auto_conf_discovery from 21 untestable integrations
vitkyrka May 5, 2026
44051f2
quarkus: revert discovery support
vitkyrka May 5, 2026
f63d5c0
boundary, cockroachdb, kong: add OpenMetrics auto-discovery support w…
vitkyrka May 5, 2026
9c67eb1
celery: revert discovery support
vitkyrka May 5, 2026
98b0cb7
demo
vitkyrka May 5, 2026
6a4fba3
n8n: fix missing metrics by setting raw_metric_prefix to n8n_
vitkyrka May 5, 2026
18ef0bd
docs: add advanced auto-config KrakenD experiment design spec
vitkyrka May 5, 2026
162453f
docs: add advanced auto-config KrakenD experiment implementation plan
vitkyrka May 5, 2026
d2e450c
krakend: add auto_conf_discovery.yaml for advanced auto-config experi…
vitkyrka May 5, 2026
686e401
krakend: fix auto_conf_discovery.yaml port hint to 9090
vitkyrka May 5, 2026
9959948
docs: add advanced auto-config Python discover() design spec
vitkyrka May 5, 2026
9613420
docs: add Plan A — Python discovery library implementation plan
vitkyrka May 5, 2026
d83464e
datadog_checks_base: add Service and Port dataclasses for discovery
vitkyrka May 5, 2026
4875d23
datadog_checks_base: add candidate_ports() for discovery probe ordering
vitkyrka May 5, 2026
51b2fa9
datadog_checks_base: add verifier predicates for discovery probes
vitkyrka May 5, 2026
3a2c682
datadog_checks_base: tighten verifier exception catch and add type an…
vitkyrka May 5, 2026
02a5d61
datadog_checks_base: add http_probe() for discovery
vitkyrka May 5, 2026
3335f52
datadog_checks_base: add tcp_probe() for discovery
vitkyrka May 5, 2026
dd256cb
datadog_checks_base: export discovery probe helpers
vitkyrka May 6, 2026
44c2c4f
datadog_checks_base: changelog entry for discovery probe helpers
vitkyrka May 6, 2026
ce5ea81
datadog_checks_base: fix ruff F821 and apply formatter to discovery h…
vitkyrka May 6, 2026
1715067
datadog_checks_base: rename verify= to verifier= on probe helpers
vitkyrka May 6, 2026
a5b19aa
datadog_checks_base: add discover() rtloader bridge helper
vitkyrka May 6, 2026
fbea899
krakend: migrate to Python discover() classmethod
vitkyrka May 6, 2026
de98ae4
datadog_checks_base, krakend: add changelog entries for discover() br…
vitkyrka May 6, 2026
d8b93f8
krakend: drop unjustified 9090 port hint from discover()
vitkyrka May 6, 2026
794ecb2
krakend: add unit tests for discover()
vitkyrka May 6, 2026
d21d89d
krakend: add e2e discovery test
vitkyrka May 6, 2026
6b49375
docs: add Plan C — demo integrations for discover() implementation plan
vitkyrka May 6, 2026
cc8c08a
openmetrics, krakend: move discover() to OpenMetricsBaseCheckV2
vitkyrka May 4, 2026
5298b2b
datadog_checks_base: strip discover() classmethod for alt-PoC baseline
vitkyrka May 6, 2026
02e0d41
krakend: implement trial-mode discovery via check() override
vitkyrka May 6, 2026
216bd9f
test: drop orphan tests for stripped OpenMetricsBaseCheckV2.discover()
vitkyrka May 6, 2026
1cd7800
openmetrics_base: move trial-mode discovery handling into the base class
vitkyrka May 6, 2026
8820aa0
openmetrics_base + integrations: adapt 8 integrations to alt-PoC disc…
vitkyrka May 6, 2026
1070b1e
Revert candidate_ports hint-anyway change
vitkyrka May 6, 2026
3286778
Revert http_probe default timeout bump (0.5s remains)
vitkyrka May 6, 2026
2e07c65
Revert krakend DISCOVERY_PORT_HINTS = [9090]
vitkyrka May 6, 2026
bedfb81
Revert n8n DISCOVERY_PORT_HINTS = [5678]
vitkyrka May 6, 2026
dcca660
config-discovery: AgentCheck-level trial via dynamic proxy class
vitkyrka May 6, 2026
bd37e4b
config-discovery: fix ray e2e by 3 unrelated tweaks
vitkyrka May 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions boundary/changelog.d/23588.added
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add OpenMetrics auto-discovery support.
8 changes: 8 additions & 0 deletions boundary/datadog_checks/boundary/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,17 @@
class BoundaryCheck(OpenMetricsBaseCheckV2, ConfigMixin):
__NAMESPACE__ = 'boundary'
DEFAULT_METRIC_LIMIT = 0
DISCOVERY_PORT_HINTS = [9203]

SERVICE_CHECK_CONTROLLER_HEALTH = 'controller.health'

@classmethod
def generate_configs(cls, service_dict):
    """Yield the parent's discovery candidates, each extended with a health URL.

    For every candidate instance produced by the parent class, the final
    path segment of its ``openmetrics_endpoint`` is dropped and ``/health``
    is appended; the result is stored under ``health_endpoint`` before the
    candidate is yielded.
    """
    for candidate in super().generate_configs(service_dict):
        metrics_url = candidate["openmetrics_endpoint"]
        # Everything before the last '/' is treated as the server base URL.
        server_root = metrics_url.rsplit('/', 1)[0]
        candidate["health_endpoint"] = server_root + "/health"
        yield candidate

def check(self, _):
try:
response = self.http.get(self.config.health_endpoint)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
ad_identifiers:
- hashicorp/boundary
discovery: {}
init_config:
instances: []
33 changes: 32 additions & 1 deletion boundary/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,49 @@
# (C) Datadog, Inc. 2022-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)
from pathlib import Path

import pytest

from datadog_checks.boundary import BoundaryCheck
from datadog_checks.dev import docker_run

from . import common

# Two directories above this file's directory (resolved to an absolute path).
INTEGRATIONS_CORE_ROOT = Path(__file__).resolve().parents[2]

# Discovery auto-config shipped with the boundary integration.
BOUNDARY_AUTOCONF = Path(__file__).parent.parent.joinpath(
    "datadog_checks", "boundary", "data", "auto_conf_discovery.yaml"
)

# Local sources of the base package; the e2e environment mounts these over
# the copies installed in the agent container.
_CHECKS_BASE_PKG = INTEGRATIONS_CORE_ROOT.joinpath("datadog_checks_base", "datadog_checks", "base")
DISCOVERY_HELPERS_DIR = _CHECKS_BASE_PKG / "utils" / "discovery"
OPENMETRICS_V2_BASE_PY = _CHECKS_BASE_PKG / "checks" / "openmetrics" / "v2" / "base.py"
AGENTCHECK_BASE_PY = _CHECKS_BASE_PKG / "checks" / "base.py"

# Mount target prefix inside the agent container.
SITE_PACKAGES = "/opt/datadog-agent/embedded/lib/python3.13/site-packages"


@pytest.fixture(scope='session')
def dd_environment(instance):
    """Start the compose environment and yield ``(instance, metadata)`` once.

    The fixture yields exactly one value: a tuple of the check instance and
    a metadata dict whose ``docker_volumes`` entry lists extra mounts for
    the agent container. A fixture generator must not yield more than once,
    so the bare ``yield instance`` is not kept alongside the tuple yield.
    """
    with docker_run(common.COMPOSE_FILE, endpoints=[common.HEALTH_ENDPOINT, common.METRIC_ENDPOINT], mount_logs=True):
        yield (
            instance,
            {
                'docker_volumes': [
                    # Discovery auto-config for the boundary integration.
                    f"{BOUNDARY_AUTOCONF}:/etc/datadog-agent/conf.d/boundary.d/auto_conf_discovery.yaml:ro",
                    # Local base-package sources mounted over the installed copies.
                    f"{DISCOVERY_HELPERS_DIR}:{SITE_PACKAGES}/datadog_checks/base/utils/discovery:ro",
                    f"{OPENMETRICS_V2_BASE_PY}:{SITE_PACKAGES}/datadog_checks/base/checks/openmetrics/v2/base.py:ro",
                    f"{AGENTCHECK_BASE_PY}:{SITE_PACKAGES}/datadog_checks/base/checks/base.py:ro",
                    # Read-only access to the host Docker socket.
                    "/var/run/docker.sock:/var/run/docker.sock:ro",
                ],
            },
        )


@pytest.fixture(scope='session')
Expand Down
10 changes: 10 additions & 0 deletions boundary/tests/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@
pytestmark = [pytest.mark.e2e]


def test_e2e_discovery(dd_agent_check):
    """Run the agent with no static instances and expect a healthy OpenMetrics service check.

    NOTE(review): ``discovery_min_instances`` / ``discovery_timeout`` are
    passed straight to ``dd_agent_check``; presumably they make the run wait
    for at least one discovered instance within the timeout -- confirm
    against the ``dd_agent_check`` implementation.
    """
    empty_config = {"init_config": {}, "instances": []}
    discovery_options = {"discovery_min_instances": 1, "discovery_timeout": 30}

    aggregator = dd_agent_check(empty_config, rate=True, **discovery_options)

    aggregator.assert_service_check('boundary.openmetrics.health', ServiceCheck.OK)


def test(dd_agent_check, instance):
aggregator = dd_agent_check(instance, rate=True)
custom_tags = instance['tags']
Expand Down
8 changes: 1 addition & 7 deletions celery/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@

@pytest.fixture(scope='session')
def dd_environment():
"""
Start docker-compose environment before running tests and tear it down afterward.
"""
compose_file = common.COMPOSE_FILE

with docker_run(
Expand All @@ -25,12 +22,9 @@ def dd_environment():
CheckEndpoints(common.MOCKED_INSTANCE['openmetrics_endpoint']),
],
):
yield common.MOCKED_INSTANCE, common.E2E_METADATA
yield common.MOCKED_INSTANCE


@pytest.fixture(scope='session')
def instance():
    """Provide a deep copy of ``common.MOCKED_INSTANCE`` so callers never share the original dict."""
    instance_copy = copy.deepcopy(common.MOCKED_INSTANCE)
    return instance_copy
2 changes: 1 addition & 1 deletion celery/tests/docker/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ services:
networks:
- network1
restart: always
command: celery -A tasks flower --port=5555
command: celery --broker=redis://:devops-best-friend@redis-standalone:6379/0 flower --port=5555

networks:
network1:
Expand Down
1 change: 1 addition & 0 deletions cockroachdb/changelog.d/23588.added
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add OpenMetrics auto-discovery support.
2 changes: 2 additions & 0 deletions cockroachdb/datadog_checks/cockroachdb/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ class CockroachdbCheckV2(OpenMetricsBaseCheckV2, ConfigMixin):
__NAMESPACE__ = 'cockroachdb'

DEFAULT_METRIC_LIMIT = 0
DISCOVERY_PORT_HINTS = [8080]
DISCOVERY_METRICS_PATH = '/_status/vars'

def __init__(self, name, init_config, instances):
super().__init__(name, init_config, instances)
Expand Down
4 changes: 3 additions & 1 deletion cockroachdb/datadog_checks/cockroachdb/cockroachdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ class CockroachdbCheck(OpenMetricsBaseCheck):
def __new__(cls, name, init_config, instances):
instance = instances[0]

if 'openmetrics_endpoint' in instance:
# Trial-mode (config-discovery) instances and explicit openmetrics
# configurations both go through the V2 OpenMetrics-based check.
if 'openmetrics_endpoint' in instance or '__discovery_service__' in instance:
return CockroachdbCheckV2(name, init_config, instances)
else:
return super(CockroachdbCheck, cls).__new__(cls)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
ad_identifiers:
- cockroachdb/cockroach
discovery: {}
init_config:
instances: []
34 changes: 33 additions & 1 deletion cockroachdb/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)
import os
from pathlib import Path

import pytest
from packaging.version import parse as parse_version
Expand All @@ -11,6 +12,26 @@

from .common import COCKROACHDB_VERSION, HERE, HOST, PORT

# Two directories above this file's directory (resolved to an absolute path).
INTEGRATIONS_CORE_ROOT = Path(__file__).resolve().parents[2]

# Discovery auto-config shipped with the cockroachdb integration.
COCKROACHDB_AUTOCONF = Path(__file__).parent.parent.joinpath(
    "datadog_checks", "cockroachdb", "data", "auto_conf_discovery.yaml"
)

# Local sources of the base package; the e2e environment mounts these over
# the copies installed in the agent container.
_CHECKS_BASE_PKG = INTEGRATIONS_CORE_ROOT.joinpath("datadog_checks_base", "datadog_checks", "base")
DISCOVERY_HELPERS_DIR = _CHECKS_BASE_PKG / "utils" / "discovery"
OPENMETRICS_V2_BASE_PY = _CHECKS_BASE_PKG / "checks" / "openmetrics" / "v2" / "base.py"
AGENTCHECK_BASE_PY = _CHECKS_BASE_PKG / "checks" / "base.py"

# Mount target prefix inside the agent container.
SITE_PACKAGES = "/opt/datadog-agent/embedded/lib/python3.13/site-packages"


@pytest.fixture(scope='session')
def dd_environment(instance):
Expand All @@ -24,7 +45,18 @@ def dd_environment(instance):
endpoints=instance['openmetrics_endpoint'],
conditions=conditions,
):
yield instance
yield (
instance,
{
'docker_volumes': [
f"{COCKROACHDB_AUTOCONF}:/etc/datadog-agent/conf.d/cockroachdb.d/auto_conf_discovery.yaml:ro",
f"{DISCOVERY_HELPERS_DIR}:{SITE_PACKAGES}/datadog_checks/base/utils/discovery:ro",
f"{OPENMETRICS_V2_BASE_PY}:{SITE_PACKAGES}/datadog_checks/base/checks/openmetrics/v2/base.py:ro",
f"{AGENTCHECK_BASE_PY}:{SITE_PACKAGES}/datadog_checks/base/checks/base.py:ro",
"/var/run/docker.sock:/var/run/docker.sock:ro",
],
},
)


@pytest.fixture(scope='session')
Expand Down
14 changes: 14 additions & 0 deletions cockroachdb/tests/test_e2e.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,24 @@
# (C) Datadog, Inc. 2021-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)
import pytest

from datadog_checks.base.constants import ServiceCheck

from .common import assert_metrics


@pytest.mark.e2e
def test_e2e_discovery(dd_agent_check):
    """Run the agent with no static instances and expect a healthy OpenMetrics service check.

    NOTE(review): ``discovery_min_instances`` / ``discovery_timeout`` are
    passed straight to ``dd_agent_check``; presumably they make the run wait
    for at least one discovered instance within the timeout -- confirm
    against the ``dd_agent_check`` implementation.
    """
    empty_config = {"init_config": {}, "instances": []}
    discovery_options = {"discovery_min_instances": 1, "discovery_timeout": 30}

    aggregator = dd_agent_check(empty_config, rate=True, **discovery_options)

    aggregator.assert_service_check('cockroachdb.openmetrics.health', ServiceCheck.OK)


def test_metrics(dd_agent_check, instance):
    """Run the check through the agent for ``instance`` and validate the result with ``assert_metrics``."""
    assert_metrics(dd_agent_check(instance, rate=True))
1 change: 1 addition & 0 deletions datadog_checks_base/changelog.d/23547.added
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add discover() classmethod to OpenMetricsBaseCheckV2 for generic OpenMetrics port scanning.
1 change: 1 addition & 0 deletions datadog_checks_base/changelog.d/23572.added
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add Service/Port types and probe helpers (http_probe, tcp_probe, candidate_ports, verifier predicates) under datadog_checks.base.utils.discovery for advanced auto-config.
1 change: 1 addition & 0 deletions datadog_checks_base/changelog.d/23576.added
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add discover() rtloader bridge helper for advanced auto-config.
1 change: 1 addition & 0 deletions datadog_checks_base/changelog.d/23581.fixed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix is_prometheus_exposition() to accept NaN and Inf metric values in discovery probes.
103 changes: 103 additions & 0 deletions datadog_checks_base/datadog_checks/base/checks/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,37 @@ def __init_subclass__(cls, *args, **kwargs):
except Exception:
return cls

def __new__(cls, *args, **kwargs):
    """Allocate the check, or a trial-mode proxy when scheduled for config discovery.

    When the first instance carries a non-None ``__discovery_service__``
    entry, a ``_TrialModeProxy`` is built instead of ``cls``; the requested
    class is remembered on the proxy as ``_target_cls`` and real check work
    is deferred until a candidate config is found.
    """
    # The proxy itself is always allocated normally.
    if cls is _TrialModeProxy:
        return super().__new__(cls)

    instances = _extract_instances(args, kwargs)
    if not instances or instances[0].get("__discovery_service__") is None:
        return super().__new__(cls)

    # The proxy deliberately does NOT subclass cls: rtloader's subclass
    # detector (three.cpp:727) skips any AgentCheck subclass that itself has
    # subclasses, so deriving from the target class would break loading for
    # later instantiations of the same check. The target class is kept as a
    # plain attribute and looked up at runtime instead.
    proxy = super().__new__(_TrialModeProxy)
    proxy._target_cls = cls
    # Python skips __init__ when __new__ returns an object whose class is not
    # a subclass of cls, which is the case here, so initialise explicitly.
    proxy.__init__(*args, **kwargs)
    return proxy

@classmethod
def generate_configs(cls, service_dict):
    """Yield candidate complete instance dicts for trial-mode (AD config discovery).

    Checks that opt into config discovery override this method. The default
    implementation signals that discovery is unsupported.

    Raises:
        NotImplementedError: always, naming the class that lacks an override.
    """
    message = f"{cls.__name__} does not support config discovery; override generate_configs"
    raise NotImplementedError(message)

def __init__(self, *args, **kwargs):
# type: (*Any, **Any) -> None
"""
Expand Down Expand Up @@ -1611,3 +1642,75 @@ def load_config(yaml_str: str) -> Any:
raise ValueError(f'Failed to load config: {stderr.decode("utf-8", errors="replace")}')

return _parse_ast_config(stdout.strip().decode('utf-8'))


def _extract_instances(args, kwargs):
    """Return the ``instances`` value from AgentCheck-style constructor arguments.

    Lookup order:
      1. the ``instances`` keyword argument, if present (whatever its value);
      2. the fourth positional argument, for the old-style signature
         ``(name, init_config, agentConfig, instances)``;
      3. the third positional argument when it is a list or tuple, for the
         new-style signature ``(name, init_config, instances)``.

    Returns ``None`` when none of these apply.
    """
    try:
        return kwargs['instances']
    except KeyError:
        pass

    positional_count = len(args)
    if positional_count > 3:
        return args[3]
    if positional_count == 3 and isinstance(args[2], (list, tuple)):
        return args[2]
    return None


class _TrialModeProxy(AgentCheck):
    """Stand-in check used while trial-mode (config discovery) is unresolved.

    ``AgentCheck.__new__`` creates an instance of this class when the first
    instance dict carries a ``__discovery_service__`` payload, and stores the
    originally requested class on ``self._target_cls``.

    The first ``run()`` walks ``self._target_cls.generate_configs(service)``.
    For each candidate it builds a fresh target-class instance, runs it, and
    keeps the first one whose ``run()`` returns an empty error report. Every
    later ``run()`` is forwarded to that winning instance.

    This class is intentionally *not* a subclass of the target class: per the
    rationale in ``AgentCheck.__new__``, rtloader's subclass detector skips
    classes that have subclasses, so adding one would break check loading.
    """

    def __init__(self, *args, **kwargs):
        # ``_target_cls`` has already been attached by AgentCheck.__new__.
        AgentCheck.__init__(self, *args, **kwargs)
        self._service_dict = self.instance["__discovery_service__"]
        self._winner = None

    def run(self):
        """Delegate to the winning instance, or run the trial if there is none yet.

        Returns the winner's report when one exists, ``''`` when the trial
        just succeeded, or a JSON-encoded single-entry error list when the
        trial raised.
        """
        winner = self._winner
        if winner is not None:
            return winner.run()

        try:
            self._run_trial()
        except Exception as exc:
            failure = {
                'message': self.sanitize(str(exc)),
                'traceback': self.sanitize(traceback.format_exc()),
            }
            return json.encode([failure])
        return ''

    def _run_trial(self):
        """Try each generated candidate config and commit the first that runs cleanly.

        Raises:
            ConfigurationError: when no candidates are generated, or when
                every candidate's ``run()`` returns an error report.
        """
        target_cls = self._target_cls
        attempted = False
        last_error = None

        for candidate in target_cls.generate_configs(self._service_dict):
            attempted = True
            trial = target_cls(self.name, self.init_config, [candidate])
            # rtloader assigns check_id and provider to the agent-visible
            # check after construction; copy them so the candidate's metric
            # submissions are keyed by the same check_id as this proxy.
            trial.check_id = self.check_id
            trial.provider = self.provider

            error_report = trial.run()
            if error_report:
                last_error = error_report
                continue

            self._winner = trial
            return

        if not attempted:
            raise ConfigurationError("config-discovery: generate_configs() yielded no candidates")
        raise ConfigurationError(f"config-discovery: no candidate accepted by check() ({last_error})")
Loading
Loading