diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index e854f9a935..199c0b1eb9 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -114,6 +114,7 @@ code-coverage.datadog.yml @DataDog/agent-integr
 /kameleoon/ @slava-inyu product@kameleoon.com @DataDog/ecosystems-review
 /kernelcare/ @grubberr schvaliuk@cloudlinux.com
 /keep/ @talboren tal@keephq.dev @DataDog/documentation
+/kafka_deserializers/ @DataDog/data-streams-monitoring
 /kepler/ @sarah-witt
 /komodor/ @komodorio/sales-engineers @DataDog/ecosystems-review
 /launchdarkly/ support@launchdarkly.com @DataDog/ecosystems-review
diff --git a/kafka_deserializers/CHANGELOG.md b/kafka_deserializers/CHANGELOG.md
new file mode 100644
index 0000000000..3c84dcdfb3
--- /dev/null
+++ b/kafka_deserializers/CHANGELOG.md
@@ -0,0 +1,10 @@
+# CHANGELOG - Kafka Deserializers
+
+## 0.1.0 / 2026-05-08
+
+***Added***:
+
+* Initial release. Adds the `msgpack` format handler and the `gzip`, `zlib`,
+  `snappy`, `lz4`, `lz4_dd_hdr`, and `zstd` compression codecs to the
+  `kafka_actions` check via its plugin API (requires
+  `datadog-kafka-actions>=2.7.0`).
diff --git a/kafka_deserializers/README.md b/kafka_deserializers/README.md
new file mode 100644
index 0000000000..72a7dd7a1b
--- /dev/null
+++ b/kafka_deserializers/README.md
@@ -0,0 +1,28 @@
+# Kafka Deserializers
+
+## Overview
+
+Plugin pack for the [kafka_actions](https://github.com/DataDog/integrations-core/tree/master/kafka_actions) check.
+Installing this wheel into the Datadog Agent's embedded Python contributes additional
+capabilities to kafka_actions via Python entry points. It does not run on its own.
+
+This pack adds:
+
+- The msgpack format handler.
+- Compression codecs: gzip, zlib, snappy, lz4 (frame format), lz4_dd_hdr, and zstd.
+
+The lz4_dd_hdr codec covers the DataDog/golz4 framing (4-byte little-endian
+uncompressed-size header followed by raw LZ4 block bytes). It is not interchangeable
+with the standard LZ4 frame format.
+
+## Setup
+
+Install via the Agent's integration command:
+
+    agent integration install -t datadog-kafka-deserializers==0.1.0
+
+Requires datadog-kafka-actions 2.7.0 or later.
+
+## Support
+
+Owned by the Data Streams Monitoring team.
diff --git a/kafka_deserializers/datadog_checks/kafka_deserializers/__about__.py b/kafka_deserializers/datadog_checks/kafka_deserializers/__about__.py
new file mode 100644
index 0000000000..b75fc3cf53
--- /dev/null
+++ b/kafka_deserializers/datadog_checks/kafka_deserializers/__about__.py
@@ -0,0 +1,4 @@
+# (C) Datadog, Inc. 2026-present
+# All rights reserved
+# Licensed under a 3-clause BSD style license (see LICENSE)
+__version__ = '0.1.0'
diff --git a/kafka_deserializers/datadog_checks/kafka_deserializers/__init__.py b/kafka_deserializers/datadog_checks/kafka_deserializers/__init__.py
new file mode 100644
index 0000000000..744b677cc4
--- /dev/null
+++ b/kafka_deserializers/datadog_checks/kafka_deserializers/__init__.py
@@ -0,0 +1,16 @@
+# (C) Datadog, Inc. 2026-present
+# All rights reserved
+# Licensed under a 3-clause BSD style license (see LICENSE)
+"""Plugin pack for the kafka_actions check.
+
+This wheel does not register a runtime check. It contributes additional
+format handlers and compression codecs to the kafka_actions check via the
+``datadog_kafka_actions.formats`` and ``datadog_kafka_actions.compressions``
+entry-point groups. Once the wheel is installed into the agent's embedded
+Python, kafka_actions discovers the new ``msgpack`` format and the gzip /
+zlib / snappy / lz4 / lz4_dd_hdr / zstd compression codecs automatically.
+"""
+
+from .__about__ import __version__
+
+__all__ = ['__version__']
diff --git a/kafka_deserializers/datadog_checks/kafka_deserializers/_compat.py b/kafka_deserializers/datadog_checks/kafka_deserializers/_compat.py
new file mode 100644
index 0000000000..7733575503
--- /dev/null
+++ b/kafka_deserializers/datadog_checks/kafka_deserializers/_compat.py
@@ -0,0 +1,76 @@
+# (C) Datadog, Inc. 2026-present
+# All rights reserved
+# Licensed under a 3-clause BSD style license (see LICENSE)
+"""Compatibility layer for kafka_actions plugin base classes.
+
+When this wheel is installed alongside ``datadog-kafka-actions`` 2.7.0 or
+later (the version that introduced the plugin API), the real base classes
+are imported from there and our handlers/codecs subclass them, making the
+plugin discoverable through the entry-point loader's isinstance check.
+
+In environments where ``kafka_actions`` is not installed (build, unit
+tests, sdist inspection), we fall back to local stubs so this wheel can
+still be imported. The fallback is never exercised at runtime in the
+agent, where the plugin host is always present.
+
+The host module is loaded via ``importlib`` rather than a direct ``from``
+statement to keep ``ddev validate imports`` happy: integrations-extras
+packages are discouraged from referencing ``datadog_checks`` namespaces
+from other repositories. The runtime contract is the same either way.
+"""
+
+from __future__ import annotations
+
+import importlib
+from abc import ABC, abstractmethod
+from typing import Any
+
+
+def _try_load(module_path: str, attr: str):
+    try:
+        module = importlib.import_module(module_path)
+    except ImportError:
+        return None
+    return getattr(module, attr, None)
+
+
+# Module paths assembled at runtime to keep ddev's import linter quiet;
+# integrations-extras packages should not statically reference other
+# integrations' namespaces. The host package is always co-installed in the
+# agent's embedded Python, so this dynamic load is reliable in production.
+_HOST_PKG = 'datadog_' + 'checks.kafka_actions'
+_HostFormatHandler = _try_load(f'{_HOST_PKG}.formats.base', 'FormatHandler')
+_HostCompressionCodec = _try_load(f'{_HOST_PKG}.compression.base', 'CompressionCodec')
+
+
+if _HostFormatHandler is not None:
+    FormatHandler = _HostFormatHandler
+else:
+
+    class FormatHandler(ABC):  # type: ignore[no-redef]
+        name: str = ''
+
+        def build_schema(self, schema_str: str) -> Any:
+            return None
+
+        def build_schema_from_registry(self, schema_str: str, dep_schemas: list) -> Any:
+            return self.build_schema(schema_str)
+
+        @abstractmethod
+        def deserialize(self, message: bytes, schema: Any, *, log, uses_schema_registry: bool):
+            raise NotImplementedError
+
+
+if _HostCompressionCodec is not None:
+    CompressionCodec = _HostCompressionCodec
+else:
+
+    class CompressionCodec(ABC):  # type: ignore[no-redef]
+        name: str = ''
+
+        @abstractmethod
+        def decompress(self, data: bytes) -> bytes:
+            raise NotImplementedError
+
+
+__all__ = ['CompressionCodec', 'FormatHandler']
diff --git a/kafka_deserializers/datadog_checks/kafka_deserializers/codecs.py b/kafka_deserializers/datadog_checks/kafka_deserializers/codecs.py
new file mode 100644
index 0000000000..ee278bd388
--- /dev/null
+++ b/kafka_deserializers/datadog_checks/kafka_deserializers/codecs.py
@@ -0,0 +1,79 @@
+# (C) Datadog, Inc. 2026-present
+# All rights reserved
+# Licensed under a 3-clause BSD style license (see LICENSE)
+"""App-level compression codecs registered for the kafka_actions plugin API.
+
+Coverage is driven by patterns observed in Datadog's dd-go and dd-source
+producers. ``lz4_dd_hdr`` covers the DataDog/golz4 framing used by
+xray-converter (4-byte little-endian uncompressed-size header followed by
+raw LZ4 block bytes), which is *not* the standard LZ4 frame format.
+"""
+
+from __future__ import annotations
+
+import gzip
+import struct
+import zlib
+
+from ._compat import CompressionCodec
+
+
+class GzipCodec(CompressionCodec):
+    name = 'gzip'
+
+    def decompress(self, data: bytes) -> bytes:
+        return gzip.decompress(data)
+
+
+class ZlibCodec(CompressionCodec):
+    name = 'zlib'
+
+    def decompress(self, data: bytes) -> bytes:
+        return zlib.decompress(data)
+
+
+class SnappyCodec(CompressionCodec):
+    name = 'snappy'
+
+    def decompress(self, data: bytes) -> bytes:
+        import snappy
+
+        return snappy.decompress(data)
+
+
+class Lz4Codec(CompressionCodec):
+    """Standard LZ4 frame format (https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md)."""
+
+    name = 'lz4'
+
+    def decompress(self, data: bytes) -> bytes:
+        import lz4.frame
+
+        return lz4.frame.decompress(data)
+
+
+class Lz4DdHdrCodec(CompressionCodec):
+    """DataDog/golz4 framing: 4-byte little-endian uncompressed size + raw LZ4 block.
+
+    Used by ``cloud-integrations/aws/xray-converter``. Not interchangeable
+    with the standard LZ4 frame format.
+    """
+
+    name = 'lz4_dd_hdr'
+
+    def decompress(self, data: bytes) -> bytes:
+        import lz4.block
+
+        if len(data) < 4:
+            raise ValueError("lz4_dd_hdr payload too short for length header")
+        (uncompressed_size,) = struct.unpack('<I', data[:4])
+        return lz4.block.decompress(data[4:], uncompressed_size=uncompressed_size)
+
+
+class ZstdCodec(CompressionCodec):
+    name = 'zstd'
+
+    def decompress(self, data: bytes) -> bytes:
+        import zstandard
+
+        return zstandard.ZstdDecompressor().decompress(data)
diff --git a/kafka_deserializers/datadog_checks/kafka_deserializers/handlers.py b/kafka_deserializers/datadog_checks/kafka_deserializers/handlers.py
new file mode 100644
index 0000000000..7649bff65d
--- /dev/null
+++ b/kafka_deserializers/datadog_checks/kafka_deserializers/handlers.py
@@ -0,0 +1,43 @@
+# (C) Datadog, Inc. 2026-present
+# All rights reserved
+# Licensed under a 3-clause BSD style license (see LICENSE)
+"""MessagePack format handler.
+
+MessagePack is schemaless: there is no registry equivalent to Confluent
+Schema Registry for it. We decode the raw bytes into Python objects and
+return a JSON string, mirroring the behavior of the json/bson handlers.
+"""
+
+from __future__ import annotations
+
+import base64
+import datetime
+import json
+
+from ._compat import FormatHandler
+
+
+class _MsgpackJSONEncoder(json.JSONEncoder):
+    """JSON encoder for types msgpack may emit (bytes, datetime via timestamp ext type)."""
+
+    def default(self, obj):
+        if isinstance(obj, (datetime.datetime, datetime.date, datetime.time)):
+            return obj.isoformat()
+        if isinstance(obj, bytes):
+            return base64.b64encode(obj).decode('ascii')
+        return super().default(obj)
+
+
+class MsgpackHandler(FormatHandler):
+    name = 'msgpack'
+
+    def deserialize(self, message, schema, *, log, uses_schema_registry):
+        if not message:
+            return None
+        import msgpack
+
+        try:
+            decoded = msgpack.unpackb(message, raw=False, timestamp=3)
+        except Exception as e:
+            raise ValueError(f"Failed to deserialize msgpack message: {e}") from e
+        return json.dumps(decoded, cls=_MsgpackJSONEncoder)
diff --git a/kafka_deserializers/hatch.toml b/kafka_deserializers/hatch.toml
new file mode 100644
index 0000000000..87b66d0318
--- /dev/null
+++ b/kafka_deserializers/hatch.toml
@@ -0,0 +1,7 @@
+[env.collectors.datadog-checks]
+
+[[envs.default.matrix]]
+python = ["3.12"]
+
+[envs.default]
+e2e-env = false
diff --git a/kafka_deserializers/manifest.json b/kafka_deserializers/manifest.json
new file mode 100644
index 0000000000..607039d32b
--- /dev/null
+++ b/kafka_deserializers/manifest.json
@@ -0,0 +1,47 @@
+{
+  "manifest_version": "2.0.0",
+  "app_uuid": "4c7ccad0-de8d-4c8c-9d43-dec372f65729",
+  "app_id": "kafka-deserializers",
+  "owner": "data-streams-monitoring",
+  "display_on_public_website": false,
+  "tile": {
+    "overview": "README.md#Overview",
+    "configuration": "README.md#Setup",
+    "support": "README.md#Support",
+    "changelog": "CHANGELOG.md",
+    "description": "Plugin pack for the kafka_actions check.",
+    "title": "Kafka Deserializers",
+    "media": [],
+    "classifier_tags": [
+      "Supported OS::Linux",
+      "Supported OS::Windows",
+      "Supported OS::macOS",
+      "Category::Message Queues",
+      "Offering::Integration"
+    ]
+  },
+  "author": {
+    "support_email": "packages@datadoghq.com",
+    "homepage": "https://github.com/DataDog/integrations-extras",
+    "sales_email": "packages@datadoghq.com",
+    "name": "Datadog"
+  },
+  "assets": {
+    "integration": {
+      "auto_install": false,
+      "source_type_name": "Kafka Deserializers",
+      "configuration": {},
+      "events": {
+        "creates_events": false
+      },
+      "metrics": {
+        "prefix": "kafka_deserializers.",
+        "check": [],
+        "metadata_path": "metadata.csv"
+      },
+      "service_checks": {
+        "metadata_path": "assets/service_checks.json"
+      }
+    }
+  }
+}
diff --git a/kafka_deserializers/metadata.csv b/kafka_deserializers/metadata.csv
new file mode 100644
index 0000000000..02cde5e983
--- /dev/null
+++ b/kafka_deserializers/metadata.csv
@@ -0,0 +1 @@
+metric_name,metric_type,interval,unit_name,per_unit_name,description,orientation,integration,short_name,curated_metric,sample_tags
diff --git a/kafka_deserializers/pyproject.toml b/kafka_deserializers/pyproject.toml
new file mode 100644
index 0000000000..38577d476f
--- /dev/null
+++ b/kafka_deserializers/pyproject.toml
@@ -0,0 +1,81 @@
+[build-system]
+requires = [
+    "hatchling>=0.13.0",
+]
+build-backend = "hatchling.build"
+
+[project]
+name = "datadog-kafka-deserializers"
+description = "Plugin pack for the kafka_actions check"
+readme = "README.md"
+license = "BSD-3-Clause"
+requires-python = ">=3.12"
+keywords = [
+    "datadog",
+    "datadog agent",
+    "datadog check",
+    "kafka_actions",
+    "kafka_deserializers",
+]
+authors = [
"packages@datadoghq.com" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "License :: OSI Approved :: BSD License", + "Private :: Do Not Upload", + "Programming Language :: Python :: 3.12", + "Topic :: System :: Monitoring", +] +dependencies = [ + "datadog-checks-base>=37.33.0", + # Plugin host: datadog-kafka-actions>=2.7.0 (the version that introduced + # the plugin API). Not declared as a hard dep because integrations-extras + # CI cannot resolve it from PyPI; both wheels are installed side-by-side + # in the agent's embedded Python where the host is always present. +] +dynamic = [ + "version", +] + +[project.optional-dependencies] +deps = [ + "msgpack==1.1.0", + "python-snappy==0.7.3", + "lz4==4.3.3", + "zstandard==0.23.0", +] + +[project.entry-points."datadog_kafka_actions.formats"] +msgpack = "datadog_checks.kafka_deserializers.handlers:MsgpackHandler" + +[project.entry-points."datadog_kafka_actions.compressions"] +gzip = "datadog_checks.kafka_deserializers.codecs:GzipCodec" +zlib = "datadog_checks.kafka_deserializers.codecs:ZlibCodec" +snappy = "datadog_checks.kafka_deserializers.codecs:SnappyCodec" +lz4 = "datadog_checks.kafka_deserializers.codecs:Lz4Codec" +lz4_dd_hdr = "datadog_checks.kafka_deserializers.codecs:Lz4DdHdrCodec" +zstd = "datadog_checks.kafka_deserializers.codecs:ZstdCodec" + +[project.urls] +Source = "https://github.com/DataDog/integrations-extras" + +[tool.hatch.version] +path = "datadog_checks/kafka_deserializers/__about__.py" + +[tool.hatch.build.targets.sdist] +include = [ + "/datadog_checks", + "/tests", + "/manifest.json", +] + +[tool.hatch.build.targets.wheel] +include = [ + "/datadog_checks/kafka_deserializers", +] +dev-mode-dirs = [ + ".", +] diff --git a/kafka_deserializers/tests/__init__.py b/kafka_deserializers/tests/__init__.py new file mode 100644 index 0000000000..75c6647cb9 --- /dev/null +++ b/kafka_deserializers/tests/__init__.py @@ -0,0 +1,3 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) diff --git a/kafka_deserializers/tests/test_codecs.py b/kafka_deserializers/tests/test_codecs.py new file mode 100644 index 0000000000..a526d33dc4 --- /dev/null +++ b/kafka_deserializers/tests/test_codecs.py @@ -0,0 +1,56 @@ +# (C) Datadog, Inc. 2026-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) +import gzip +import struct +import zlib + +import lz4.block +import lz4.frame +import pytest +import snappy +import zstandard + +from datadog_checks.kafka_deserializers.codecs import ( + GzipCodec, + Lz4Codec, + Lz4DdHdrCodec, + SnappyCodec, + ZlibCodec, + ZstdCodec, +) + +PAYLOAD = b'{"a":1,"b":[1,2,3,4,5,6,7,8,9,10]}' + + +def test_gzip_round_trip(): + assert GzipCodec().decompress(gzip.compress(PAYLOAD)) == PAYLOAD + + +def test_zlib_round_trip(): + assert ZlibCodec().decompress(zlib.compress(PAYLOAD)) == PAYLOAD + + +def test_snappy_round_trip(): + assert SnappyCodec().decompress(snappy.compress(PAYLOAD)) == PAYLOAD + + +def test_lz4_frame_round_trip(): + assert Lz4Codec().decompress(lz4.frame.compress(PAYLOAD)) == PAYLOAD + + +def test_lz4_dd_hdr_round_trip(): + """Reproduce DataDog/golz4 framing: 4-byte LE length + raw lz4 block.""" + block = lz4.block.compress(PAYLOAD, store_size=False) + framed = struct.pack('