Skip to content

TypeError in dell_emc_ml3_drive check | cleaning needed state causes plugin crash #24

@uh-dk

Description

@uh-dk

Whenever a drive needs to be cleaned, the check crashes with TypeError "'state' must be a checkmk State constant, got 1" when the rule parameter or override provides an integer ServiceState value (e.g. 1). The plugin passes raw ints to Result(state=...), which the Checkmk API rejects. This issue reproduces with the attached crash.info.

CheckMK Crash info

{
    "core": "cmc",
    "crash_type": "check",
    "details": {
        "check_output": "check failed - please submit a crash report!",
        "check_type": "dell_emc_ml3_drive",
        "description": "Tape Drive 3",
        "enforced_service": false,
        "host": "Host-name-REMOVED",
        "inline_snmp": true,
        "is_cluster": false,
        "item": "3",
        "params": "Parameters({'cleaning_needed': 1})",
        "section": {
            "1": "TapeDrive(name='IBM ULT3580-HH8 SN-REMOVED', ava='3', cleaning='2', power_on_hours='40116', op_status='2')",
            "2": "TapeDrive(name='IBM ULT3580-HH8 SN-REMOVED', ava='3', cleaning='2', power_on_hours='35742', op_status='2')",
            "3": "TapeDrive(name='IBM ULT3580-HH8 SN-REMOVED', ava='3', cleaning='1', power_on_hours='5575', op_status='2')",
            "4": "TapeDrive(name='IBM ULT3580-HH8 SN-REMOVED', ava='3', cleaning='2', power_on_hours='2543', op_status='2')"
        }
    },
    "edition": "cme",
    "exc_traceback": [
        [
            "/omd/sites/Site-name-REMOVED/lib/python3/cmk/base/checkers.py",
            734,
            "get_aggregated_result",
            "check_result = check_function(**item_kw, **params_kw, **section_kws)"
        ],
        [
            "/omd/sites/Site-name-REMOVED/lib/python3/cmk/base/checkers.py",
            514,
            "__check_function",
            "return _aggregate_results(consume_check_results(check_function(*args, **kw)))"
        ],
        [
            "/omd/sites/Site-name-REMOVED/lib/python3/cmk/base/checkers.py",
            572,
            "consume_check_results",
            "for subr in subresults:"
        ],
        [
            "/omd/sites/Site-name-REMOVED/lib/python3/cmk/base/api/agent_based/register/check_plugins.py",
            95,
            "filtered_generator",
            "for element in generator(*args, **kwargs):"
        ],
        [
            "/omd/sites/Site-name-REMOVED/local/lib/python3/cmk_addons/plugins/dell_emc_ml3/agent_based/dell_emc_ml3_drive.py",
            91,
            "check_dell_emc_ml3_drive",
            "yield Result(state=status, summary=f\"Cleaning Status: {txt}\")"
        ],
        [
            "/omd/sites/Site-name-REMOVED/lib/python3.12/site-packages/cmk/agent_based/v1/_checking_classes.py",
            404,
            "__new__",
            "state, summary, details = _create_result_fields(**kwargs)  # type: ignore[misc]"
        ],
        [
            "/omd/sites/Site-name-REMOVED/lib/python3.12/site-packages/cmk/agent_based/v1/_checking_classes.py",
            430,
            "_create_result_fields",
            "raise TypeError(f\"'state' must be a checkmk State constant, got {state}\")"
        ]
    ],
    "exc_type": "TypeError",
    "exc_value": "'state' must be a checkmk State constant, got 1",
    "id": "b57a6c34-895e-11f0-a762-73d59b88190a",
    "local_vars": "eydkZXRhaWxzJzogTm9uZSwKICdub3RpY2UnOiBOb25lLAogJ3N0YXRlJzogMSwKICdzdW1tYXJ5JzogJ0NsZWFuaW5nIFN0YXR1czogY2xlYW5pbmcgbmVlZGVkJ30=",
    "os": "Ubuntu 22.04.5 LTS",
    "python_paths": [
        "/opt/omd/versions/2.3.0p34.cme/bin",
        "/omd/sites/Site-name-REMOVED/local/lib/python3",
        "/omd/sites/Site-name-REMOVED/lib/python3/cloud",
        "/omd/sites/Site-name-REMOVED/lib/python312.zip",
        "/omd/sites/Site-name-REMOVED/lib/python3.12",
        "/omd/sites/Site-name-REMOVED/lib/python3.12/lib-dynload",
        "/omd/sites/Site-name-REMOVED/lib/python3.12/site-packages",
        "/omd/sites/Site-name-REMOVED/lib/python3"
    ],
    "python_version": "3.12.9 (main, Apr  8 2025, 14:44:03) [GCC 13.2.0]",
    "time": 1756970031.3112025,
    "version": "2.3.0p34"
}

I did ask GitHub Copilot to fix the issue, and this is what it produced.
I tested it with my CheckMK 2.3 instance and it no longer crashes. The service now goes into WARN status as configured.

Can you either implement this change or adjust the code yourself so that the crash no longer occurs and release a new version?
Thank you very much.

Just the changed part

# ...existing code...

    # Allow override from rule parameters. These may be provided as ServiceState values
    # (which are ints) or already as State constants. Normalize them to State.
    if params.get("ava_map"):
        override = params["ava_map"].get(f"ava_map_{drive.ava}")
        if override is not None:
            status = _normalize_state(override)

# ...existing code...

    # cleaning map: keep default mapping using State constants. The rule parameter
    # `cleaning_needed` may be a ServiceState value (int) or State constant.
    default_cleaning_needed = params.get("cleaning_needed", State.WARN)
    default_cleaning_needed = _normalize_state(default_cleaning_needed)

    cleaning_map = {
        "1": (default_cleaning_needed, "cleaning needed"),
        "2": (State.OK, "no cleaning needed"),
    }
# ...existing code...

    if params.get("opa_map"):
        override = params["opa_map"].get(f"opa_map_{drive.op_status}", None)
        if override is not None:
            try:
                status = State(override)
            except Exception:
                status = override
# ...existing code...

Full dell_emc_ml3_drive.py with the change

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from typing import Any
from collections.abc import Mapping

from cmk.agent_based.v2 import (
    Service,
    Result,
    exists,
    State,
    CheckPlugin,
    SimpleSNMPSection,
    StringTable,
    DiscoveryResult,
    CheckResult,
    SNMPTree,
    check_levels,
    render,
)


from dataclasses import dataclass


@dataclass
class TapeDrive:
    """Values of one tape drive row, stored as the strings taken from the SNMP table."""

    # Drive name; the check emits it as its first (always OK) summary.
    name: str
    # Availability status code; looked up in the check's availability map.
    ava: str
    # Cleaning status code; the check maps "1" to "cleaning needed" and "2" to
    # "no cleaning needed".
    cleaning: str
    # Power-on time; the check converts it with int() and multiplies by 3600,
    # and skips the uptime metric when the string is empty.
    power_on_hours: str
    # Operational status code; looked up in the check's operational status map.
    op_status: str


# Type of the parsed section: maps the first SNMP column of each row (also used
# as the service item) to the TapeDrive built from the remaining columns.
DELL_SECTION = dict[str, TapeDrive]


def parse_dell_emc_ml3_drive(string_table: StringTable) -> DELL_SECTION:
    """Turn the raw SNMP rows into a mapping of row index to ``TapeDrive``.

    Every row must consist of exactly six columns: index, name, availability,
    cleaning status, power-on hours and operational status. If two rows share
    an index, the later row wins.
    """
    return {
        index: TapeDrive(
            name=name,
            ava=ava,
            cleaning=cleaning,
            power_on_hours=power_on_hours,
            op_status=op_status,
        )
        for index, name, ava, cleaning, power_on_hours, op_status in string_table
    }


def discover_tape_drive(section: DELL_SECTION) -> DiscoveryResult:
    """Yield one service per parsed tape drive, using the section key as item."""
    yield from (Service(item=drive_index) for drive_index in section)


# Built-in mapping of availability status codes to (state, text).
_AVA_MAP = {
    "1": (State.WARN, "other"),
    "2": (State.UNKNOWN, "unknown"),
    "3": (State.OK, "runningFullPower"),
    "4": (State.WARN, "warning"),
    "5": (State.WARN, "inTest"),
    "6": (State.WARN, "notApplicable"),
    "7": (State.WARN, "powerOff"),
    "8": (State.WARN, "offLine"),
    "9": (State.WARN, "offDuty"),
    "10": (State.CRIT, "degraded"),
    "11": (State.WARN, "notInstalled"),
    "12": (State.CRIT, "installError"),
    "13": (State.WARN, "powerSaveUnknown"),
    "14": (State.WARN, "powerSaveLowPowerMode"),
    "15": (State.WARN, "powerSaveStandby"),
    "16": (State.WARN, "powerCycle"),
    "17": (State.WARN, "powerSaveWarning"),
    "18": (State.WARN, "paused"),
    "19": (State.WARN, "notReady"),
    "20": (State.WARN, "notConfigured"),
    "21": (State.WARN, "quiesced"),
}

# Built-in mapping of operational status codes to (state, text).
_OP_STATUS_MAP = {
    "0": (State.UNKNOWN, "unknown"),
    "1": (State.WARN, "other"),
    "2": (State.OK, "ok"),
    "3": (State.WARN, "degraded"),
    "4": (State.WARN, "stressed"),
    "5": (State.WARN, "predictiveFailure"),
    "6": (State.CRIT, "error"),
    "7": (State.CRIT, "non-RecoverableError"),
    "8": (State.WARN, "starting"),
    "9": (State.WARN, "stopping"),
    "10": (State.WARN, "stopped"),
    "11": (State.WARN, "inService"),
    "12": (State.WARN, "noContact"),
    "13": (State.CRIT, "lostCommunication"),
    "14": (State.WARN, "aborted"),
    "15": (State.WARN, "dormant"),
    "16": (State.CRIT, "supportingEntityInError"),
    "17": (State.OK, "completed"),
    "18": (State.WARN, "powerMode"),
    "19": (State.WARN, "dMTFReserved"),
}


def _normalize_state(value: Any, fallback: State = State.UNKNOWN) -> State:
    """Coerce a rule-parameter value into a ``State`` member.

    Rule parameters may carry the monitoring state as a plain integer (0-3)
    or already as a ``State`` member; ``Result`` only accepts the latter.

    Args:
        value: The configured value (int or ``State``).
        fallback: State returned when ``value`` cannot be converted.

    Returns:
        The matching ``State`` member, or ``fallback`` for invalid values.
    """
    try:
        return State(value)
    except (ValueError, TypeError):
        # Forwarding an unconvertible value to Result() would crash the check,
        # so fall back to a known-good state instead.
        return fallback


def check_dell_emc_ml3_drive(item: str, params: Mapping[str, Any], section) -> CheckResult:
    """Check name, availability, cleaning, uptime and operational status of one drive.

    Args:
        item: Key of the drive inside ``section``.
        params: Rule parameters. Recognised keys:
            ``ava_map`` - mapping with optional ``ava_map_<code>`` state overrides,
            ``cleaning_needed`` - state used when the drive reports "cleaning
            needed" (defaults to WARN),
            ``opa_map`` - mapping with optional ``opa_map_<code>`` state overrides.
            States may be given as integers or as ``State`` members; values that
            cannot be converted are ignored and the built-in state is kept.
        section: Parsed section mapping drive keys to ``TapeDrive`` objects.

    Yields:
        One result each for the drive name, availability, cleaning status and
        operational status, plus the ``uptime`` metric when power-on hours are
        available. Nothing is yielded if ``item`` is not in ``section``.
    """
    if not (drive := section.get(item)):
        return

    yield Result(state=State.OK, summary=drive.name)

    # Availability: built-in mapping, optionally overridden per status code.
    status, txt = _AVA_MAP.get(drive.ava, (State.UNKNOWN, "unknown Status Code"))
    if params.get("ava_map"):
        override = params["ava_map"].get(f"ava_map_{drive.ava}")
        if override is not None:
            status = _normalize_state(override, fallback=status)
    yield Result(state=status, summary=f"availability State: {txt}")

    # Cleaning: only the state for "cleaning needed" is configurable.
    cleaning_needed = _normalize_state(
        params.get("cleaning_needed", State.WARN), fallback=State.WARN
    )
    cleaning_map = {
        "1": (cleaning_needed, "cleaning needed"),
        "2": (State.OK, "no cleaning needed"),
    }
    status, txt = cleaning_map.get(drive.cleaning, (State.UNKNOWN, "unknown Status"))
    yield Result(state=status, summary=f"Cleaning Status: {txt}")

    if drive.power_on_hours:
        # The value is multiplied by 3600 (hours -> seconds) for render.timespan.
        yield from check_levels(
            int(drive.power_on_hours) * 3600,
            metric_name="uptime",
            render_func=render.timespan,
            label="Uptime",
        )

    # Operational status: built-in mapping, optionally overridden per status code.
    status, txt = _OP_STATUS_MAP.get(drive.op_status, (State.UNKNOWN, "unknown status"))
    if params.get("opa_map"):
        override = params["opa_map"].get(f"opa_map_{drive.op_status}")
        if override is not None:
            status = _normalize_state(override, fallback=status)
    yield Result(state=status, summary=f"Operational Status: {txt}")


# SNMP section registration: detection uses exists() on the OID pattern
# .1.3.6.1.4.1.14851.3.1.6.2.1.*, and six columns below that base are fetched.
snmp_section_dell_emc_ml3_drive = SimpleSNMPSection(
    name="dell_emc_ml3_drive",
    detect=exists(".1.3.6.1.4.1.14851.3.1.6.2.1.*"),
    parse_function=parse_dell_emc_ml3_drive,
    fetch=SNMPTree(
        base=".1.3.6.1.4.1.14851.3.1.6.2.1",
        # parse_dell_emc_ml3_drive unpacks each row as index, name, ava, cleaning,
        # power_on_hours, op_status -- presumably in the order of the OIDs listed
        # here; verify against the device MIB.
        oids=[
            "1",
            "3",
            "5",
            "6",
            "10",
            "11",
        ],
    ),
)

# Check plugin registration: one "Tape Drive <item>" service per discovered drive.
# The default parameters are empty, so the check function falls back to its
# built-in state mappings unless a rule supplies overrides.
# NOTE(review): the "dell_emc_ml3_drive" ruleset is not defined in this file --
# presumably it lives in the plugin's rulesets module; confirm.
check_plugin_dell_emc_ml3_drive = CheckPlugin(
    name="dell_emc_ml3_drive",
    service_name="Tape Drive %s",
    discovery_function=discover_tape_drive,
    check_function=check_dell_emc_ml3_drive,
    check_ruleset_name="dell_emc_ml3_drive",
    check_default_parameters={},
)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions