#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Any
from collections.abc import Mapping
from cmk.agent_based.v2 import (
Service,
Result,
exists,
State,
CheckPlugin,
SimpleSNMPSection,
StringTable,
DiscoveryResult,
CheckResult,
SNMPTree,
check_levels,
render,
)
from dataclasses import dataclass
@dataclass
class TapeDrive:
    """Raw SNMP values reported for a single tape drive.

    Every field holds the string exactly as it arrived in the SNMP row;
    interpreting the status codes is left to the check function.
    """

    # Drive name; the check emits it as its first summary line.
    name: str
    # Availability status code, looked up in the check's availability table.
    ava: str
    # Cleaning status code: the check knows "1" (cleaning needed) and "2" (not needed).
    cleaning: str
    # Power-on time in hours as a string; the check skips it when empty.
    power_on_hours: str
    # Operational status code, looked up in the check's operational status table.
    op_status: str
# Parsed section type: maps the SNMP index column of a drive to its TapeDrive record.
DELL_SECTION = dict[str, TapeDrive]
def parse_dell_emc_ml3_drive(string_table: StringTable) -> DELL_SECTION:
    """Convert the raw SNMP table into a mapping of drive index to ``TapeDrive``.

    Every row must consist of exactly six columns in this order: index, name,
    availability code, cleaning code, power-on hours, operational status code.
    A row with a different column count raises ``ValueError`` from the
    unpacking. If an index appears more than once, the last row wins.
    """
    return {
        index: TapeDrive(
            name=name,
            ava=ava,
            cleaning=cleaning,
            power_on_hours=power_on_hours,
            op_status=op_status,
        )
        for index, name, ava, cleaning, power_on_hours, op_status in string_table
    }
def discover_tape_drive(section: DELL_SECTION) -> DiscoveryResult:
    """Yield one service per drive, using the section key (drive index) as item."""
    yield from (Service(item=drive_index) for drive_index in section)
# Availability code -> (built-in state, label). Built once instead of on every check call.
_AVAILABILITY_STATES: Mapping[str, tuple[State, str]] = {
    "1": (State.WARN, "other"),
    "2": (State.UNKNOWN, "unknown"),
    "3": (State.OK, "runningFullPower"),
    "4": (State.WARN, "warning"),
    "5": (State.WARN, "inTest"),
    "6": (State.WARN, "notApplicable"),
    "7": (State.WARN, "powerOff"),
    "8": (State.WARN, "offLine"),
    "9": (State.WARN, "offDuty"),
    "10": (State.CRIT, "degraded"),
    "11": (State.WARN, "notInstalled"),
    "12": (State.CRIT, "installError"),
    "13": (State.WARN, "powerSaveUnknown"),
    "14": (State.WARN, "powerSaveLowPowerMode"),
    "15": (State.WARN, "powerSaveStandby"),
    "16": (State.WARN, "powerCycle"),
    "17": (State.WARN, "powerSaveWarning"),
    "18": (State.WARN, "paused"),
    "19": (State.WARN, "notReady"),
    "20": (State.WARN, "notConfigured"),
    "21": (State.WARN, "quiesced"),
}

# Operational status code -> (built-in state, label).
_OPERATIONAL_STATES: Mapping[str, tuple[State, str]] = {
    "0": (State.UNKNOWN, "unknown"),
    "1": (State.WARN, "other"),
    "2": (State.OK, "ok"),
    "3": (State.WARN, "degraded"),
    "4": (State.WARN, "stressed"),
    "5": (State.WARN, "predictiveFailure"),
    "6": (State.CRIT, "error"),
    "7": (State.CRIT, "non-RecoverableError"),
    "8": (State.WARN, "starting"),
    "9": (State.WARN, "stopping"),
    "10": (State.WARN, "stopped"),
    "11": (State.WARN, "inService"),
    "12": (State.WARN, "noContact"),
    "13": (State.CRIT, "lostCommunication"),
    "14": (State.WARN, "aborted"),
    "15": (State.WARN, "dormant"),
    "16": (State.CRIT, "supportingEntityInError"),
    "17": (State.OK, "completed"),
    "18": (State.WARN, "powerMode"),
    "19": (State.WARN, "dMTFReserved"),
}


def _to_state(value: Any, default: State) -> State:
    """Coerce a rule parameter value into a ``State``.

    Accepts a ``State`` member or anything ``int()`` can convert to one of the
    State values (e.g. the plain integers a ServiceState rule element stores).
    Returns *default* when the value is missing or cannot be converted, so an
    unusable value never reaches ``Result(state=...)``, which only accepts
    ``State`` constants.
    """
    if value is None:
        return default
    if isinstance(value, State):
        return value
    try:
        return State(int(value))
    except (TypeError, ValueError):
        return default


def _override_state(params: Mapping[str, Any], group: str, code: str, default: State) -> State:
    """Return the state configured for *code* in the rule group *group*.

    Overrides are stored as ``params[group][f"{group}_{code}"]``; *default* is
    returned when the group or the key is absent or the value is unusable.
    """
    overrides = params.get(group) or {}
    return _to_state(overrides.get(f"{group}_{code}"), default)


def check_dell_emc_ml3_drive(
    item: str, params: Mapping[str, Any], section: DELL_SECTION
) -> CheckResult:
    """Report name, availability, cleaning status, uptime and operational status of a drive.

    Args:
        item: Drive index as discovered (key of *section*).
        params: Rule parameters. Recognised keys:
            ``ava_map`` - mapping ``ava_map_<code>`` -> state override for availability codes,
            ``opa_map`` - mapping ``opa_map_<code>`` -> state override for operational codes,
            ``cleaning_needed`` - state to report when the drive asks for cleaning
            (defaults to WARN).
            State values may be ``State`` members or their integer equivalents.
        section: Parsed section mapping drive index to ``TapeDrive``.

    Yields:
        Results (and the ``uptime`` metric) for the drive; nothing if the item
        is not present in the section.
    """
    if not (drive := section.get(item)):
        return

    yield Result(state=State.OK, summary=drive.name)

    # Availability: built-in mapping, optionally overridden per code by the rule.
    status, txt = _AVAILABILITY_STATES.get(drive.ava, (State.UNKNOWN, "unknown Status Code"))
    status = _override_state(params, "ava_map", drive.ava, status)
    yield Result(state=status, summary=f"availability State: {txt}")

    # Cleaning: only the "cleaning needed" state is configurable.
    cleaning_map = {
        "1": (_to_state(params.get("cleaning_needed"), State.WARN), "cleaning needed"),
        "2": (State.OK, "no cleaning needed"),
    }
    status, txt = cleaning_map.get(drive.cleaning, (State.UNKNOWN, "unknown Status"))
    yield Result(state=status, summary=f"Cleaning Status: {txt}")

    # Power-on time: the device reports hours, the metric is in seconds.
    if drive.power_on_hours:
        try:
            power_on_seconds = int(drive.power_on_hours) * 3600
        except ValueError:
            # A non-numeric value must not crash the whole check; mention it in the details.
            yield Result(
                state=State.OK,
                notice=f"Power-on hours not numeric: {drive.power_on_hours!r}",
            )
        else:
            yield from check_levels(
                power_on_seconds,
                metric_name="uptime",
                render_func=render.timespan,
                label="Uptime",
            )

    # Operational status: built-in mapping, optionally overridden per code by the rule.
    status, txt = _OPERATIONAL_STATES.get(drive.op_status, (State.UNKNOWN, "unknown status"))
    status = _override_state(params, "opa_map", drive.op_status, status)
    yield Result(state=status, summary=f"Operational Status: {txt}")
# SNMP section registration: one table row per tape drive.
snmp_section_dell_emc_ml3_drive = SimpleSNMPSection(
    name="dell_emc_ml3_drive",
    parse_function=parse_dell_emc_ml3_drive,
    # Detection is based on the presence of any OID below the fetched table.
    detect=exists(".1.3.6.1.4.1.14851.3.1.6.2.1.*"),
    fetch=SNMPTree(
        base=".1.3.6.1.4.1.14851.3.1.6.2.1",
        # Column order must match the unpacking in parse_dell_emc_ml3_drive.
        oids=[
            "1",  # index
            "3",  # name
            "5",  # availability code
            "6",  # cleaning code
            "10",  # power-on hours
            "11",  # operational status code
        ],
    ),
)
# Check plugin registration: one "Tape Drive <index>" service per discovered drive.
check_plugin_dell_emc_ml3_drive = CheckPlugin(
    name="dell_emc_ml3_drive",
    service_name="Tape Drive %s",
    check_function=check_dell_emc_ml3_drive,
    discovery_function=discover_tape_drive,
    # No built-in parameter defaults; the check applies its own fallbacks.
    check_default_parameters={},
    check_ruleset_name="dell_emc_ml3_drive",
)
Whenever a drive needs to be cleaned, the check crashes with the TypeError "'state' must be a checkmk State constant, got 1" when the rule parameter or override provides an integer ServiceState value (e.g. 1). The plugin passes raw ints to Result(state=...), which the Checkmk API rejects. The issue can be reproduced with the attached crash.info.
Checkmk crash info
I asked GitHub Copilot to fix the issue, and this is what it produced.
I tested it on my Checkmk 2.3 instance and the check no longer crashes. The service now goes into the WARN state as configured.
Could you either adopt this change or adjust the code yourself so that the crash no longer occurs, and then release a new version?
Thank you very much.
Just the changed part
Full dell_emc_ml3_drive.py with the change