diff --git a/CHANGELOG.md b/CHANGELOG.md
index 63978fa573..28656849b7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,8 @@ CHANGELOG
 - Add validator that warns against the downsides of disabling in-place updates on compute and login nodes through DevSettings.
 - Upgrade jmespath to ~=1.0 (from ~=0.10).
 - Upgrade tabulate to <=0.9.0 (from <=0.8.10).
+- Add permission `cloudwatch:PutMetricData` to the head node policy so that clustermgtd is able to emit metrics.
+- Add alarm on missing clustermgtd heartbeat.
 
 **BUG FIXES**
 - Add validation to block updates that change tag order. Blocking such change prevents update failures.
diff --git a/cli/src/pcluster/constants.py b/cli/src/pcluster/constants.py
index 6e68cdaea7..8ac6db4601 100644
--- a/cli/src/pcluster/constants.py
+++ b/cli/src/pcluster/constants.py
@@ -186,6 +186,11 @@
 CW_ALARM_DATAPOINTS_TO_ALARM_DEFAULT = 1
 DETAILED_MONITORING_ENABLED_DEFAULT = False
 
+# CloudWatch Metrics
+CW_METRICS_NAMESPACE = "ParallelCluster"
+CW_METRICS_DIMENSION_CLUSTER_NAME = "ClusterName"
+CW_METRICS_CLUSTERMGTD_HEARTBEAT = "ClustermgtdHeartbeat"
+
 STACK_EVENTS_LOG_STREAM_NAME_FORMAT = "{}-cfn-events"
 
 PCLUSTER_IMAGE_NAME_REGEX = r"^[-_A-Za-z0-9{][-_A-Za-z0-9\s:{}\.]+[-_A-Za-z0-9}]$"
diff --git a/cli/src/pcluster/templates/cdk_builder_utils.py b/cli/src/pcluster/templates/cdk_builder_utils.py
index 8c7ee692e3..ebaeb78973 100644
--- a/cli/src/pcluster/templates/cdk_builder_utils.py
+++ b/cli/src/pcluster/templates/cdk_builder_utils.py
@@ -38,11 +38,13 @@
 from pcluster.constants import (
     COOKBOOK_PACKAGES_VERSIONS,
     CW_LOGS_RETENTION_DAYS_DEFAULT,
+    CW_METRICS_NAMESPACE,
     IAM_ROLE_PATH,
     LAMBDA_VPC_ACCESS_MANAGED_POLICY,
     PCLUSTER_CLUSTER_NAME_TAG,
     PCLUSTER_DYNAMODB_PREFIX,
     PCLUSTER_NODE_TYPE_TAG,
+    SLURM,
     ULTRASERVER_INSTANCE_PREFIX_LIST,
 )
 from pcluster.launch_template_utils import _LaunchTemplateBuilder
@@ -742,6 +744,19 @@ def _build_policy(self) -> List[iam.PolicyStatement]:
             ),
         ]
 
+        if self._config.scheduling.scheduler == SLURM:
+            policy.extend(
+                [
+                    iam.PolicyStatement(
+                        sid="CloudWatch",
+                        actions=["cloudwatch:PutMetricData"],
+                        effect=iam.Effect.ALLOW,
+                        resources=["*"],
+                        conditions={"StringEquals": {"cloudwatch:Namespace": CW_METRICS_NAMESPACE}},
+                    )
+                ]
+            )
+
         if (
             self._config.scheduling.scheduler == "slurm"
             and self._config.scheduling.settings
diff --git a/cli/src/pcluster/templates/cluster_stack.py b/cli/src/pcluster/templates/cluster_stack.py
index 66fdf179c5..e9a0ed483f 100644
--- a/cli/src/pcluster/templates/cluster_stack.py
+++ b/cli/src/pcluster/templates/cluster_stack.py
@@ -64,6 +64,9 @@
     CW_ALARM_PERIOD_DEFAULT,
     CW_LOG_GROUP_NAME_PREFIX,
     CW_LOGS_CFN_PARAM_NAME,
+    CW_METRICS_CLUSTERMGTD_HEARTBEAT,
+    CW_METRICS_DIMENSION_CLUSTER_NAME,
+    CW_METRICS_NAMESPACE,
     DEFAULT_EPHEMERAL_DIR,
     EFS_PORT,
     FSX_PORTS,
@@ -364,38 +367,64 @@ def _cw_metric_head_node(
     def _add_head_node_alarms(self):
         self.head_node_alarms = []
 
+        # Metric-specific configurations (only specify overrides from defaults)
         metrics_for_alarms = {
-            "Health": self._cw_metric_head_node("AWS/EC2", "StatusCheckFailed"),
-            "Cpu": self._cw_metric_head_node("AWS/EC2", "CPUUtilization"),
-            "Mem": self._cw_metric_head_node("CWAgent", "mem_used_percent"),
-            "Disk": self._cw_metric_head_node("CWAgent", "disk_used_percent", extra_dimensions={"path": "/"}),
+            "Health": {
+                "metric": self._cw_metric_head_node("AWS/EC2", "StatusCheckFailed"),
+                "threshold": 0,
+            },
+            "Cpu": {
+                "metric": self._cw_metric_head_node("AWS/EC2", "CPUUtilization"),
+            },
+            "Mem": {
+                "metric": self._cw_metric_head_node("CWAgent", "mem_used_percent"),
+            },
+            "Disk": {
+                "metric": self._cw_metric_head_node("CWAgent", "disk_used_percent", extra_dimensions={"path": "/"}),
+            },
         }
 
-        for metric_key, metric in metrics_for_alarms.items():
+        if self._condition_is_slurm():
+            metrics_for_alarms["ClustermgtdHeartbeat"] = {
+                "metric": self._cw_metric_head_node(
+                    CW_METRICS_NAMESPACE,
+                    CW_METRICS_CLUSTERMGTD_HEARTBEAT,
+                    extra_dimensions={CW_METRICS_DIMENSION_CLUSTER_NAME: self.config.cluster_name},
+                ),
+                "evaluation_periods": 10,
+                "datapoints_to_alarm": 10,
+                "comparison_operator": cloudwatch.ComparisonOperator.LESS_THAN_THRESHOLD,
+                "threshold": 1,
+                "treat_missing_data": cloudwatch.TreatMissingData.BREACHING,
+            }
+
+        for metric_key, alarm_config in metrics_for_alarms.items():
             alarm_id = f"HeadNode{metric_key}Alarm"
             alarm_name = f"{self.stack.stack_name}-HeadNode-{metric_key}"
-            threshold = 0 if metric_key == "Health" else CW_ALARM_PERCENT_THRESHOLD_DEFAULT
-            self.head_node_alarms.append(
-                cloudwatch.Alarm(
-                    scope=self.stack,
-                    id=alarm_id,
-                    alarm_name=alarm_name,
-                    metric=metric,
-                    evaluation_periods=CW_ALARM_EVALUATION_PERIODS_DEFAULT,
-                    threshold=threshold,
-                    comparison_operator=cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD,
-                    datapoints_to_alarm=CW_ALARM_DATAPOINTS_TO_ALARM_DEFAULT,
-                )
-            )
-
-        self.head_node_alarms.append(
-            cloudwatch.CompositeAlarm(
+            alarm = cloudwatch.Alarm(
                 scope=self.stack,
-                id="HeadNodeAlarm",
-                composite_alarm_name=f"{self.stack.stack_name}-HeadNode",
-                alarm_rule=cloudwatch.AlarmRule.any_of(*self.head_node_alarms),
+                id=alarm_id,
+                alarm_name=alarm_name,
+                metric=alarm_config["metric"],
+                evaluation_periods=alarm_config.get("evaluation_periods", CW_ALARM_EVALUATION_PERIODS_DEFAULT),
+                threshold=alarm_config.get("threshold", CW_ALARM_PERCENT_THRESHOLD_DEFAULT),
+                comparison_operator=alarm_config.get(
+                    "comparison_operator", cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD
+                ),
+                datapoints_to_alarm=alarm_config.get("datapoints_to_alarm", CW_ALARM_DATAPOINTS_TO_ALARM_DEFAULT),
+                treat_missing_data=alarm_config.get("treat_missing_data", cloudwatch.TreatMissingData.MISSING),
             )
+            alarm.node.add_dependency(self.wait_condition)
+            self.head_node_alarms.append(alarm)
+
+        composite_alarm = cloudwatch.CompositeAlarm(
+            scope=self.stack,
+            id="HeadNodeAlarm",
+            composite_alarm_name=f"{self.stack.stack_name}-HeadNode",
+            alarm_rule=cloudwatch.AlarmRule.any_of(*self.head_node_alarms),
         )
+        composite_alarm.node.add_dependency(self.wait_condition)
+        self.head_node_alarms.append(composite_alarm)
 
     def _add_iam_resources(self):
         head_node_iam_resources = HeadNodeIamResources(
diff --git a/cli/src/pcluster/templates/cw_dashboard_builder.py b/cli/src/pcluster/templates/cw_dashboard_builder.py
index d676739482..fff10bba2c 100644
--- a/cli/src/pcluster/templates/cw_dashboard_builder.py
+++ b/cli/src/pcluster/templates/cw_dashboard_builder.py
@@ -19,7 +19,12 @@
 
 from pcluster.config.cluster_config import BaseClusterConfig, ExistingFileCache, SharedFsxLustre
 from pcluster.config.common import SharedStorageType
-from pcluster.constants import Feature
+from pcluster.constants import (
+    CW_METRICS_CLUSTERMGTD_HEARTBEAT,
+    CW_METRICS_DIMENSION_CLUSTER_NAME,
+    CW_METRICS_NAMESPACE,
+    Feature,
+)
 from pcluster.utils import is_feature_supported
 
 MAX_WIDTH = 24
@@ -567,9 +572,21 @@ def _add_head_node_instance_metrics_graphs(self):
             new_pcluster_metric(title="Memory Used Percent", metrics=["mem_used_percent"], namespace="CWAgent"),
         ]
 
+        # Custom Metrics
+        pcluster_metrics = []
+        if self.config.scheduling.scheduler == "slurm":
+            pcluster_metrics.append(
+                new_pcluster_metric(
+                    title="Daemons Heartbeats",
+                    metrics=[CW_METRICS_CLUSTERMGTD_HEARTBEAT],
+                    namespace=CW_METRICS_NAMESPACE,
+                    additional_dimensions={CW_METRICS_DIMENSION_CLUSTER_NAME: self.config.cluster_name},
+                )
+            )
+
         # Create graphs for EC2 metrics and CW Agent metrics and update coordinates
         widgets_list = []
-        for metrics_param in ec2_metrics + cwagent_metrics:
+        for metrics_param in ec2_metrics + cwagent_metrics + pcluster_metrics:
             metrics_list = self._generate_metrics_list(metrics_param)
             graph_widget = self._generate_graph_widget(metrics_param.title, metrics_list)
             widgets_list.append(graph_widget)
diff --git a/cli/tests/pcluster/templates/test_cdk_builder_utils.py b/cli/tests/pcluster/templates/test_cdk_builder_utils.py
index fe8e3ff26f..f5276e5d8e 100644
--- a/cli/tests/pcluster/templates/test_cdk_builder_utils.py
+++ b/cli/tests/pcluster/templates/test_cdk_builder_utils.py
@@ -22,7 +22,7 @@
     SlurmComputeResource,
     SlurmQueue,
 )
-from pcluster.constants import PCLUSTER_CLUSTER_NAME_TAG, PCLUSTER_NODE_TYPE_TAG
+from pcluster.constants import CW_METRICS_NAMESPACE, PCLUSTER_CLUSTER_NAME_TAG, PCLUSTER_NODE_TYPE_TAG
 from pcluster.schemas.cluster_schema import ClusterSchema
 from pcluster.templates.cdk_builder import CDKTemplateBuilder
 from pcluster.templates.cdk_builder_utils import (
@@ -593,3 +593,48 @@ def _check_cleanup_role(
         assert_that("/parallelcluster/" in generated_template[cleanup_resource_old]["Properties"]["Path"]).is_true()
 
         assert_that(generated_template[cleanup_resource_old]["Properties"]).does_not_contain_key("RoleName")
+
+
+@pytest.mark.parametrize(
+    "config_file_name, expect_cloudwatch_permission",
+    [
+        ("config-slurm.yaml", True),
+        ("config-awsbatch.yaml", False),
+    ],
+)
+def test_head_node_policy(mocker, test_datadir, config_file_name, expect_cloudwatch_permission):
+    """Verify that cloudwatch:PutMetricData is added for slurm scheduler but not for awsbatch."""
+    mock_aws_api(mocker)
+    mocker.patch(
+        "pcluster.config.cluster_config.HeadNodeNetworking.availability_zone",
+        new_callable=PropertyMock(return_value="us-east-1a"),
+    )
+    mock_bucket(mocker)
+    mock_bucket_object_utils(mocker)
+
+    input_yaml = load_yaml_dict(test_datadir / config_file_name)
+    cluster_config = ClusterSchema(cluster_name="clustername").load(input_yaml)
+    generated_template, _ = CDKTemplateBuilder().build_cluster_template(
+        cluster_config=cluster_config, bucket=dummy_cluster_bucket(), stack_name="clustername"
+    )
+
+    # Find the CloudWatch policy statement in ParallelClusterPoliciesHeadNode
+    head_node_policies = generated_template["Resources"]["ParallelClusterPoliciesHeadNode"]["Properties"][
+        "PolicyDocument"
+    ]["Statement"]
+
+    cloudwatch_statement = next(
+        (stmt for stmt in head_node_policies if stmt.get("Sid") == "CloudWatch"),
+        None,
+    )
+
+    if expect_cloudwatch_permission:
+        assert_that(cloudwatch_statement).is_not_none()
+        assert_that(cloudwatch_statement["Action"]).is_equal_to("cloudwatch:PutMetricData")
+        assert_that(cloudwatch_statement["Effect"]).is_equal_to("Allow")
+        assert_that(cloudwatch_statement["Resource"]).is_equal_to("*")
+        assert_that(cloudwatch_statement["Condition"]).is_equal_to(
+            {"StringEquals": {"cloudwatch:Namespace": CW_METRICS_NAMESPACE}}
+        )
+    else:
+        assert_that(cloudwatch_statement).is_none()
diff --git a/cli/tests/pcluster/templates/test_cdk_builder_utils/test_head_node_policy/config-awsbatch.yaml b/cli/tests/pcluster/templates/test_cdk_builder_utils/test_head_node_policy/config-awsbatch.yaml
new file mode 100644
index 0000000000..719b0b4db0
--- /dev/null
+++ b/cli/tests/pcluster/templates/test_cdk_builder_utils/test_head_node_policy/config-awsbatch.yaml
@@ -0,0 +1,20 @@
+Image:
+  Os: alinux2
+HeadNode:
+  InstanceType: t3.micro
+  Ssh:
+    KeyName: String
+  Networking:
+    SubnetId: subnet-12345678
+Scheduling:
+  Scheduler: awsbatch
+  AwsBatchQueues:
+    - Name: queue1
+      Networking:
+        SubnetIds:
+          - subnet-12345678
+      ComputeResources:
+        - Name: compute_resource1
+          InstanceTypes:
+            - c4.xlarge
+          MaxvCpus: 10
diff --git a/cli/tests/pcluster/templates/test_cdk_builder_utils/test_head_node_policy/config-slurm.yaml b/cli/tests/pcluster/templates/test_cdk_builder_utils/test_head_node_policy/config-slurm.yaml
new file mode 100644
index 0000000000..c7a52acb66
--- /dev/null
+++ b/cli/tests/pcluster/templates/test_cdk_builder_utils/test_head_node_policy/config-slurm.yaml
@@ -0,0 +1,28 @@
+Image:
+  Os: alinux2
+HeadNode:
+  InstanceType: String
+  Ssh:
+    KeyName: String
+  Networking:
+    SubnetId: subnet-12345678
+  Iam:
+    S3Access:
+      - BucketName: testbucketpball
+        EnableWriteAccess: True
+Scheduling:
+  Scheduler: slurm
+  SlurmQueues:
+    - Name: queue1
+      ComputeResources:
+        - Name: compute_resource1
+          InstanceType: t3.micro
+          MinCount: 1
+          MaxCount: 5
+      Networking:
+        SubnetIds:
+          - subnet-12345678
+      Iam:
+        S3Access:
+          - BucketName: testbucketpball
+            EnableWriteAccess: True
diff --git a/cli/tests/pcluster/templates/test_cluster_stack.py b/cli/tests/pcluster/templates/test_cluster_stack.py
index 0553e9b229..8a0f314318 100644
--- a/cli/tests/pcluster/templates/test_cluster_stack.py
+++ b/cli/tests/pcluster/templates/test_cluster_stack.py
@@ -281,7 +281,7 @@ def test_add_alarms(mocker, config_file_name):
             "comparison_operator": "GreaterThanThreshold",
             "evaluation_periods": 1,
             "datapoints_to_alarm": 1,
-            "treat_missing_data": None,
+            "treat_missing_data": "missing",
         },
         "Cpu": {
             "name": "clustername-HeadNode-Cpu",
@@ -291,7 +291,7 @@ def test_add_alarms(mocker, config_file_name):
             "comparison_operator": "GreaterThanThreshold",
             "evaluation_periods": 1,
             "datapoints_to_alarm": 1,
-            "treat_missing_data": None,
+            "treat_missing_data": "missing",
         },
         "Mem": {
             "name": "clustername-HeadNode-Mem",
@@ -301,7 +301,7 @@ def test_add_alarms(mocker, config_file_name):
             "comparison_operator": "GreaterThanThreshold",
             "evaluation_periods": 1,
             "datapoints_to_alarm": 1,
-            "treat_missing_data": None,
+            "treat_missing_data": "missing",
         },
         "Disk": {
             "name": "clustername-HeadNode-Disk",
@@ -311,19 +311,45 @@ def test_add_alarms(mocker, config_file_name):
             "comparison_operator": "GreaterThanThreshold",
             "evaluation_periods": 1,
             "datapoints_to_alarm": 1,
-            "treat_missing_data": None,
+            "treat_missing_data": "missing",
         },
     }
 
+    if cluster.scheduling.scheduler == "slurm":
+        expected_alarms["Clustermgtd-Heartbeat"] = {
+            "name": "clustername-HeadNode-ClustermgtdHeartbeat",
+            "metric_name": "ClustermgtdHeartbeat",
+            "namespace": "ParallelCluster",
+            "threshold": 1,
+            "comparison_operator": "LessThanThreshold",
+            "evaluation_periods": 10,
+            "datapoints_to_alarm": 10,
+            "treat_missing_data": "breaching",
+        }
+
     if cluster.are_alarms_enabled:
-        # Verify each simple alarm exists with correct details
+        # Find the HeadNode wait condition resource name
+        wait_condition_resources = get_resources(
+            generated_template, type="AWS::CloudFormation::WaitCondition", name_pattern="^HeadNodeWaitCondition"
+        )
+        assert_that(wait_condition_resources).is_length(1)
+        wait_condition_name = list(wait_condition_resources.keys())[0]
+
+        # Collect simple alarm resource names for composite alarm rule verification
+        simple_alarm_resource_names = []
+
+        # Verify each simple alarm exists with correct details and depends on wait condition
         for _alarm_key, expected in expected_alarms.items():
             matched_resources = get_resources(
                 generated_template, type=simple_type, properties={"AlarmName": expected["name"]}
             )
             assert_that(matched_resources).is_length(1)
 
-            alarm_properties = list(matched_resources.values())[0]["Properties"]
+            alarm_resource_name = list(matched_resources.keys())[0]
+            simple_alarm_resource_names.append(alarm_resource_name)
+
+            alarm_resource = list(matched_resources.values())[0]
+            alarm_properties = alarm_resource["Properties"]
             assert_that(alarm_properties.get("AlarmName")).is_equal_to(expected["name"])
             assert_that(alarm_properties.get("MetricName")).is_equal_to(expected["metric_name"])
             assert_that(alarm_properties.get("Namespace")).is_equal_to(expected["namespace"])
@@ -332,12 +358,25 @@ def test_add_alarms(mocker, config_file_name):
             assert_that(alarm_properties.get("EvaluationPeriods")).is_equal_to(expected["evaluation_periods"])
             assert_that(alarm_properties.get("DatapointsToAlarm")).is_equal_to(expected["datapoints_to_alarm"])
             assert_that(alarm_properties.get("TreatMissingData")).is_equal_to(expected["treat_missing_data"])
+            assert_that(alarm_resource.get("DependsOn")).contains(wait_condition_name)
 
-        # Verify composite alarm exists
+        # Verify composite alarm exists with correct properties
         composite_alarms = get_resources(generated_template, type=composite_type)
         assert_that(composite_alarms).is_length(1)
-        composite_alarm_properties = list(composite_alarms.values())[0]["Properties"]
+        composite_alarm_resource = list(composite_alarms.values())[0]
+        composite_alarm_properties = composite_alarm_resource["Properties"]
         assert_that(composite_alarm_properties["AlarmName"]).is_equal_to("clustername-HeadNode")
+        assert_that(composite_alarm_resource.get("DependsOn")).contains(wait_condition_name)
+
+        # Verify composite alarm rule triggers on ANY of the simple alarms (OR logic)
+        alarm_rule = composite_alarm_properties.get("AlarmRule")
+        assert_that(alarm_rule).is_not_none()
+        alarm_rule_str = str(alarm_rule)
+        for alarm_resource_name in simple_alarm_resource_names:
+            assert_that(alarm_rule_str).contains(alarm_resource_name)
+        expected_or_count = len(simple_alarm_resource_names) - 1
+        actual_or_count = alarm_rule_str.count(" OR ")
+        assert_that(actual_or_count).is_equal_to(expected_or_count)
     else:
         matched_simple_alarms = get_resources(generated_template, type=simple_type)
         matched_composite_alarms = get_resources(generated_template, type=composite_type)
diff --git a/cli/tests/pcluster/templates/test_cw_dashboard_builder.py b/cli/tests/pcluster/templates/test_cw_dashboard_builder.py
index 647a69a182..f60c5c3378 100644
--- a/cli/tests/pcluster/templates/test_cw_dashboard_builder.py
+++ b/cli/tests/pcluster/templates/test_cw_dashboard_builder.py
@@ -58,7 +58,7 @@ def test_cw_dashboard_builder(mocker, test_datadir, set_env, config_file_name, r
     if cluster_config.is_cw_dashboard_enabled:
         assert_that(output_yaml).contains("CloudwatchDashboard")
         assert_that(output_yaml).contains("Head Node EC2 Metrics")
-        _verify_head_node_instance_metrics_graphs(output_yaml)
+        _verify_head_node_instance_metrics_graphs(output_yaml, cluster_config.scheduling.scheduler)
 
         if cluster_config.are_alarms_enabled:
             assert_that(output_yaml).contains("Cluster Alarms")
@@ -79,7 +79,7 @@ def test_cw_dashboard_builder(mocker, test_datadir, set_env, config_file_name, r
         assert_that(output_yaml).does_not_contain("CloudwatchDashboard")
         assert_that(output_yaml).does_not_contain("Head Node EC2 Metrics")
 
-    _verify_alarms(output_yaml, cluster_config.are_alarms_enabled)
+    _verify_alarms(output_yaml, cluster_config.are_alarms_enabled, cluster_config.scheduling.scheduler)
 
     if cluster_config.is_cw_logging_enabled:
         assert_that(output_yaml).contains("ClusterCWLogGroup")
@@ -87,7 +87,7 @@ def test_cw_dashboard_builder(mocker, test_datadir, set_env, config_file_name, r
         assert_that(output_yaml).does_not_contain("ClusterCWLogGroup")
 
 
-def _verify_alarms(output_yaml, alarms_enabled):
+def _verify_alarms(output_yaml, alarms_enabled, scheduler):
     if alarms_enabled:
         assert_that(output_yaml).contains("HeadNodeHealthAlarm")
         assert_that(output_yaml).contains("StatusCheckFailed")
@@ -101,6 +101,13 @@ def _verify_alarms(output_yaml, alarms_enabled):
         assert_that(output_yaml).contains("HeadNodeDiskAlarm")
         assert_that(output_yaml).contains("disk_used_percent")
 
+        # ClustermgtdHeartbeat alarm is only created for Slurm scheduler
+        if scheduler == "slurm":
+            assert_that(output_yaml).contains("HeadNodeClustermgtdHeartbeatAlarm")
+            assert_that(output_yaml).contains("ClustermgtdHeartbeat")
+        else:
+            assert_that(output_yaml).does_not_contain("HeadNodeClustermgtdHeartbeatAlarm")
+
     else:
         assert_that(output_yaml).does_not_contain("Cluster Alarms")
         assert_that(output_yaml).does_not_contain("AWS::CloudWatch::Alarm")
@@ -129,7 +136,7 @@ def _verify_metric_filter_dimensions(metric_filters):
         )
 
 
-def _verify_head_node_instance_metrics_graphs(output_yaml):
+def _verify_head_node_instance_metrics_graphs(output_yaml, scheduler):
     """Verify CloudWatch graphs within the Head Node Instance Metrics section."""
     assert_that(output_yaml).contains("Head Node Instance Metrics")
     assert_that(output_yaml).contains("CPU Utilization")
@@ -139,6 +146,12 @@ def _verify_head_node_instance_metrics_graphs(output_yaml):
     assert_that(output_yaml).contains("Disk Read/Write Ops")
     assert_that(output_yaml).contains("Disk Used Percent")
     assert_that(output_yaml).contains("Memory Used Percent")
+    # Daemons Heartbeats widget is only created for Slurm scheduler
+    if scheduler == "slurm":
+        assert_that(output_yaml).contains("Daemons Heartbeats")
+        assert_that(output_yaml).contains("ClustermgtdHeartbeat")
+    else:
+        assert_that(output_yaml).does_not_contain("Daemons Heartbeats")
 
 
 def _verify_ec2_metrics_conditions(cluster_config, output_yaml):
diff --git a/cli/tests/pcluster/utils.py b/cli/tests/pcluster/utils.py
index 8e31c445ce..0042e3abdc 100644
--- a/cli/tests/pcluster/utils.py
+++ b/cli/tests/pcluster/utils.py
@@ -10,6 +10,7 @@
 # limitations under the License.
 import itertools
 import os
+import re
 from copy import deepcopy
 
 import pytest
@@ -37,12 +38,18 @@ def load_cluster_model_from_yaml(config_file_name, test_datadir=None):
 
 
 def get_resources(
-    generated_template: dict, name: str = None, type: str = None, properties: dict = None, deletion_policy: str = None
+    generated_template: dict,
+    name: str = None,
+    name_pattern: str = None,
+    type: str = None,
+    properties: dict = None,
+    deletion_policy: str = None,
 ):
     return dict(
         (res_name, res_value)
         for res_name, res_value in generated_template.get("Resources", {}).items()
         if (name is None or res_name == name)
+        and (name_pattern is None or re.search(name_pattern, res_name))
         and (type is None or res_value.get("Type") == type)
         and (deletion_policy is None or res_value.get("DeletionPolicy") == deletion_policy)
         and (
diff --git a/tests/iam_policies/cluster-roles.cfn.yaml b/tests/iam_policies/cluster-roles.cfn.yaml
index 7ae7f6f0a5..b59ecd540f 100644
--- a/tests/iam_policies/cluster-roles.cfn.yaml
+++ b/tests/iam_policies/cluster-roles.cfn.yaml
@@ -136,6 +136,13 @@ Resources:
                   - route53:ChangeResourceRecordSets
                 Resource: '*'
                 Effect: Allow
+              - Action:
+                  - cloudwatch:PutMetricData
+                Resource: '*'
+                Effect: Allow
+                Condition:
+                  StringEquals:
+                    cloudwatch:namespace: 'ParallelCluster'
             Version: '2012-10-17'
     Type: AWS::IAM::Role
 
diff --git a/tests/integration-tests/tests/monitoring/test_monitoring.py b/tests/integration-tests/tests/monitoring/test_monitoring.py
index 061394d94b..a884c54bd0 100644
--- a/tests/integration-tests/tests/monitoring/test_monitoring.py
+++ b/tests/integration-tests/tests/monitoring/test_monitoring.py
@@ -10,6 +10,7 @@
 # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied.
 # See the License for the specific language governing permissions and limitations under the License.
 import datetime
+import logging
 import math
 import time
 
@@ -33,6 +34,7 @@ def test_monitoring(
     cw_log_enabled,
     alarms_enabled,
     region,
+    scheduler,
     pcluster_config_reader,
     clusters_factory,
     test_datadir,
@@ -48,13 +50,14 @@ def test_monitoring(
     headnode_instance_id = cluster.get_cluster_instance_ids(node_type="HeadNode")[0]
     compute_instance_ids = cluster.get_cluster_instance_ids(node_type="Compute")
     # the MinCount is set to 1, so we should have at least one compute node
+    logging.info(f"Retrieved compute nodes: {compute_instance_ids}")
     assert_that(compute_instance_ids).is_not_empty()
 
     # test CWAgent metrics
     # we only perform this test for one of the 3 test conditions
     # because this test could be time-consuming (we allow some retries to ensure we can get metrics data)
     if dashboard_enabled and cw_log_enabled:
-        _test_cw_agent_metrics(cw_client, headnode_instance_id, compute_instance_ids[0])
+        _test_metrics(cw_client, headnode_instance_id, compute_instance_ids[0], cluster.cfn_name, scheduler)
 
     # test dashboard and alarms
     _test_dashboard(cw_client, cluster.cfn_name, region, dashboard_enabled, cw_log_enabled)
@@ -65,21 +68,34 @@ def test_monitoring(
 
 
 @retry(stop_max_attempt_number=8, wait_fixed=minutes(2))
-def _test_cw_agent_metrics(cw_client, headnode_instance_id, compute_instance_id):
+def _test_metrics(cw_client, headnode_instance_id, compute_instance_id, cluster_name, scheduler):
     # query for the past 20 minutes
     start_timestamp, end_timestamp = _get_start_end_timestamp(minutes=20)
 
-    # test memory and disk metrics are collected for the head node
-    metrics_response_headnode = _get_metric_data(headnode_instance_id, cw_client, start_timestamp, end_timestamp)
+    # test memory, disk, and clustermgtd heartbeat metrics are collected for the head node
+    logging.info(f"Retrieving head node metrics from {start_timestamp} to {end_timestamp}")
+    metrics_response_headnode = _get_metric_data(
+        cw_client, start_timestamp, end_timestamp, instance_id=headnode_instance_id, cluster_name=cluster_name
+    )
+    logging.info("Head node metrics retrieved")
     mem_values = _get_metric_data_values(metrics_response_headnode, "mem")
     disk_values = _get_metric_data_values(metrics_response_headnode, "disk")
+    clustermgtd_heartbeat_values = _get_metric_data_values(metrics_response_headnode, "clustermgtd_heartbeat")
     assert_that(mem_values).is_not_empty()
     assert_that(disk_values).is_not_empty()
 
+    if scheduler == "slurm":
+        assert_that(clustermgtd_heartbeat_values).is_not_empty()
+    else:
+        assert_that(clustermgtd_heartbeat_values).is_empty()
+
     # wait for additional 1 minute to reduce the chance of false negative result for compute nodes
-    time.sleep(60)
+    sleep_seconds = 60
+    logging.info(f"Waiting {sleep_seconds} seconds for compute node metrics")
+    time.sleep(sleep_seconds)
+
     # test memory and disk metrics are not collected for compute nodes
-    metrics_response_compute = _get_metric_data(compute_instance_id, cw_client, start_timestamp, end_timestamp)
+    metrics_response_compute = _get_metric_data(cw_client, start_timestamp, end_timestamp, compute_instance_id)
     mem_values = _get_metric_data_values(metrics_response_compute, "mem")
     disk_values = _get_metric_data_values(metrics_response_compute, "disk")
     assert_that(mem_values).is_empty()
@@ -87,6 +103,8 @@ def _test_cw_agent_metrics(cw_client, headnode_instance_id, compute_instance_id)
 
 
 def _test_dashboard(cw_client, cluster_name, region, dashboard_enabled, cw_log_enabled):
+    # TODO: This assertion can be removed because the content of cluster dashboard is covered by unit tests.
+    #       At least let's not expand this assertion with more conditions.
     dashboard_name = "{0}-{1}".format(cluster_name, region)
     if dashboard_enabled:
         dashboard_response = cw_client.get_dashboard(DashboardName=dashboard_name)
@@ -108,6 +126,8 @@ def _test_dashboard(cw_client, cluster_name, region, dashboard_enabled, cw_log_e
 
 
 def _test_alarms(cw_client, cluster_name, headnode_instance_id, alarms_enabled):
+    # TODO: This assertion can be removed because the settings of cluster alarms are covered by unit tests.
+    #       At least let's not expand this assertion with more conditions.
     alarm_response = cw_client.describe_alarms(AlarmNamePrefix=cluster_name)
     if alarms_enabled:
         health_alarm_name = f"{cluster_name}-HeadNode-Health"
@@ -144,49 +164,71 @@ def _get_start_end_timestamp(minutes):
     return start_timestamp, end_timestamp_ceil
 
 
-def _get_metric_data(instance_id, cw_client, start_timestamp, end_timestamp):
-    metrics_response = cw_client.get_metric_data(
-        MetricDataQueries=[
-            {
-                "Id": "mem",
-                "MetricStat": {
-                    "Metric": {
-                        "Namespace": "CWAgent",
-                        "MetricName": "mem_used_percent",
-                        "Dimensions": [
-                            {
-                                "Name": "InstanceId",
-                                "Value": instance_id,
-                            }
-                        ],
-                    },
-                    "Period": 60,
-                    "Stat": "Maximum",
+def _get_metric_data(cw_client, start_timestamp, end_timestamp, instance_id, cluster_name=None):
+    """
+    Query CloudWatch metrics.
+
+    Args:
+        cw_client: CloudWatch client
+        start_timestamp: Start time for the query
+        end_timestamp: End time for the query
+        instance_id: EC2 instance ID for CWAgent metrics
+        cluster_name: Cluster name for ParallelCluster metrics (optional)
+    """
+    queries = [
+        {
+            "Id": "mem",
+            "MetricStat": {
+                "Metric": {
+                    "Namespace": "CWAgent",
+                    "MetricName": "mem_used_percent",
+                    "Dimensions": [{"Name": "InstanceId", "Value": instance_id}],
+                },
+                "Period": 60,
+                "Stat": "Maximum",
+            },
+        },
+        {
+            "Id": "disk",
+            "MetricStat": {
+                "Metric": {
+                    "Namespace": "CWAgent",
+                    "MetricName": "disk_used_percent",
+                    "Dimensions": [
+                        {"Name": "InstanceId", "Value": instance_id},
+                        {"Name": "path", "Value": "/"},
+                    ],
                 },
+                "Period": 60,
+                "Stat": "Maximum",
             },
+        },
+    ]
+
+    if cluster_name:
+        queries.append(
             {
-                "Id": "disk",
+                "Id": "clustermgtd_heartbeat",
                 "MetricStat": {
                     "Metric": {
-                        "Namespace": "CWAgent",
-                        "MetricName": "disk_used_percent",
+                        "Namespace": "ParallelCluster",
+                        "MetricName": "ClustermgtdHeartbeat",
                         "Dimensions": [
-                            {
-                                "Name": "InstanceId",
-                                "Value": instance_id,
-                            },
-                            {"Name": "path", "Value": "/"},
+                            {"Name": "ClusterName", "Value": cluster_name},
+                            {"Name": "InstanceId", "Value": instance_id},
                         ],
                     },
                     "Period": 60,
-                    "Stat": "Maximum",
+                    "Stat": "Sum",
                 },
-            },
-        ],
+            }
+        )
+
+    return cw_client.get_metric_data(
+        MetricDataQueries=queries,
         StartTime=start_timestamp,
         EndTime=end_timestamp,
     )
-    return metrics_response
 
 
 def _get_metric_data_values(response, query_id):