diff --git a/infrastructure/terraform/modules/eventpub/README.md b/infrastructure/terraform/modules/eventpub/README.md
index 4be7358..66d08ca 100644
--- a/infrastructure/terraform/modules/eventpub/README.md
+++ b/infrastructure/terraform/modules/eventpub/README.md
@@ -18,11 +18,15 @@
| [data\_plane\_bus\_arn](#input\_data\_plane\_bus\_arn) | Data plane event bus arn | `string` | n/a | yes |
| [default\_tags](#input\_default\_tags) | Default tag map for application to all taggable resources in the module | `map(string)` | `{}` | no |
| [enable\_event\_cache](#input\_enable\_event\_cache) | Enable caching of events to an S3 bucket | `bool` | `false` | no |
+| [enable\_event\_publishing\_anomaly\_detection](#input\_enable\_event\_publishing\_anomaly\_detection) | Enable CloudWatch anomaly detection alarm for SNS message publishing. Detects abnormal drops or spikes in event publishing volume. | `bool` | `true` | no |
| [enable\_firehose\_raw\_message\_delivery](#input\_enable\_firehose\_raw\_message\_delivery) | Enables raw message delivery on firehose subscription | `bool` | `false` | no |
| [enable\_sns\_delivery\_logging](#input\_enable\_sns\_delivery\_logging) | Enable SNS Delivery Failure Notifications | `bool` | `false` | no |
| [environment](#input\_environment) | The name of the terraformscaffold environment the module is called for | `string` | n/a | yes |
| [event\_cache\_buffer\_interval](#input\_event\_cache\_buffer\_interval) | The buffer interval for data firehose | `number` | `500` | no |
| [event\_cache\_expiry\_days](#input\_event\_cache\_expiry\_days) | s3 archiving expiry in days | `number` | `30` | no |
+| [event\_publishing\_anomaly\_band\_width](#input\_event\_publishing\_anomaly\_band\_width) | The width of the anomaly detection band. Higher values (e.g. 4-6) reduce sensitivity and noise, lower values (e.g. 2-3) increase sensitivity. Typical range: 2-6. | `number` | `5` | no |
+| [event\_publishing\_anomaly\_evaluation\_periods](#input\_event\_publishing\_anomaly\_evaluation\_periods) | Number of evaluation periods for the publishing anomaly alarm. Each period is defined by event\_publishing\_anomaly\_period. | `number` | `3` | no |
+| [event\_publishing\_anomaly\_period](#input\_event\_publishing\_anomaly\_period) | The period in seconds over which the specified statistic is applied for anomaly detection. Minimum 300 seconds (5 minutes). Recommended: 300-600. | `number` | `300` | no |
| [force\_destroy](#input\_force\_destroy) | When enabled will force destroy event-cache S3 bucket | `bool` | `false` | no |
| [group](#input\_group) | The name of the tfscaffold group | `string` | `null` | no |
| [iam\_permissions\_boundary\_arn](#input\_iam\_permissions\_boundary\_arn) | The ARN of the permissions boundary to use for the IAM role | `string` | `null` | no |
@@ -42,6 +46,7 @@
| Name | Description |
|------|-------------|
+| [publishing\_anomaly\_alarm](#output\_publishing\_anomaly\_alarm) | CloudWatch anomaly detection alarm details for SNS publishing |
| [s3\_bucket\_event\_cache](#output\_s3\_bucket\_event\_cache) | S3 Bucket ARN and Name for event cache |
| [sns\_topic](#output\_sns\_topic) | SNS Topic ARN and Name |
diff --git a/infrastructure/terraform/modules/eventpub/cloudwatch_metric_alarm_publishing_anomaly.tf b/infrastructure/terraform/modules/eventpub/cloudwatch_metric_alarm_publishing_anomaly.tf
new file mode 100644
index 0000000..18d81ea
--- /dev/null
+++ b/infrastructure/terraform/modules/eventpub/cloudwatch_metric_alarm_publishing_anomaly.tf
@@ -0,0 +1,42 @@
+# Two-sided CloudWatch anomaly alarm on SNS publish volume; created only when the enable flag is true.
+resource "aws_cloudwatch_metric_alarm" "publishing_anomaly" {
+  count = var.enable_event_publishing_anomaly_detection ? 1 : 0
+
+  alarm_name        = "${local.csi}-sns-publishing-anomaly"
+  alarm_description = "RELIABILITY: Anomaly detection alarm for abnormal SNS message publishing patterns. Detects unexpected drops or spikes in event publishing volume that may indicate service degradation or misconfiguration."
+  actions_enabled   = true
+
+  # Breach when m1 falls below or rises above the band returned by ad1.
+  comparison_operator = "LessThanLowerOrGreaterThanUpperThreshold"
+  threshold_metric_id = "ad1"
+  evaluation_periods  = var.event_publishing_anomaly_evaluation_periods
+  # NOTE(review): periods with no datapoints count as healthy here; if SNS reports nothing while
+  # publishing is fully stopped, such an outage would not alarm; confirm this is intended.
+  treat_missing_data = "notBreaching"
+
+  # m1: observed messages published to this module's topic, summed per period.
+  metric_query {
+    id          = "m1"
+    return_data = true
+    metric {
+      namespace   = "AWS/SNS"
+      metric_name = "NumberOfMessagesPublished"
+      stat        = "Sum"
+      period      = var.event_publishing_anomaly_period
+      dimensions  = { TopicName = aws_sns_topic.main.name }
+    }
+  }
+
+  # ad1: expected band around m1; the second argument is the configurable band width.
+  metric_query {
+    id          = "ad1"
+    label       = "NumberOfMessagesPublished (expected)"
+    expression  = "ANOMALY_DETECTION_BAND(m1, ${var.event_publishing_anomaly_band_width})"
+    return_data = true
+  }
+
+  tags = merge(local.default_tags, {
+    AlarmType    = "AnomalyDetection"
+    AlarmPurpose = "EventPublishingAbnormality"
+  })
+}
diff --git a/infrastructure/terraform/modules/eventpub/outputs.tf b/infrastructure/terraform/modules/eventpub/outputs.tf
index e2ff3b3..cbba9df 100644
--- a/infrastructure/terraform/modules/eventpub/outputs.tf
+++ b/infrastructure/terraform/modules/eventpub/outputs.tf
@@ -13,3 +13,11 @@ output "s3_bucket_event_cache" {
bucket = module.s3bucket_event_cache[0].bucket
} : {}
}
+
+output "publishing_anomaly_alarm" {
+  description = "CloudWatch anomaly detection alarm details for SNS publishing"
+  value = !var.enable_event_publishing_anomaly_detection ? null : { # null when the alarm is not created
+    name = aws_cloudwatch_metric_alarm.publishing_anomaly[0].alarm_name
+    arn  = aws_cloudwatch_metric_alarm.publishing_anomaly[0].arn
+  }
+}
diff --git a/infrastructure/terraform/modules/eventpub/variables.tf b/infrastructure/terraform/modules/eventpub/variables.tf
index 41141f9..7bdaa30 100644
--- a/infrastructure/terraform/modules/eventpub/variables.tf
+++ b/infrastructure/terraform/modules/eventpub/variables.tf
@@ -129,3 +129,27 @@ variable "additional_policies_for_event_cache_bucket" {
description = "A list of JSON policies to use to build the bucket policy"
default = []
}
+
+variable "enable_event_publishing_anomaly_detection" {
+  description = "Enable CloudWatch anomaly detection alarm for SNS message publishing. Detects abnormal drops or spikes in event publishing volume."
+  type        = bool
+  default     = true # opt-out: the alarm is created unless the caller sets this to false
+}
+
+variable "event_publishing_anomaly_evaluation_periods" {
+  description = "Number of evaluation periods for the publishing anomaly alarm. Each period is defined by event_publishing_anomaly_period."
+  type        = number
+  default     = 3 # with the default 300-second period this spans 15 minutes
+}
+
+variable "event_publishing_anomaly_period" {
+  description = "The period in seconds over which the specified statistic is applied for anomaly detection. Minimum 300 seconds (5 minutes). Recommended: 300-600."
+  type        = number
+  default     = 300 # NOTE(review): the documented 300-second minimum is not enforced by a validation block here
+}
+
+variable "event_publishing_anomaly_band_width" {
+  type        = number
+  description = "The width of the anomaly detection band. Higher values (e.g. 4-6) reduce sensitivity and noise, lower values (e.g. 2-3) increase sensitivity. Typical range: 2-6."
+  default     = 5 # sits in the lower-sensitivity (4-6) end of the range given in the description
+}