From 89f4a0a5a65a79376620ce01a784ff5b90a5592b Mon Sep 17 00:00:00 2001 From: jamesthompson26-nhs Date: Mon, 2 Jun 2025 11:34:58 +0100 Subject: [PATCH 1/2] CCM-10257: Implement Eventpub in Core --- infrastructure/modules/eventpub/README.md | 2 ++ .../cloudwatch_metric_alarm_dlq_alarm.tf | 2 +- .../cloudwatch_metric_alarm_lambda_errors.tf | 17 ++++++++++++++++ ...atch_metric_alarm_sns_delivery_failures.tf | 16 +++++++++++++++ .../eventpub/iam_role_firehose_role.tf | 5 +++-- .../modules/eventpub/iam_role_lambda.tf | 5 +++-- .../modules/eventpub/iam_role_sns.tf | 5 +++-- .../eventpub/iam_role_sns_delivery_logging.tf | 5 +++-- infrastructure/modules/eventpub/variables.tf | 20 +++++++++++++++++++ 9 files changed, 68 insertions(+), 9 deletions(-) create mode 100644 infrastructure/modules/eventpub/cloudwatch_metric_alarm_lambda_errors.tf create mode 100644 infrastructure/modules/eventpub/cloudwatch_metric_alarm_sns_delivery_failures.tf diff --git a/infrastructure/modules/eventpub/README.md b/infrastructure/modules/eventpub/README.md index 8fd0135..156004a 100644 --- a/infrastructure/modules/eventpub/README.md +++ b/infrastructure/modules/eventpub/README.md @@ -11,6 +11,7 @@ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| +| [alarm\_prefixes](#input\_alarm\_prefixes) | Object containing prefixes for alarm descriptions, e.g. 'RELIABILITY:', 'SECURITY:', 'PERFORMANCE:' |
object({
dlq = string
sns_delivery = string
lambda = string
})
|
{
"dlq": null,
"lambda": null,
"sns_delivery": null
}
| no | | [aws\_account\_id](#input\_aws\_account\_id) | The AWS Account ID (numeric) | `string` | n/a | yes | | [component](#input\_component) | The name of the terraformscaffold component calling this module | `string` | n/a | yes | | [control\_plane\_bus\_arn](#input\_control\_plane\_bus\_arn) | Data plane event bus arn | `string` | n/a | yes | @@ -22,6 +23,7 @@ | [event\_cache\_buffer\_interval](#input\_event\_cache\_buffer\_interval) | The buffer interval for data firehose | `number` | `500` | no | | [event\_cache\_expiry\_days](#input\_event\_cache\_expiry\_days) | s3 archiving expiry in days | `number` | `30` | no | | [group](#input\_group) | The name of the tfscaffold group | `string` | `null` | no | +| [iam\_permissions\_boundary\_arn](#input\_iam\_permissions\_boundary\_arn) | The ARN of the permissions boundary to use for the IAM role | `string` | `null` | no | | [kms\_key\_arn](#input\_kms\_key\_arn) | KMS key arn to use for this function | `string` | n/a | yes | | [log\_level](#input\_log\_level) | The log level to be used in lambda functions within the component. 
Any log with a lower severity than the configured value will not be logged: https://docs.python.org/3/library/logging.html#levels | `string` | `"WARN"` | no | | [log\_retention\_in\_days](#input\_log\_retention\_in\_days) | The retention period in days for the Cloudwatch Logs events generated by the lambda function | `number` | n/a | yes | diff --git a/infrastructure/modules/eventpub/cloudwatch_metric_alarm_dlq_alarm.tf b/infrastructure/modules/eventpub/cloudwatch_metric_alarm_dlq_alarm.tf index ca4e814..3a2f193 100644 --- a/infrastructure/modules/eventpub/cloudwatch_metric_alarm_dlq_alarm.tf +++ b/infrastructure/modules/eventpub/cloudwatch_metric_alarm_dlq_alarm.tf @@ -1,6 +1,6 @@ resource "aws_cloudwatch_metric_alarm" "dlq_alarm" { alarm_name = "${local.csi}-dlq-messages-alarm" - alarm_description = "Alarm for messages in the DLQ" + alarm_description = "${var.alarm_prefixes.dlq} Alarm for messages in the DLQ" comparison_operator = "GreaterThanThreshold" evaluation_periods = 1 metric_name = "ApproximateNumberOfMessagesVisible" diff --git a/infrastructure/modules/eventpub/cloudwatch_metric_alarm_lambda_errors.tf b/infrastructure/modules/eventpub/cloudwatch_metric_alarm_lambda_errors.tf new file mode 100644 index 0000000..9140733 --- /dev/null +++ b/infrastructure/modules/eventpub/cloudwatch_metric_alarm_lambda_errors.tf @@ -0,0 +1,17 @@ +resource "aws_cloudwatch_metric_alarm" "lambda_errors" { + alarm_name = "${local.csi}-lambda-errors-alarm" + alarm_description = "${var.alarm_prefixes.lambda} Alarm for Lambda function errors" + comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = 1 + metric_name = "Errors" + namespace = "AWS/Lambda" + period = 300 + statistic = "Sum" + threshold = 1 + actions_enabled = true + treat_missing_data = "notBreaching" + + dimensions = { + FunctionName = aws_lambda_function.main.function_name + } +} diff --git a/infrastructure/modules/eventpub/cloudwatch_metric_alarm_sns_delivery_failures.tf 
b/infrastructure/modules/eventpub/cloudwatch_metric_alarm_sns_delivery_failures.tf new file mode 100644 index 0000000..93c9a1e --- /dev/null +++ b/infrastructure/modules/eventpub/cloudwatch_metric_alarm_sns_delivery_failures.tf @@ -0,0 +1,16 @@ +resource "aws_cloudwatch_metric_alarm" "sns_delivery_failures" { + alarm_name = "${local.csi}-sns-delivery-failures" + alarm_description = "${var.alarm_prefixes.sns_delivery} Alarm when SNS topic ${aws_sns_topic.main.name} has delivery failures" + comparison_operator = "GreaterThanThreshold" + evaluation_periods = 1 + metric_name = "NumberOfNotificationsFailed" + namespace = "AWS/SNS" + period = 300 + statistic = "Sum" + threshold = 0 + treat_missing_data = "notBreaching" + + dimensions = { + TopicName = aws_sns_topic.main.name + } +} diff --git a/infrastructure/modules/eventpub/iam_role_firehose_role.tf b/infrastructure/modules/eventpub/iam_role_firehose_role.tf index 20414cc..2775280 100644 --- a/infrastructure/modules/eventpub/iam_role_firehose_role.tf +++ b/infrastructure/modules/eventpub/iam_role_firehose_role.tf @@ -1,8 +1,9 @@ resource "aws_iam_role" "firehose_role" { count = var.enable_event_cache ? 1 : 0 - name = "${local.csi}-firehose-role" - assume_role_policy = data.aws_iam_policy_document.firehose_assume_role[0].json + name = "${local.csi}-firehose-role" + assume_role_policy = data.aws_iam_policy_document.firehose_assume_role[0].json + permissions_boundary = var.iam_permissions_boundary_arn
} data "aws_iam_policy_document" "firehose_assume_role" { diff --git a/infrastructure/modules/eventpub/iam_role_lambda.tf b/infrastructure/modules/eventpub/iam_role_lambda.tf index 3d83e9a..d53bd25 100644 --- a/infrastructure/modules/eventpub/iam_role_lambda.tf +++ b/infrastructure/modules/eventpub/iam_role_lambda.tf @@ -1,6 +1,7 @@ resource "aws_iam_role" "lambda" { - name = local.csi - assume_role_policy = data.aws_iam_policy_document.lambda_assumerole.json + name = local.csi + assume_role_policy = data.aws_iam_policy_document.lambda_assumerole.json + permissions_boundary = var.iam_permissions_boundary_arn } resource "aws_iam_policy" "lambda" { diff --git a/infrastructure/modules/eventpub/iam_role_sns.tf b/infrastructure/modules/eventpub/iam_role_sns.tf index 97bdc99..fa48a90 100644 --- a/infrastructure/modules/eventpub/iam_role_sns.tf +++ b/infrastructure/modules/eventpub/iam_role_sns.tf @@ -1,6 +1,7 @@ resource "aws_iam_role" "sns_role" { - name = "${local.csi}-sns-role" - assume_role_policy = data.aws_iam_policy_document.sns_assume_role.json + name = "${local.csi}-sns-role" + assume_role_policy = data.aws_iam_policy_document.sns_assume_role.json + permissions_boundary = var.iam_permissions_boundary_arn } resource "aws_iam_policy" "firehose_delivery" { diff --git a/infrastructure/modules/eventpub/iam_role_sns_delivery_logging.tf b/infrastructure/modules/eventpub/iam_role_sns_delivery_logging.tf index a952bfe..313379e 100644 --- a/infrastructure/modules/eventpub/iam_role_sns_delivery_logging.tf +++ b/infrastructure/modules/eventpub/iam_role_sns_delivery_logging.tf @@ -1,8 +1,9 @@ resource "aws_iam_role" "sns_delivery_logging_role" { count = var.enable_sns_delivery_logging ? 
1 : 0 - name = "${local.csi}-sns-delivery-logging" - assume_role_policy = data.aws_iam_policy_document.sns_delivery_logging_assume_role[0].json + name = "${local.csi}-sns-delivery-logging" + assume_role_policy = data.aws_iam_policy_document.sns_delivery_logging_assume_role[0].json + permissions_boundary = var.iam_permissions_boundary_arn } data "aws_iam_policy_document" "sns_delivery_logging_assume_role" { diff --git a/infrastructure/modules/eventpub/variables.tf b/infrastructure/modules/eventpub/variables.tf index 7e81658..2df4d1d 100644 --- a/infrastructure/modules/eventpub/variables.tf +++ b/infrastructure/modules/eventpub/variables.tf @@ -108,3 +108,23 @@ variable "control_plane_bus_arn" { type = string description = "Data plane event bus arn" } + +variable "iam_permissions_boundary_arn" { + type = string + description = "The ARN of the permissions boundary to use for the IAM role" + default = null +} + +variable "alarm_prefixes" { + type = object({ + dlq = string + sns_delivery = string + lambda = string + }) + description = "Object containing prefixes for alarm descriptions, e.g. 
'RELIABILITY:', 'SECURITY:', 'PERFORMANCE:'" + default = { + dlq = null + sns_delivery = null + lambda = null + } +} From 521ad7707c7def42d768584c9892efcdee019255 Mon Sep 17 00:00:00 2001 From: jamesthompson26-nhs Date: Mon, 9 Jun 2025 11:22:59 +0100 Subject: [PATCH 2/2] CCM-10257: Implement Eventpub in Core --- infrastructure/modules/eventpub/README.md | 1 - .../eventpub/cloudwatch_metric_alarm_dlq_alarm.tf | 2 +- .../cloudwatch_metric_alarm_lambda_errors.tf | 2 +- ...loudwatch_metric_alarm_sns_delivery_failures.tf | 2 +- infrastructure/modules/eventpub/variables.tf | 14 -------------- 5 files changed, 3 insertions(+), 18 deletions(-) diff --git a/infrastructure/modules/eventpub/README.md b/infrastructure/modules/eventpub/README.md index 156004a..c19e86b 100644 --- a/infrastructure/modules/eventpub/README.md +++ b/infrastructure/modules/eventpub/README.md @@ -11,7 +11,6 @@ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [alarm\_prefixes](#input\_alarm\_prefixes) | Object containing prefixes for alarm descriptions, e.g. 'RELIABILITY:', 'SECURITY:', 'PERFORMANCE:' |
object({
dlq = string
sns_delivery = string
lambda = string
})
|
{
"dlq": null,
"lambda": null,
"sns_delivery": null
}
| no | | [aws\_account\_id](#input\_aws\_account\_id) | The AWS Account ID (numeric) | `string` | n/a | yes | | [component](#input\_component) | The name of the terraformscaffold component calling this module | `string` | n/a | yes | | [control\_plane\_bus\_arn](#input\_control\_plane\_bus\_arn) | Data plane event bus arn | `string` | n/a | yes | diff --git a/infrastructure/modules/eventpub/cloudwatch_metric_alarm_dlq_alarm.tf b/infrastructure/modules/eventpub/cloudwatch_metric_alarm_dlq_alarm.tf index 3a2f193..eae6bc4 100644 --- a/infrastructure/modules/eventpub/cloudwatch_metric_alarm_dlq_alarm.tf +++ b/infrastructure/modules/eventpub/cloudwatch_metric_alarm_dlq_alarm.tf @@ -1,6 +1,6 @@ resource "aws_cloudwatch_metric_alarm" "dlq_alarm" { alarm_name = "${local.csi}-dlq-messages-alarm" - alarm_description = "${var.alarm_prefixes.dlq} Alarm for messages in the DLQ" + alarm_description = "RELIABILITY: Alarm for messages in the DLQ" comparison_operator = "GreaterThanThreshold" evaluation_periods = 1 metric_name = "ApproximateNumberOfMessagesVisible" diff --git a/infrastructure/modules/eventpub/cloudwatch_metric_alarm_lambda_errors.tf b/infrastructure/modules/eventpub/cloudwatch_metric_alarm_lambda_errors.tf index 9140733..56acc98 100644 --- a/infrastructure/modules/eventpub/cloudwatch_metric_alarm_lambda_errors.tf +++ b/infrastructure/modules/eventpub/cloudwatch_metric_alarm_lambda_errors.tf @@ -1,6 +1,6 @@ resource "aws_cloudwatch_metric_alarm" "lambda_errors" { alarm_name = "${local.csi}-lambda-errors-alarm" - alarm_description = "${var.alarm_prefixes.lambda} Alarm for Lambda function errors" + alarm_description = "RELIABILITY: Alarm for Lambda function errors" comparison_operator = "GreaterThanOrEqualToThreshold" evaluation_periods = 1 metric_name = "Errors" diff --git a/infrastructure/modules/eventpub/cloudwatch_metric_alarm_sns_delivery_failures.tf b/infrastructure/modules/eventpub/cloudwatch_metric_alarm_sns_delivery_failures.tf index 93c9a1e..e8ef124 100644 
--- a/infrastructure/modules/eventpub/cloudwatch_metric_alarm_sns_delivery_failures.tf +++ b/infrastructure/modules/eventpub/cloudwatch_metric_alarm_sns_delivery_failures.tf @@ -1,6 +1,6 @@ resource "aws_cloudwatch_metric_alarm" "sns_delivery_failures" { alarm_name = "${local.csi}-sns-delivery-failures" - alarm_description = "${var.alarm_prefixes.sns_delivery} Alarm when SNS topic ${aws_sns_topic.main.name} has delivery failures" + alarm_description = "RELIABILITY: Alarm for SNS topic delivery failures" comparison_operator = "GreaterThanThreshold" evaluation_periods = 1 metric_name = "NumberOfNotificationsFailed" diff --git a/infrastructure/modules/eventpub/variables.tf b/infrastructure/modules/eventpub/variables.tf index 2df4d1d..e19c331 100644 --- a/infrastructure/modules/eventpub/variables.tf +++ b/infrastructure/modules/eventpub/variables.tf @@ -114,17 +114,3 @@ variable "iam_permissions_boundary_arn" { description = "The ARN of the permissions boundary to use for the IAM role" default = null } - -variable "alarm_prefixes" { - type = object({ - dlq = string - sns_delivery = string - lambda = string - }) - description = "Object containing prefixes for alarm descriptions, e.g. 'RELIABILITY:', 'SECURITY:', 'PERFORMANCE:'" - default = { - dlq = null - sns_delivery = null - lambda = null - } -}