diff --git a/terraform/core/32-kafka-event-streaming.tf b/terraform/core/32-kafka-event-streaming.tf deleted file mode 100644 index 48d533b53..000000000 --- a/terraform/core/32-kafka-event-streaming.tf +++ /dev/null @@ -1,55 +0,0 @@ -locals { - kafka_event_streaming_count = local.is_live_environment ? 1 : 0 - deploy_kafka_test_lambda = local.kafka_event_streaming_count > 0 && !local.is_production_environment -} - -module "kafka_event_streaming" { - count = local.kafka_event_streaming_count - source = "../modules/kafka-event-streaming" - tags = module.tags.values - environment = var.environment - project = var.project - - glue_iam_role = aws_iam_role.glue_role.name - glue_database_name = aws_glue_catalog_database.landing_zone_catalog_database.name - is_live_environment = local.is_live_environment - identifier_prefix = local.identifier_prefix - short_identifier_prefix = local.short_identifier_prefix - vpc_id = data.aws_vpc.network.id - subnet_ids = data.aws_subnets.network.ids - s3_bucket_to_write_to = module.raw_zone - bastion_private_key_ssm_parameter_name = aws_ssm_parameter.bastion_key.name - bastion_instance_id = aws_instance.bastion.id - role_arns_to_share_access_with = "" - cross_account_lambda_roles = [ - "arn:aws:iam::937934410339:role/mtfh-reporting-data-listener/development/mtfh-reporting-data-listener-lambdaExecutionRole", - "arn:aws:iam::364864573329:role/mtfh-reporting-data-listener/development/mtfh-reporting-data-listener-lambdaExecutionRole" - ] - - datahub_actions_security_group_id = module.datahub[0].datahub_actions_security_group_id - datahub_gms_security_group_id = module.datahub[0].datahub_gms_service_security_group_id - datahub_mae_consumer_security_group_id = module.datahub[0].datahub_mae_security_group_id - datahub_mce_consumer_security_group_id = module.datahub[0].datahub_mce_security_group_id - datahub_kafka_setup_security_group_id = module.datahub[0].datahub_kafka_setup_security_group_id - kafka_tester_lambda_security_group_id = lower(var.environment) != "prod" ? module.kafka_test_lambda[0].security_group_id : "" -} - -module "kafka_test_lambda" { - count = local.deploy_kafka_test_lambda ? 1 : 0 - source = "../modules/kafka-test-lambda" - lambda_name = "kafka-test" - tags = module.tags.values - vpc_id = data.aws_vpc.network.id - subnet_ids = data.aws_subnets.network.ids - identifier_prefix = local.short_identifier_prefix - lambda_artefact_storage_bucket = module.lambda_artefact_storage.bucket_id - kafka_cluster_arn = module.kafka_event_streaming[0].cluster_config.cluster_arn - kafka_cluster_kms_key_arn = module.kafka_event_streaming[0].cluster_config.kms_key_arn - kafka_cluster_name = module.kafka_event_streaming[0].cluster_config.cluster_name - kafka_security_group_id = module.kafka_event_streaming[0].cluster_config.vpc_security_groups - lambda_environment_variables = { - "TARGET_KAFKA_BROKERS" = module.kafka_event_streaming[0].cluster_config.bootstrap_brokers_tls - "SCHEMA_REGISTRY_URL" = module.kafka_event_streaming[0].schema_registry_url - - } -} \ No newline at end of file diff --git a/terraform/core/37-datahub.tf b/terraform/core/37-datahub.tf deleted file mode 100644 index db935924d..000000000 --- a/terraform/core/37-datahub.tf +++ /dev/null @@ -1,19 +0,0 @@ -module "datahub" { - count = local.is_live_environment ? 1 : 0 - - source = "../modules/datahub" - tags = module.tags.values - short_identifier_prefix = local.short_identifier_prefix - identifier_prefix = local.identifier_prefix - vpc_id = data.aws_vpc.network.id - vpc_subnet_ids = local.subnet_ids_list - is_live_environment = local.is_live_environment - datahub_url = var.datahub_url - kafka_properties = { - kafka_zookeeper_connect = module.kafka_event_streaming[0].cluster_config.zookeeper_connect_string - kafka_bootstrap_server = module.kafka_event_streaming[0].cluster_config.bootstrap_brokers_tls - } - schema_registry_properties = { - schema_registry_url = module.kafka_event_streaming[0].schema_registry_url - } -} diff --git a/terraform/modules/datahub/00-init.tf b/terraform/modules/datahub/00-init.tf deleted file mode 100644 index 71b8dc4a7..000000000 --- a/terraform/modules/datahub/00-init.tf +++ /dev/null @@ -1,14 +0,0 @@ -/* This defines the configuration of Terraform and AWS required Terraform Providers. - As this is a module, we don't have any explicity Provider blocks declared, as these - will be inherited from the parent Terraform. -*/ -terraform { - required_version = "~> 1.0" - - required_providers { - aws = { - source = "hashicorp/aws" - version = ">=4.0, <6.0" - } - } -} diff --git a/terraform/modules/datahub/01-inputs-required.tf b/terraform/modules/datahub/01-inputs-required.tf deleted file mode 100644 index 46394bd36..000000000 --- a/terraform/modules/datahub/01-inputs-required.tf +++ /dev/null @@ -1,49 +0,0 @@ -variable "tags" { - description = "AWS tags" - type = map(string) -} - -variable "short_identifier_prefix" { - description = "Project wide resource short identifier prefix" - type = string -} - -variable "identifier_prefix" { - description = "Project wide resource identifier prefix" - type = string -} - -variable "vpc_id" { - description = "VPC ID to deploy the datahub containers into" - type = string -} - -variable "vpc_subnet_ids" { - description = "A list of VPC Subnet IDs the server could be deployed in" - type = list(string) -} - -variable "kafka_properties" { - description = "Properties of the kafka data source DataHub will connect to" - type = object({ - kafka_zookeeper_connect = string - kafka_bootstrap_server = string - }) -} - -variable "schema_registry_properties" { - description = "Properties of the schema registry data source DataHub will connect to" - type = object({ - schema_registry_url = string - }) -} - -variable "is_live_environment" { - description = "A flag indicting if we are running in a live environment for setting up automation" - type = bool -} - -variable "datahub_url" { - description = "Datahub URL" - type = string -} diff --git a/terraform/modules/datahub/02-inputs-optional.tf b/terraform/modules/datahub/02-inputs-optional.tf deleted file mode 100644 index 1bdcc2ba0..000000000 --- a/terraform/modules/datahub/02-inputs-optional.tf +++ /dev/null @@ -1,9 +0,0 @@ -variable "enable_load_balancer" { - type = bool - default = false -} - -variable "hub_firewall_ips" { - type = list(string) - default = ["192.168.20.0/28", "192.168.21.0/28"] -} \ No newline at end of file diff --git a/terraform/modules/datahub/03-inputs-derived.tf b/terraform/modules/datahub/03-inputs-derived.tf deleted file mode 100644 index 932b4841b..000000000 --- a/terraform/modules/datahub/03-inputs-derived.tf +++ /dev/null @@ -1,7 +0,0 @@ -data "aws_vpc" "vpc" { - id = var.vpc_id -} - -data "aws_region" "current" {} - -data "aws_caller_identity" "current" {} diff --git a/terraform/modules/datahub/03-locals.tf b/terraform/modules/datahub/03-locals.tf deleted file mode 100644 index f374bc750..000000000 --- a/terraform/modules/datahub/03-locals.tf +++ /dev/null @@ -1,237 +0,0 @@ -locals { - datahub_version = "v0.8.32" - datahub_frontend_react = { - container_name = "datahub-frontend-react" - image_name = "linkedin/datahub-frontend-react" - image_tag = local.datahub_version - port = 9002 - cpu = 2048 - memory = 8192 - load_balancer_required = true - standalone_onetime_task = false - environment_variables = [ - { name : "PORT", value : "9002" }, - { name : "DATAHUB_GMS_HOST", value : aws_alb.datahub_gms.dns_name }, - { name : "DATAHUB_GMS_PORT", value : "8080" }, - { name : "DATAHUB_APP_VERSION", value : "1.0" }, - { name : "DATAHUB_PLAY_MEM_BUFFER_SIZE", value : "10MB" }, - { name : "JAVA_OPTS", value : "-Xms8g -Xmx8g -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml -Dlogback.debug=false -Dpidfile.path=/dev/null" }, - { name : "KAFKA_BOOTSTRAP_SERVER", value : var.kafka_properties.kafka_bootstrap_server }, - { name : "SPRING_KAFKA_PROPERTIES_SECURITY_PROTOCOL", value : "SSL" }, - { name : "DATAHUB_TRACKING_TOPIC", value : "DataHubUsageEvent_v1" }, - { name : "ELASTIC_CLIENT_HOST", value : aws_elasticsearch_domain.es.endpoint }, - { name : "ELASTIC_CLIENT_PORT", value : "443" }, - { name : "AUTH_OIDC_ENABLED", value : var.is_live_environment }, - { name : "AUTH_OIDC_DISCOVERY_URI", value : "https://accounts.google.com/.well-known/openid-configuration" }, - { name : "AUTH_OIDC_BASE_URL", value : var.datahub_url }, - { name : "AUTH_OIDC_SCOPE", value : "openid email" }, - { name : "AUTH_OIDC_USER_NAME_CLAIM", value : "email" }, - { name : "AUTH_OIDC_USER_NAME_CLAIM_REGEX", value : "([^@]+)" }, - { name : "DATAHUB_ANALYTICS_ENABLED", value : "false" }, - { name : "AUTH_JAAS_ENABLED", value : "true" } - ] - secrets = [ - { name : "DATAHUB_SECRET", valueFrom : aws_ssm_parameter.datahub_password.arn }, - { name : "AUTH_OIDC_CLIENT_ID", valueFrom : data.aws_ssm_parameter.datahub_google_client_id.arn }, - { name : "AUTH_OIDC_CLIENT_SECRET", valueFrom : data.aws_ssm_parameter.datahub_google_client_secret.arn }, - ] - port_mappings = [ - { containerPort : 9002, hostPort : 9002, protocol : "tcp" } - ] - mount_points = [] - volumes = [] - } - datahub_gms = { - container_name = "datahub-gms" - image_name = "linkedin/datahub-gms" - image_tag = local.datahub_version - port = 8080 - cpu = 2048 - memory = 8192 - load_balancer_required = true - standalone_onetime_task = false - environment_variables = [ - { name : "DATASET_ENABLE_SCSI", value : "false" }, - { name : "EBEAN_DATASOURCE_USERNAME", value : aws_db_instance.datahub.username }, - { name : "EBEAN_DATASOURCE_HOST", value : aws_db_instance.datahub.endpoint }, - { name : "EBEAN_DATASOURCE_URL", value : "jdbc:mysql://${aws_db_instance.datahub.endpoint}/${aws_db_instance.datahub.identifier}?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8" }, - { name : "EBEAN_DATASOURCE_DRIVER", value : "com.mysql.jdbc.Driver" }, - { name : "KAFKA_BOOTSTRAP_SERVER", value : var.kafka_properties.kafka_bootstrap_server }, - { name : "KAFKA_SCHEMAREGISTRY_URL", value : var.schema_registry_properties.schema_registry_url }, - { name : "SPRING_KAFKA_PROPERTIES_SECURITY_PROTOCOL", value : "SSL" }, - { name : "ELASTICSEARCH_HOST", value : aws_elasticsearch_domain.es.endpoint }, - { name : "ELASTICSEARCH_PORT", value : "443" }, - { name : "ELASTICSEARCH_USE_SSL", value : "true" }, - { name : "ELASTICSEARCH_SSL_PROTOCOL", value : "TLSv1.2" }, - { name : "GRAPH_SERVICE_IMPL", value : "elasticsearch" }, - { name : "PE_CONSUMER_ENABLED", value : "true" }, - { name : "JAVA_OPTS", value : "-Xms8g -Xmx8g" }, - { name : "ENTITY_REGISTRY_CONFIG_PATH", value : "/datahub/datahub-gms/resources/entity-registry.yml" }, - { name : "MAE_CONSUMER_ENABLED", value : "true" }, - { name : "MCE_CONSUMER_ENABLED", value : "true" }, - { name : "UI_INGESTION_ENABLED", value : "true" }, - { name : "UI_INGESTION_DEFAULT_CLI_VERSION", value : "0.8.26.6" }, - ] - secrets = [ - { name : "EBEAN_DATASOURCE_PASSWORD", valueFrom : aws_ssm_parameter.datahub_rds_password.arn }, - ] - port_mappings = [ - { containerPort : 8080, hostPort : 8080, protocol : "tcp" } - ] - mount_points = [] - volumes = [] - } - datahub_mae_consumer = { - container_name = "datahub-mae-consumer" - image_name = "linkedin/datahub-mae-consumer" - image_tag = local.datahub_version - port = 9090 - cpu = 2048 - memory = 8192 - load_balancer_required = false - standalone_onetime_task = false - environment_variables = [ - { name : "MAE_CONSUMER_ENABLED", value : "true" }, - { name : "PE_CONSUMER_ENABLED", value : "true" }, - { name : "KAFKA_BOOTSTRAP_SERVER", value : var.kafka_properties.kafka_bootstrap_server }, - { name : "KAFKA_SCHEMAREGISTRY_URL", value : var.schema_registry_properties.schema_registry_url }, - { name : "SPRING_KAFKA_PROPERTIES_SECURITY_PROTOCOL", value : "SSL" }, - { name : "ELASTICSEARCH_HOST", value : aws_elasticsearch_domain.es.endpoint }, - { name : "ELASTICSEARCH_PORT", value : "443" }, - { name : "ELASTICSEARCH_USE_SSL", value : "true" }, - { name : "ELASTICSEARCH_SSL_PROTOCOL", value : "TLSv1.2" }, - { name : "GMS_HOST", value : aws_alb.datahub_gms.dns_name }, - { name : "GMS_PORT", value : "8080" }, - { name : "GRAPH_SERVICE_IMPL", value : "elasticsearch" }, - { name : "ENTITY_REGISTRY_CONFIG_PATH", value : "/datahub/datahub-mae-consumer/resources/entity-registry.yml" }, - ] - secrets = [] - port_mappings = [ - { containerPort : 9090, hostPort : 9090, protocol : "tcp" } - ] - mount_points = [] - volumes = [] - } - datahub_mce_consumer = { - container_name = "datahub-mce-consumer" - image_name = "linkedin/datahub-mce-consumer" - image_tag = local.datahub_version - port = 9090 - cpu = 2048 - memory = 8192 - load_balancer_required = false - standalone_onetime_task = false - environment_variables = [ - { name : "MCE_CONSUMER_ENABLED", value : "true" }, - { name : "KAFKA_BOOTSTRAP_SERVER", value : var.kafka_properties.kafka_bootstrap_server }, - { name : "KAFKA_SCHEMAREGISTRY_URL", value : var.schema_registry_properties.schema_registry_url }, - { name : "SPRING_KAFKA_PROPERTIES_SECURITY_PROTOCOL", value : "SSL" }, - { name : "GMS_HOST", value : aws_alb.datahub_gms.dns_name }, - { name : "GMS_PORT", value : "8080" }, - ] - secrets = [] - port_mappings = [ - { containerPort : 9090, hostPort : 9090, protocol : "tcp" } - ] - mount_points = [] - volumes = [] - } - datahub_actions = { - container_name = "datahub-actions" - image_name = "acryldata/acryl-datahub-actions" - image_tag = "head" - port = 80 - cpu = 2048 - memory = 8192 - standalone_onetime_task = false - load_balancer_required = false - environment_variables = [ - { name : "GMS_HOST", value : aws_alb.datahub_gms.dns_name }, - { name : "GMS_PORT", value : "8080" }, - { name : "KAFKA_BOOTSTRAP_SERVER", value : var.kafka_properties.kafka_bootstrap_server }, - { name : "SCHEMA_REGISTRY_URL", value : var.schema_registry_properties.schema_registry_url }, - { name : "METADATA_AUDIT_EVENT_NAME", value : "MetadataAuditEvent_v4" }, - { name : "METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME", value : "MetadataChangeLog_Versioned_v1" }, - { name : "DATAHUB_SYSTEM_CLIENT_ID", value : "__datahub_system" }, - { name : "KAFKA_PROPERTIES_SECURITY_PROTOCOL", value : "SSL" }, - { name : "AWS_DEFAULT_REGION", value : data.aws_region.current.name }, - { name : "AWS_ROLE", value : aws_iam_role.datahub_role.arn }, - { name : "GLUE_EXTRACT_TRANSFORMS", value : "false" }, - { name : "GMS_URL", value : "http://${aws_alb.datahub_gms.dns_name}:8080" } - ] - secrets = [ - { name : "DATAHUB_SYSTEM_CLIENT_SECRET", valueFrom : aws_ssm_parameter.datahub_password.arn }, - { name : "AWS_ACCESS_KEY_ID", valueFrom : aws_ssm_parameter.datahub_aws_access_key_id.arn }, - { name : "AWS_SECRET_ACCESS_KEY", valueFrom : aws_ssm_parameter.datahub_aws_secret_access_key.arn }, - ] - port_mappings = [] - mount_points = [] - volumes = [] - } - mysql_setup = { - container_name = "mysql-setup" - image_name = "acryldata/datahub-mysql-setup" - image_tag = local.datahub_version - port = 3306 - cpu = 256 - memory = 2048 - load_balancer_required = false - standalone_onetime_task = true - environment_variables = [ - { name : "MYSQL_HOST", value : aws_db_instance.datahub.address }, - { name : "MYSQL_PORT", value : aws_db_instance.datahub.port }, - { name : "MYSQL_USERNAME", value : aws_db_instance.datahub.username }, - { name : "DATAHUB_DB_NAME", value : aws_db_instance.datahub.identifier }, - ] - secrets = [ - { name : "MYSQL_PASSWORD", valueFrom : aws_ssm_parameter.datahub_rds_password.arn }, - ] - port_mappings = [] - mount_points = [] - volumes = [] - } - elasticsearch_setup = { - container_name = "elasticsearch-setup" - image_name = "linkedin/datahub-elasticsearch-setup" - image_tag = local.datahub_version - port = 443 - cpu = 256 - memory = 2048 - load_balancer_required = false - standalone_onetime_task = true - environment_variables = [ - { name : "ELASTICSEARCH_HOST", value : aws_elasticsearch_domain.es.endpoint }, - { name : "ELASTICSEARCH_PORT", value : "443" }, - { name : "ELASTICSEARCH_USE_SSL", value : "true" }, - { name : "ELASTICSEARCH_SSL_PROTOCOL", value : "TLSv1.2" }, - { name : "USE_AWS_ELASTICSEARCH", value : "true" }, - ] - secrets = [] - port_mappings = [] - mount_points = [] - volumes = [] - } - kafka_setup = { - container_name = "kafka-setup" - image_name = "linkedin/datahub-kafka-setup" - image_tag = local.datahub_version - port = 443 - cpu = 256 - memory = 2048 - load_balancer_required = false - standalone_onetime_task = true - environment_variables = [ - { name : "KAFKA_ZOOKEEPER_CONNECT", value : var.kafka_properties.kafka_zookeeper_connect }, - { name : "KAFKA_BOOTSTRAP_SERVER", value : var.kafka_properties.kafka_bootstrap_server }, - # This must be "ssl" in lower case. The kafka-setup container overrides the default SSL properties when "SSL" is provided. - # To get around this we set this to lower case, the defaults are then not overridden and the container can connect to kafka using SSL correctly - # Kafka-setup container problem code: https://github.com/datahub-project/datahub/blob/master/docker/kafka-setup/kafka-setup.sh#L14-L21 - { name : "KAFKA_PROPERTIES_SECURITY_PROTOCOL", value : "ssl" }, - { name : "PLATFORM_EVENT_TOPIC_NAME", value : "PlatformEvent_v1" } - ] - secrets = [] - port_mappings = [] - mount_points = [] - volumes = [] - } -} diff --git a/terraform/modules/datahub/04-aws-alb-frontend-react.tf b/terraform/modules/datahub/04-aws-alb-frontend-react.tf deleted file mode 100644 index 818219614..000000000 --- a/terraform/modules/datahub/04-aws-alb-frontend-react.tf +++ /dev/null @@ -1,67 +0,0 @@ -resource "aws_security_group" "datahub_frontend_react" { - name = "${var.short_identifier_prefix}datahub-frontend-alb" - description = "Restricts access to the DataHub Frontend React Application Load Balancer" - vpc_id = var.vpc_id - revoke_rules_on_delete = true - tags = merge(var.tags, { - "Name" : "DataHub Frontend React Load Balancer" - }) -} - -resource "aws_security_group_rule" "datahub_frontend_react_egress" { - type = "egress" - security_group_id = aws_security_group.datahub_frontend_react.id - description = "Allow all outbound traffic" - from_port = 0 - to_port = 0 - protocol = "-1" - cidr_blocks = ["0.0.0.0/0"] - ipv6_cidr_blocks = ["::/0"] -} - -resource "aws_security_group_rule" "datahub_frontend_react_ingress" { - - type = "ingress" - description = "Allow inbound HTTP traffic on Datahub Frontend port" - from_port = local.datahub_frontend_react.port - to_port = local.datahub_frontend_react.port - protocol = "tcp" - cidr_blocks = var.hub_firewall_ips - security_group_id = aws_security_group.datahub_frontend_react.id -} - -resource "aws_alb_target_group" "datahub_frontend_react" { - name = "${var.short_identifier_prefix}datahub-frontend" - port = local.datahub_frontend_react.port - protocol = "HTTP" - vpc_id = var.vpc_id - target_type = "ip" - health_check { - protocol = "HTTP" - path = "/admin" - port = local.datahub_frontend_react.port - interval = 60 - } -} - -resource "aws_alb" "datahub_frontend_react" { - name = "${var.short_identifier_prefix}datahub-frontend" - internal = true - load_balancer_type = "application" - security_groups = [aws_security_group.datahub_frontend_react.id] - subnets = var.vpc_subnet_ids - lifecycle { - prevent_destroy = true - } -} - -resource "aws_alb_listener" "datahub_frontend_react" { - load_balancer_arn = aws_alb.datahub_frontend_react.arn - port = local.datahub_frontend_react.port - protocol = "HTTP" - - default_action { - type = "forward" - target_group_arn = aws_alb_target_group.datahub_frontend_react.arn - } -} diff --git a/terraform/modules/datahub/04-aws-alb-gms.tf b/terraform/modules/datahub/04-aws-alb-gms.tf deleted file mode 100644 index fcfab860f..000000000 --- a/terraform/modules/datahub/04-aws-alb-gms.tf +++ /dev/null @@ -1,77 +0,0 @@ -resource "aws_security_group" "datahub_gms" { - name = "${var.short_identifier_prefix}datahub-gms-alb" - description = "Restricts access to the DataHub Application Load Balancer" - vpc_id = var.vpc_id - revoke_rules_on_delete = true - - tags = merge(var.tags, { - "Name" : "DataHub GMS Load Balancer" - }) -} - -resource "aws_security_group_rule" "datahub_gms_egress" { - type = "egress" - description = "Allow all outbound traffic" - from_port = 0 - to_port = 0 - protocol = "-1" - cidr_blocks = ["0.0.0.0/0"] - ipv6_cidr_blocks = ["::/0"] - security_group_id = aws_security_group.datahub_gms.id -} - - -locals { - security_groups = { - frontend_react_security_group_id = module.datahub_frontend_react.security_group_id - mae_consumer_security_group_id = module.datahub_mae_consumer.security_group_id - mce_consumer_security_group_id = module.datahub_mce_consumer.security_group_id - actions_security_group_id = module.datahub_actions.security_group_id - } -} - -resource "aws_security_group_rule" "datahub_gms_ingress" { - for_each = local.security_groups - - type = "ingress" - description = "Allow inbound HTTP traffic from Datahub containers" - from_port = local.datahub_gms.port - to_port = local.datahub_gms.port - protocol = "tcp" - source_security_group_id = each.value - security_group_id = aws_security_group.datahub_gms.id -} - -resource "aws_alb_target_group" "datahub_gms" { - name = "${var.short_identifier_prefix}datahub-gms" - port = local.datahub_gms.port - protocol = "HTTP" - vpc_id = var.vpc_id - target_type = "ip" - - health_check { - protocol = "HTTP" - path = "/health" - port = local.datahub_gms.port - interval = 60 - } -} - -resource "aws_alb" "datahub_gms" { - name = "${var.short_identifier_prefix}datahub-gms" - internal = true - load_balancer_type = "application" - security_groups = [aws_security_group.datahub_gms.id] - subnets = var.vpc_subnet_ids -} - -resource "aws_alb_listener" "datahub_gms" { - load_balancer_arn = aws_alb.datahub_gms.arn - port = local.datahub_gms.port - protocol = "HTTP" - - default_action { - type = "forward" - target_group_arn = aws_alb_target_group.datahub_gms.arn - } -} diff --git a/terraform/modules/datahub/06-ecs-cluster.tf b/terraform/modules/datahub/06-ecs-cluster.tf deleted file mode 100644 index cadd883d5..000000000 --- a/terraform/modules/datahub/06-ecs-cluster.tf +++ /dev/null @@ -1,4 +0,0 @@ -resource "aws_ecs_cluster" "datahub" { - tags = var.tags - name = "${var.short_identifier_prefix}datahub" -} \ No newline at end of file diff --git a/terraform/modules/datahub/07-cloudwatch-logs.tf b/terraform/modules/datahub/07-cloudwatch-logs.tf deleted file mode 100644 index 3d0dd25c3..000000000 --- a/terraform/modules/datahub/07-cloudwatch-logs.tf +++ /dev/null @@ -1,4 +0,0 @@ -resource "aws_cloudwatch_log_group" "datahub" { - name = "${var.short_identifier_prefix}datahub" - tags = var.tags -} \ No newline at end of file diff --git a/terraform/modules/datahub/08-ecs-services.tf b/terraform/modules/datahub/08-ecs-services.tf deleted file mode 100644 index 380e58598..000000000 --- a/terraform/modules/datahub/08-ecs-services.tf +++ /dev/null @@ -1,133 +0,0 @@ -module "datahub_frontend_react" { - source = "../aws-ecs-docker-service" - tags = var.tags - short_identifier_prefix = var.short_identifier_prefix - ecs_cluster_arn = aws_ecs_cluster.datahub.arn - vpc_id = var.vpc_id - vpc_subnet_ids = var.vpc_subnet_ids - cloudwatch_log_group_name = aws_cloudwatch_log_group.datahub.name - container_properties = local.datahub_frontend_react - load_balancer_properties = { - target_group_properties = [{ arn = aws_alb_target_group.datahub_frontend_react.arn, port = aws_alb_target_group.datahub_frontend_react.port }] - security_group_id = aws_security_group.datahub_frontend_react.id - } - is_live_environment = var.is_live_environment -} - -module "datahub_gms" { - source = "../aws-ecs-docker-service" - tags = var.tags - short_identifier_prefix = var.short_identifier_prefix - ecs_cluster_arn = aws_ecs_cluster.datahub.arn - vpc_id = var.vpc_id - vpc_subnet_ids = var.vpc_subnet_ids - cloudwatch_log_group_name = aws_cloudwatch_log_group.datahub.name - container_properties = local.datahub_gms - load_balancer_properties = { - target_group_properties = [{ arn = aws_alb_target_group.datahub_gms.arn, port = aws_alb_target_group.datahub_gms.port }] - security_group_id = aws_security_group.datahub_gms.id - } - is_live_environment = var.is_live_environment -} - -module "datahub_mae_consumer" { - source = "../aws-ecs-docker-service" - tags = var.tags - short_identifier_prefix = var.short_identifier_prefix - ecs_cluster_arn = aws_ecs_cluster.datahub.arn - vpc_id = var.vpc_id - vpc_subnet_ids = var.vpc_subnet_ids - cloudwatch_log_group_name = aws_cloudwatch_log_group.datahub.name - container_properties = local.datahub_mae_consumer - load_balancer_properties = { - target_group_properties = [] - security_group_id = null - } - is_live_environment = var.is_live_environment -} - -module "datahub_mce_consumer" { - source = "../aws-ecs-docker-service" - tags = var.tags - short_identifier_prefix = var.short_identifier_prefix - ecs_cluster_arn = aws_ecs_cluster.datahub.arn - vpc_id = var.vpc_id - vpc_subnet_ids = var.vpc_subnet_ids - cloudwatch_log_group_name = aws_cloudwatch_log_group.datahub.name - container_properties = local.datahub_mce_consumer - load_balancer_properties = { - target_group_properties = [] - security_group_id = null - } - is_live_environment = var.is_live_environment -} - -module "datahub_actions" { - source = "../aws-ecs-docker-service" - tags = var.tags - short_identifier_prefix = var.short_identifier_prefix - ecs_cluster_arn = aws_ecs_cluster.datahub.arn - vpc_id = var.vpc_id - vpc_subnet_ids = var.vpc_subnet_ids - cloudwatch_log_group_name = aws_cloudwatch_log_group.datahub.name - container_properties = local.datahub_actions - load_balancer_properties = { - target_group_properties = [] - security_group_id = null - } - is_live_environment = var.is_live_environment -} - -module "mysql_setup" { - source = "../aws-ecs-docker-service" - tags = var.tags - short_identifier_prefix = var.short_identifier_prefix - ecs_cluster_arn = aws_ecs_cluster.datahub.arn - vpc_id = var.vpc_id - vpc_subnet_ids = var.vpc_subnet_ids - cloudwatch_log_group_name = aws_cloudwatch_log_group.datahub.name - container_properties = local.mysql_setup - load_balancer_properties = { - target_group_properties = [] - security_group_id = null - } - is_live_environment = var.is_live_environment - depends_on = [ - aws_db_instance.datahub - ] -} - -module "elasticsearch_setup" { - source = "../aws-ecs-docker-service" - tags = var.tags - short_identifier_prefix = var.short_identifier_prefix - ecs_cluster_arn = aws_ecs_cluster.datahub.arn - vpc_id = var.vpc_id - vpc_subnet_ids = var.vpc_subnet_ids - cloudwatch_log_group_name = aws_cloudwatch_log_group.datahub.name - container_properties = local.elasticsearch_setup - load_balancer_properties = { - target_group_properties = [] - security_group_id = null - } - is_live_environment = var.is_live_environment - depends_on = [ - aws_elasticsearch_domain.es - ] -} - -module "kafka_setup" { - source = "../aws-ecs-docker-service" - tags = var.tags - short_identifier_prefix = var.short_identifier_prefix - ecs_cluster_arn = aws_ecs_cluster.datahub.arn - vpc_id = var.vpc_id - vpc_subnet_ids = var.vpc_subnet_ids - cloudwatch_log_group_name = aws_cloudwatch_log_group.datahub.name - container_properties = local.kafka_setup - load_balancer_properties = { - target_group_properties = [] - security_group_id = null - } - is_live_environment = var.is_live_environment -} diff --git a/terraform/modules/datahub/09-ssm.tf b/terraform/modules/datahub/09-ssm.tf deleted file mode 100644 index 1e415c2e9..000000000 --- a/terraform/modules/datahub/09-ssm.tf +++ /dev/null @@ -1,49 +0,0 @@ -resource "aws_ssm_parameter" "datahub_password" { - name = "/${var.identifier_prefix}/datahub/datahub_password" - type = "SecureString" - value = random_password.datahub_secret.result - tags = merge(var.tags, { - "Name" : "Datahub Password" - }) -} - -resource "random_password" "datahub_secret" { - length = 16 - special = true - override_special = "!#$%&*()-_=+[]{}<>:?" -} - -data "aws_ssm_parameter" "datahub_google_client_id" { - name = "/dataplatform/datahub/datahub_google_client_id" -} - -data "aws_ssm_parameter" "datahub_google_client_secret" { - name = "/dataplatform/datahub/datahub_google_client_secret" -} - -resource "aws_ssm_parameter" "datahub_rds_password" { - name = "/${var.identifier_prefix}/datahub/datahub_rds_password" - type = "SecureString" - value = aws_db_instance.datahub.password - tags = merge(var.tags, { - "Name" : "Datahub RDS Password" - }) -} - -resource "aws_ssm_parameter" "datahub_aws_access_key_id" { - name = "/${var.identifier_prefix}/datahub/datahub_aws_access_key_id" - type = "SecureString" - value = aws_iam_access_key.datahub_access_key.id - tags = merge(var.tags, { - "Name" : "Datahub AWS Access Key Id" - }) -} - -resource "aws_ssm_parameter" "datahub_aws_secret_access_key" { - name = "/${var.identifier_prefix}/datahub/datahub_aws_secret_access_key" - type = "SecureString" - value = aws_iam_access_key.datahub_access_key.secret - tags = merge(var.tags, { - "Name" : "Datahub AWS Secret Access Key" - }) -} \ No newline at end of file diff --git a/terraform/modules/datahub/10-rds.tf b/terraform/modules/datahub/10-rds.tf deleted file mode 100644 index fd1507a0b..000000000 --- a/terraform/modules/datahub/10-rds.tf +++ /dev/null @@ -1,49 +0,0 @@ -resource "aws_db_instance" "datahub" { - allocated_storage = 15 - engine = "mysql" - engine_version = "8.0" - instance_class = "db.t3.micro" - username = "datahub" - identifier = replace("${var.short_identifier_prefix}datahub", "-", "") - password = random_password.datahub_secret.result - db_subnet_group_name = aws_db_subnet_group.datahub.name - vpc_security_group_ids = [aws_security_group.datahub.id] - skip_final_snapshot = false - deletion_protection = var.is_live_environment - backup_retention_period = 14 - backup_window = "22:00-22:31" - maintenance_window = "Wed:23:13-Wed:23:43" - ca_cert_identifier = "rds-ca-rsa2048-g1" - apply_immediately = true - allow_major_version_upgrade = true - tags = var.tags -} - -resource "aws_db_subnet_group" "datahub" { - tags = var.tags - name = "${var.short_identifier_prefix}datahub" - subnet_ids = var.vpc_subnet_ids -} - -resource "aws_security_group" "datahub" { - name = "${var.short_identifier_prefix}datahub" - vpc_id = var.vpc_id - - ingress { - from_port = 3306 - to_port = 3306 - protocol = "tcp" - cidr_blocks = [ - data.aws_vpc.vpc.cidr_block, - ] - } - - egress { - description = "Allow all outbound traffic" - from_port = 0 - to_port = 0 - protocol = "-1" - cidr_blocks = ["0.0.0.0/0"] - ipv6_cidr_blocks = ["::/0"] - } -} diff --git a/terraform/modules/datahub/11-elastic-search.tf b/terraform/modules/datahub/11-elastic-search.tf deleted file mode 100644 index 0f0d884cd..000000000 --- a/terraform/modules/datahub/11-elastic-search.tf +++ /dev/null @@ -1,67 +0,0 @@ -resource "aws_elasticsearch_domain" "es" { - domain_name = "${var.short_identifier_prefix}elasticsearch" - elasticsearch_version = "7.9" - - cluster_config { - instance_type = "m4.large.elasticsearch" - zone_awareness_enabled = true - instance_count = 3 - - zone_awareness_config { - availability_zone_count = 3 - } - } - - domain_endpoint_options { - enforce_https = true - tls_security_policy = "Policy-Min-TLS-1-2-2019-07" - } - - encrypt_at_rest { - enabled = true - } - - ebs_options { - ebs_enabled = true - volume_size = 10 - } - - vpc_options { - subnet_ids = var.vpc_subnet_ids - security_group_ids = [aws_security_group.es.id] - } - - advanced_options = { - "rest.action.multi.allow_explicit_index" = "true" - } - - access_policies = <> ~/.ssh/authorized_keys -``` - -#### Start the Tunnel - -```shell -ssh -i .ssh/id_rsa -L 9002:{datahub_frontend_load_balancer_dns_name}:9002 ec2-user@localhost -v -``` - -### Local Tunnel - -```shell -aws-vault exec hackney-dataplatform-development -- aws ssm start-session --target {bastion_id} --document-name AWS-StartPortForwardingSession --parameters '{"portNumber":["9002"],"localPortNumber":["9002"]}' -``` - -### Architecture - -![Datahub Architecture](./Datahub.png) diff --git a/terraform/modules/datahub/datasource-ingestion-recipes/glue-example.yml b/terraform/modules/datahub/datasource-ingestion-recipes/glue-example.yml deleted file mode 100644 index 061b319f3..000000000 --- a/terraform/modules/datahub/datasource-ingestion-recipes/glue-example.yml +++ /dev/null @@ -1,10 +0,0 @@ -source: - type: glue - config: - aws_region: '${AWS_DEFAULT_REGION}' - aws_role: '${AWS_ROLE}' - extract_transforms: '${GLUE_EXTRACT_TRANSFORMS}' -sink: - type: datahub-rest - config: - server: '${GMS_URL}' diff --git a/terraform/modules/kafka-event-streaming/00-init.tf b/terraform/modules/kafka-event-streaming/00-init.tf deleted file mode 100644 index b24f1a3ed..000000000 --- a/terraform/modules/kafka-event-streaming/00-init.tf +++ /dev/null @@ -1,14 +0,0 @@ -/* This defines the configuration of Terraform and AWS required Terraform Providers. - As this is a module, we don't have any explicity Provider blocks declared, as these - will be inherited from the parent Terraform. -*/ -terraform { - required_version = ">= 0.14.3" - - required_providers { - aws = { - source = "hashicorp/aws" - version = ">= 4.11" - } - } -} diff --git a/terraform/modules/kafka-event-streaming/01-inputs-required.tf b/terraform/modules/kafka-event-streaming/01-inputs-required.tf deleted file mode 100644 index 23bb59849..000000000 --- a/terraform/modules/kafka-event-streaming/01-inputs-required.tf +++ /dev/null @@ -1,104 +0,0 @@ -variable "tags" { - type = map(string) -} - -variable "project" { - description = "The project name." - type = string -} - -variable "environment" { - description = "Environment e.g. Dev, Stg, Prod, Mgmt." - type = string -} - -variable "identifier_prefix" { - type = string -} - -variable "short_identifier_prefix" { - description = "Short project wide resource identifier prefix" - type = string -} - -variable "vpc_id" { - description = "VPC ID to deploy the kafta instance into" - type = string -} - -variable "subnet_ids" { - type = list(string) -} - -variable "role_arns_to_share_access_with" { - description = "" -} - -variable "cross_account_lambda_roles" { - type = list(string) - description = "Role ARNs of Lambda functions in other accounts that need to access the glue schema registry" -} - -variable "s3_bucket_to_write_to" { - type = object({ - bucket_id = string - kms_key_arn = string - kms_key_id = string - bucket_arn = string - }) -} - -variable "bastion_instance_id" { - description = "Instance ID of the bastion" - type = string -} - -variable "bastion_private_key_ssm_parameter_name" { - description = "SSM paramater name where the bastion private key is stored" - type = string -} - -variable "is_live_environment" { - description = "A flag indicting if we are running in a live environment for setting up automation" - type = bool -} - -variable "glue_iam_role" { - description = "Name of the role that can be used to crawl the resulting data" - type = string -} - -variable "glue_database_name" { - description = "Name of the database to crawl the streamed data to" - type = string -} - -variable "datahub_gms_security_group_id" { - description = "Security group id of Datahub GMS" - type = string -} - -variable "datahub_mae_consumer_security_group_id" { - description = "Security group id of Datahub MAE consumer" - type = string -} - -variable "datahub_mce_consumer_security_group_id" { - description = "Security group id of Datahub MCE consumer" - type = string -} - -variable "datahub_actions_security_group_id" { - description = "Security group id of Datahub Actions" - type = string -} - -variable "kafka_tester_lambda_security_group_id" { - description = "Security group id of the kafka tester lambda" - type = string -} - -variable "datahub_kafka_setup_security_group_id" { - description = "security group id of the one time kafka setup task for datahub" - type = string -} diff --git a/terraform/modules/kafka-event-streaming/02-input-derived.tf b/terraform/modules/kafka-event-streaming/02-input-derived.tf deleted file mode 100644 index d78fce49c..000000000 --- a/terraform/modules/kafka-event-streaming/02-input-derived.tf +++ /dev/null @@ -1 +0,0 @@ -data "aws_caller_identity" "current" {} \ No newline at end of file diff --git a/terraform/modules/kafka-event-streaming/10-dependencies.tf b/terraform/modules/kafka-event-streaming/10-dependencies.tf deleted file mode 100644 index 3901b86b8..000000000 --- a/terraform/modules/kafka-event-streaming/10-dependencies.tf +++ /dev/null @@ -1,18 +0,0 @@ -module "kafka_dependency_storage" { - source = "../s3-bucket" - tags = var.tags - project = var.project - environment = var.environment - identifier_prefix = var.identifier_prefix - bucket_name = "Kafka Dependency Storage" - bucket_identifier = "kafka-dependency-storage" - include_backup_policy_tags = false -} - -resource "aws_s3_object" "kafka_connector_s3" { - bucket = module.kafka_dependency_storage.bucket_id - key = "plugins/confluentinc-kafka-connect-s3-10.0.5-merged.zip" - acl = "private" - source = "${path.module}/plugins/confluentinc-kafka-connect-s3-10.0.5-merged.zip" - source_hash = filemd5("${path.module}/plugins/confluentinc-kafka-connect-s3-10.0.5-merged.zip") -} diff --git a/terraform/modules/kafka-event-streaming/20-security-groups.tf b/terraform/modules/kafka-event-streaming/20-security-groups.tf deleted file mode 100644 index 4e05d4387..000000000 --- a/terraform/modules/kafka-event-streaming/20-security-groups.tf +++ /dev/null @@ -1,142 +0,0 @@ -data "aws_secretsmanager_secret" "kafka_intra_account_ingress_rules" { - name = "${var.identifier_prefix}-manually-managed-value-kafka-intra-account-ingress-rules" -} - -data "aws_secretsmanager_secret_version" "kafka_intra_account_ingress_rules" { - secret_id = data.aws_secretsmanager_secret.kafka_intra_account_ingress_rules.id -} - -locals { - kafka_intra_account_ingress_rules = jsondecode(data.aws_secretsmanager_secret_version.kafka_intra_account_ingress_rules.secret_string) -} - -resource "aws_security_group" "kafka" { - name = "${var.short_identifier_prefix}kafka" - tags = var.tags - vpc_id = var.vpc_id - description = "Specifies rules for traffic to the kafka cluster" -} - -resource "aws_security_group_rule" "allow_inbound_to_zookeeper" { - description = "Allows inbound traffic on ZooKeeper port" - type = "ingress" - from_port = 2182 - to_port = 2182 - protocol = "TCP" - security_group_id = aws_security_group.kafka.id - self = true -} - -resource "aws_security_group_rule" "datahub_actions_ingress" { - description = "Allows inbound traffic from Datahub Actions" - type = "ingress" - from_port = 9094 - to_port = 9094 - protocol = "TCP" - source_security_group_id = var.datahub_actions_security_group_id - security_group_id = aws_security_group.kafka.id -} - -resource "aws_security_group_rule" "datahub_gms_ingress" { - description = "Allows inbound traffic from Datahub Generalized Metadata Service (GMS)" - type = "ingress" - from_port = 9094 - to_port = 9094 - protocol = "TCP" - source_security_group_id = var.datahub_gms_security_group_id - security_group_id = aws_security_group.kafka.id -} - -resource "aws_security_group_rule" "datahub_mae_ingress" { - description = "Allows inbound traffic from Datahub Metadata Audit Event (MAE)" - type = "ingress" - from_port = 9094 - to_port = 9094 - protocol = "TCP" - source_security_group_id = var.datahub_mae_consumer_security_group_id - security_group_id = aws_security_group.kafka.id -} - -resource "aws_security_group_rule" "datahub_mce_ingress" { - description = "Allows inbound traffic from Datahub Metadata Change Event (MCE)" - type = "ingress" - from_port = 9094 - to_port = 9094 - protocol = "TCP" - source_security_group_id = var.datahub_mce_consumer_security_group_id - security_group_id = aws_security_group.kafka.id -} - -resource "aws_security_group_rule" "allow_outbound_traffic_to_s3" { - description = "Allow outbound traffic to port 443 to allow writing to S3" - security_group_id = aws_security_group.kafka.id - protocol = "TCP" - from_port = 443 - to_port = 443 - type = "egress" - cidr_blocks = ["0.0.0.0/0"] - ipv6_cidr_blocks = ["::/0"] -} - -resource "aws_security_group_rule" "allow_outbound_traffic_to_schema_registry" { - description = "Allow outbound traffic to schema registry load balancer" - security_group_id = aws_security_group.kafka.id - protocol = "TCP" - from_port = 8081 - to_port = 8081 - type = "egress" - source_security_group_id = module.schema_registry.load_balancer_security_group_id -} - -resource "aws_security_group_rule" "allow_inboud_traffic_from_housing_account" { - description = "Allow inbound traffic from housing account subnets that the reporting listener lambda is deployed to" - security_group_id = aws_security_group.kafka.id - protocol = "TCP" - from_port = 9094 - to_port = 9094 - type = "ingress" - cidr_blocks = local.kafka_intra_account_ingress_rules["cidr_blocks"] -} - -resource "aws_security_group_rule" "allow_inbound_traffic_from_tester_lambda" { - count = lower(var.environment) != "prod" ? 1 : 0 - description = "Allows inbound traffic from the tester lambda on dev and pre-prod" - security_group_id = aws_security_group.kafka.id - protocol = "TCP" - from_port = 9094 - to_port = 9094 - type = "ingress" - source_security_group_id = var.kafka_tester_lambda_security_group_id -} - -resource "aws_security_group_rule" "allow_inbound_traffic_from_schema_registry_service" { - description = "Allows inbound traffic from schema registry service" - security_group_id = aws_security_group.kafka.id - protocol = "TCP" - from_port = 9094 - to_port = 9094 - type = "ingress" - source_security_group_id = module.schema_registry.schema_registry_security_group_id -} - -#kafka setup service for Datahub is a one time task, so this rule don't apply under normal operation -resource "aws_security_group_rule" "datahub_kafka_setup_ingress" { - description = "Allows inbound traffic from Datahubs one time Kafka setup task" - type = "ingress" - from_port = 9094 - to_port = 9094 - protocol = "TCP" - source_security_group_id = var.datahub_kafka_setup_security_group_id - security_group_id = aws_security_group.kafka.id -} - -#same as above, only required during datahub setup -resource "aws_security_group_rule" "datahub_kafka_setup_zookeeper_ingress" { - description = "Allows inbound traffic to zookeeper from Datahubs one time Kafka setup task" - type = "ingress" - from_port = 2182 - to_port = 2182 - protocol = "TCP" - source_security_group_id = var.datahub_kafka_setup_security_group_id - security_group_id = aws_security_group.kafka.id -} diff --git a/terraform/modules/kafka-event-streaming/30-keys.tf b/terraform/modules/kafka-event-streaming/30-keys.tf deleted file mode 100644 index 3f1931445..000000000 --- a/terraform/modules/kafka-event-streaming/30-keys.tf +++ /dev/null @@ -1,33 +0,0 @@ -locals { - default_arn = [ - "arn:aws:iam::${data.aws_caller_identity.current.account_id}:root", - ] -} - -resource "aws_kms_key" "kafka" { - tags = var.tags - description = "${var.short_identifier_prefix} - Kafka Streaming" - - deletion_window_in_days = 10 - enable_key_rotation = true - - policy = data.aws_iam_policy_document.kafka_client_access.json -} - -data "aws_iam_policy_document" "kafka_client_access" { - statement { - actions = ["kms:*"] - - principals { - identifiers = concat(var.cross_account_lambda_roles, local.default_arn) - type = "AWS" - } - - resources = ["*"] - } -} - -resource "aws_kms_alias" "key_alias" { - name = lower("alias/${var.short_identifier_prefix}kafka-${aws_msk_cluster.kafka_cluster.cluster_name}") - target_key_id = aws_kms_key.kafka.key_id -} \ No newline at end of file diff --git a/terraform/modules/kafka-event-streaming/40-logs.tf b/terraform/modules/kafka-event-streaming/40-logs.tf deleted file mode 100644 index b86ecbbac..000000000 --- a/terraform/modules/kafka-event-streaming/40-logs.tf +++ /dev/null @@ -1,9 +0,0 @@ -resource "aws_cloudwatch_log_group" "connector_log_group" { - tags = var.tags - name = "${var.short_identifier_prefix}kafka-connector" -} - -resource "aws_cloudwatch_log_group" "broker_log_group" { - tags = var.tags - name = "${var.short_identifier_prefix}event-streaming-broker-logs" -} \ No newline at end of file diff --git a/terraform/modules/kafka-event-streaming/45-glue-crawler.tf b/terraform/modules/kafka-event-streaming/45-glue-crawler.tf deleted file mode 100644 index 9c2317d15..000000000 --- a/terraform/modules/kafka-event-streaming/45-glue-crawler.tf +++ /dev/null @@ -1,29 +0,0 @@ -resource "aws_glue_crawler" "crawler" { - tags = var.tags - - database_name = var.glue_database_name - name = "${var.short_identifier_prefix}event-streaming-topics-crawler" - role = var.glue_iam_role - - s3_target { - path = "s3://${var.s3_bucket_to_write_to.bucket_id}/event-streaming/" - - exclusions = ["*.json", "*.txt", "*.zip", "*.xlsx"] - } - - schema_change_policy { - delete_behavior = "DELETE_FROM_DATABASE" - update_behavior = "UPDATE_IN_DATABASE" - } - - configuration = jsonencode({ - Version = 1.0, - Grouping = { - TableGroupingPolicy = "CombineCompatibleSchemas" - TableLevelConfiguration = 3 - } - CrawlerOutput = { - Partitions = { AddOrUpdateBehavior = "InheritFromTable" } - } - }) -} \ No newline at end of file diff --git a/terraform/modules/kafka-event-streaming/50-cluster.tf b/terraform/modules/kafka-event-streaming/50-cluster.tf deleted file mode 100644 index d8ccd55dc..000000000 --- a/terraform/modules/kafka-event-streaming/50-cluster.tf +++ /dev/null @@ -1,32 +0,0 @@ -resource "aws_msk_cluster" "kafka_cluster" { - cluster_name = "${var.short_identifier_prefix}event-streaming" - kafka_version = "3.9.x" - number_of_broker_nodes = 3 - - broker_node_group_info { - instance_type = "kafka.t3.small" - client_subnets = var.subnet_ids - security_groups = [aws_security_group.kafka.id] - - storage_info { - ebs_storage_info { - volume_size = 200 - } - } - } - - encryption_info { - encryption_at_rest_kms_key_arn = aws_kms_key.kafka.arn - } - - logging_info { - broker_logs { - cloudwatch_logs { - enabled = true - log_group = aws_cloudwatch_log_group.broker_log_group.name - } - } - } - - tags = var.tags -} diff --git a/terraform/modules/kafka-event-streaming/60-connect.tf b/terraform/modules/kafka-event-streaming/60-connect.tf deleted file mode 100644 index e7d94aa34..000000000 --- a/terraform/modules/kafka-event-streaming/60-connect.tf +++ /dev/null @@ -1,106 +0,0 @@ -resource "aws_mskconnect_custom_plugin" "avro_converter_s3_sink" { - content_type = "ZIP" - name = "${var.short_identifier_prefix}confluentinc-kafka-connect-s3-10-0-5-merged" - description = "A bundle consisting of the Confluentinc S3 Connect and Avro Deserializer" - location { - s3 { - bucket_arn = module.kafka_dependency_storage.bucket_arn - file_key = aws_s3_object.kafka_connector_s3.key - } - } -} - - -locals { - topics = [ - "tenure_api", - "contact_details_api" - ] -} - -resource "aws_mskconnect_connector" "topics" { - for_each = toset(local.topics) - name = replace(lower("${var.short_identifier_prefix}${each.value}"), "/[^a-zA-Z0-9]+/", "-") - description = "Kafka connector to write ${each.value} events to S3" - - kafkaconnect_version = "2.7.1" - - capacity { - autoscaling { - mcu_count = 1 - min_worker_count = 1 - max_worker_count = 2 - - scale_in_policy { - cpu_utilization_percentage = 20 - } - - scale_out_policy { - cpu_utilization_percentage = 80 - } - } - } - - connector_configuration = { - "connector.class" = "io.confluent.connect.s3.S3SinkConnector" - "flush.size" = "1" - "tasks.max" = "2" - "topics" = each.value - "topics.dir" = "event-streaming" - "s3.bucket.name" = var.s3_bucket_to_write_to.bucket_id - "s3.sse.kms.key.id" = var.s3_bucket_to_write_to.kms_key_id - "s3.region" = "eu-west-2" - "key.converter" = "org.apache.kafka.connect.storage.StringConverter" - "key.converter.schemas.enable" = "False" - "value.converter" = "io.confluent.connect.avro.AvroConverter" - "value.converter.schema.registry.url" = "http://${module.schema_registry.load_balancer_dns_name}:8081" - "value.converter.schemas.enable" = "True" - "storage.class" = "io.confluent.connect.s3.storage.S3Storage" - "format.class" = "io.confluent.connect.s3.format.parquet.ParquetFormat" - "schema.compatibility" = "BACKWARD" - "errors.log.enable" = "True" - "partitioner.class" = "io.confluent.connect.storage.partitioner.TimeBasedPartitioner" - "path.format" = "'import_year'=YYYY/'import_month'=MM/'import_day'=dd/'import_date'=YYYYMMdd" - "locale" = "en-GB" - "timezone" = "UTC" - "partition.duration.ms" = "86400000" - - } - - kafka_cluster { - apache_kafka_cluster { - bootstrap_servers = aws_msk_cluster.kafka_cluster.bootstrap_brokers_tls - - vpc { - security_groups = [aws_security_group.kafka.id] - subnets = var.subnet_ids - } - } - } - - kafka_cluster_client_authentication { - authentication_type = "NONE" - } - - kafka_cluster_encryption_in_transit { - encryption_type = "TLS" - } - - plugin { - custom_plugin { - arn = aws_mskconnect_custom_plugin.avro_converter_s3_sink.arn - revision = aws_mskconnect_custom_plugin.avro_converter_s3_sink.latest_revision - } - } - - log_delivery { - worker_log_delivery { - cloudwatch_logs { - log_group = aws_cloudwatch_log_group.connector_log_group.name - enabled = true - } - } - } - - service_execution_role_arn = aws_iam_role.kafka_connector.arn -} \ No newline at end of file diff --git a/terraform/modules/kafka-event-streaming/70-schema.tf b/terraform/modules/kafka-event-streaming/70-schema.tf deleted file mode 100644 index 155b557c7..000000000 --- a/terraform/modules/kafka-event-streaming/70-schema.tf +++ /dev/null @@ -1,23 +0,0 @@ -module "schema_registry" { - source = "../kafka-schema-registry" - tags = var.tags - environment = var.environment - identifier_prefix = var.short_identifier_prefix - project = var.project - vpc_id = var.vpc_id - subnet_ids = var.subnet_ids - bootstrap_servers = aws_msk_cluster.kafka_cluster.bootstrap_brokers_tls - bastion_private_key_ssm_parameter_name = var.bastion_private_key_ssm_parameter_name - bastion_instance_id = var.bastion_instance_id - topics = local.topics - is_live_environment = var.is_live_environment - - datahub_actions_security_group_id = var.datahub_actions_security_group_id - datahub_gms_security_group_id = var.datahub_gms_security_group_id - datahub_mae_consumer_security_group_id = var.datahub_mae_consumer_security_group_id - datahub_mce_consumer_security_group_id = var.datahub_mce_consumer_security_group_id - kafka_security_group_id = aws_security_group.kafka.id - housing_intra_account_ingress_cidr = local.kafka_intra_account_ingress_rules["cidr_blocks"] - schema_registry_alb_security_group_id = module.schema_registry.load_balancer_security_group_id - kafka_tester_lambda_security_group_id = var.kafka_tester_lambda_security_group_id -} diff --git a/terraform/modules/kafka-event-streaming/80-iam.tf b/terraform/modules/kafka-event-streaming/80-iam.tf deleted file mode 100644 index 1c3a44925..000000000 --- a/terraform/modules/kafka-event-streaming/80-iam.tf +++ /dev/null @@ -1,165 +0,0 @@ -data "aws_iam_policy_document" "kafka_connector_assume_role" { - statement { - effect = "Allow" - actions = ["sts:AssumeRole"] - - principals { - identifiers = ["kafkaconnect.amazonaws.com"] - type = "Service" - } - } -} - -resource "aws_iam_role" "kafka_connector" { - tags = var.tags - - name = "${var.short_identifier_prefix}kafka-connector" - assume_role_policy = data.aws_iam_policy_document.kafka_connector_assume_role.json -} - -data "aws_iam_policy_document" "kafka_connector_write_to_s3" { - statement { - effect = "Allow" - actions = [ - "s3:ListAllMyBuckets", - ] - resources = [ - "*" - ] - } - statement { - effect = "Allow" - actions = [ - "s3:List*", - "s3:Get*" - ] - resources = [ - var.s3_bucket_to_write_to.bucket_arn - ] - } - - statement { - effect = "Allow" - actions = [ - "s3:Put*", - "s3:Get*", - "s3:AbortMultipartUpload", - "s3:ListMultipartUploadParts", - "s3:ListBucketMultipartUploads" - ] - resources = [ - "${var.s3_bucket_to_write_to.bucket_arn}/*" - ] - } - - statement { - effect = "Allow" - actions = [ - "kms:Encrypt", - "kms:Decrypt", - "kms:ReEncrypt*", - "kms:GenerateDataKey*", - "kms:DescribeKey", - "kms:CreateGrant", - "kms:RetireGrant" - ] - resources = [ - var.s3_bucket_to_write_to.kms_key_arn - ] - } -} - -data "aws_iam_policy_document" "kafka_connector_cloud_watch" { - statement { - effect = "Allow" - sid = "CloudWatchLogWriting" - actions = [ - "logs:CreateLogGroup", - "logs:CreateLogStream", - "logs:PutLogEvents", - "logs:AssociateKmsKey" - ] - resources = [ - "arn:aws:logs:*:*:*" - ] - } - - statement { - effect = "Allow" - sid = "CloudWatchMetricRecording" - actions = [ - "cloudwatch:PutMetricData", - ] - resources = [ - "*" - ] - } -} - -# TODO: make this less permissive -data "aws_iam_policy_document" "glue_schema_access" { - statement { - effect = "Allow" - sid = "GetSchema" - actions = ["glue:*"] - resources = ["*"] - } -} - -data "aws_iam_policy_document" "kafka_connector_kafka_access" { - statement { - effect = "Allow" - actions = [ - "kafka-cluster:Connect", - "kafka-cluster:AlterCluster", - "kafka-cluster:DescribeCluster" - ] - resources = [ - aws_msk_cluster.kafka_cluster.arn - ] - } - statement { - effect = "Allow" - actions = [ - "kafka-cluster:*Topic*", - "kafka-cluster:WriteData", - "kafka-cluster:ReadData", - ] - resources = [ - aws_msk_cluster.kafka_cluster.arn, # dont need? - "arn:aws:kafka:eu-west-2:${data.aws_caller_identity.current.account_id}:topic/${aws_msk_cluster.kafka_cluster.cluster_name}/*" - ] - } - statement { - effect = "Allow" - actions = [ - "kafka-cluster:AlterGroup", - "kafka-cluster:DescribeGroup" - ] - resources = [ - aws_msk_cluster.kafka_cluster.arn, # dont need? - "arn:aws:kafka:eu-west-2:${data.aws_caller_identity.current.account_id}:group/${aws_msk_cluster.kafka_cluster.cluster_name}/*" - ] - } -} - -data "aws_iam_policy_document" "kafka_connector" { - source_policy_documents = [ - data.aws_iam_policy_document.kafka_connector_write_to_s3.json, - data.aws_iam_policy_document.kafka_connector_cloud_watch.json, - data.aws_iam_policy_document.kafka_connector_kafka_access.json, - data.aws_iam_policy_document.glue_schema_access.json - ] -} - -resource "aws_iam_policy" "kafka_connector" { - tags = var.tags - - name = lower("${var.short_identifier_prefix}kafka-connector") - policy = data.aws_iam_policy_document.kafka_connector.json -} - -resource "aws_iam_role_policy_attachment" "kafka_connector" { - role = aws_iam_role.kafka_connector.name - policy_arn = aws_iam_policy.kafka_connector.arn -} \ No newline at end of file diff --git a/terraform/modules/kafka-event-streaming/99-outputs.tf b/terraform/modules/kafka-event-streaming/99-outputs.tf deleted file mode 100644 index a372af860..000000000 --- a/terraform/modules/kafka-event-streaming/99-outputs.tf +++ /dev/null @@ -1,16 +0,0 @@ -output "schema_registry_url" { - value = "http://${module.schema_registry.load_balancer_dns_name}:8081" -} - -output "cluster_config" { - value = { - zookeeper_connect_string = aws_msk_cluster.kafka_cluster.zookeeper_connect_string - bootstrap_brokers = aws_msk_cluster.kafka_cluster.bootstrap_brokers - bootstrap_brokers_tls = aws_msk_cluster.kafka_cluster.bootstrap_brokers_tls - vpc_security_groups = [aws_security_group.kafka.id] - vpc_subnets = var.subnet_ids - cluster_name = aws_msk_cluster.kafka_cluster.cluster_name - cluster_arn = aws_msk_cluster.kafka_cluster.arn - kms_key_arn = aws_kms_key.kafka.arn - } -} \ No newline at end of file diff --git a/terraform/modules/kafka-event-streaming/README.md b/terraform/modules/kafka-event-streaming/README.md deleted file mode 100644 index 62b28baa8..000000000 --- a/terraform/modules/kafka-event-streaming/README.md +++ /dev/null @@ -1,54 +0,0 @@ -## Local Development & Testing - -Kafka takes a long time to create in AWS, specifically the MSK connectors. Due to this kafka is not deployed to dev environments by default. -If you wish to test kafka changes in a dev environment then please follow these steps: - -1. In ```32-kafka-event-streaming.tf``` modify the count on line 2 so that it reads ```kafka_event_streaming_count = local.is_live_environment ? 1 : 1```. Do not commit this change -2. Deploy terraform/core to your dev environment, this will deploy kafka and the lambda for testing kafka -3. In the AWS console search for a lambda with the following name: ```{your-terraform-workspace-name}-kafka-test``` -6. In the /lambdas/kafka-test/lambda-events folder you will find json files containing the correct lambda test event message structure to trigger operations against the kafka cluster -7. Take the contents of one of the files and paste it into the Event JSON window on the test tab of the lambda and then click test to run it - 1. list-all-topics: - 1. This is fairly straight forward, it will print out a list of topics currently in the cluster - 2. send-message-to-topic: - 1. This will fire a preconfigured message stored in /lambdas/kafka-test/topics-messages to the topic of your choice - 2. Once the lambda has completed successfully you should see the event has been processed by Kafka and that the data has appeared in the event-streaming folder in the raw zone - -## Schema Registry UI -We haven't had chance to setup an UI for the schema registry in the environment yet, so I have instead been using the -docker image and port forwarding to it via the bastion. It's a little convoluted, but it just works once setup. - -### Docker - -```shell -docker pull landoop/schema-registry-ui -docker run --rm -p 8000:8000 \ - -e "SCHEMAREGISTRY_URL=http://localhost:8081" \ - landoop/schema-registry-ui -``` - -## Schema Registry -Schema name should be: `{topic}-value` for example `tenure_api-value` - - -## MSK Connect -Working Settings -``` -connector.class=io.confluent.connect.s3.S3SinkConnector -s3.region=eu-west-2 -flush.size=1 -schema.compatibility=NONE -tasks.max=2 -topics=tenure_api -s3.sse.kms.key.id=8c5aa61d-8dab-4127-9190-5dfabc20d84c -key.converter.schemas.enable=false -value.converter.schema.registry.url=http://10.120.30.78:8081 -format.class=io.confluent.connect.s3.format.parquet.ParquetFormat -partitioner.class=io.confluent.connect.storage.partitioner.DefaultPartitioner -value.converter.schemas.enable=true -value.converter=io.confluent.connect.avro.AvroConverter -storage.class=io.confluent.connect.s3.storage.S3Storage -errors.log.enable=true -s3.bucket.name=dataplatform-joates-raw-zone -key.converter=org.apache.kafka.connect.storage.StringConverter -``` \ No newline at end of file diff --git a/terraform/modules/kafka-event-streaming/plugins/confluentinc-kafka-connect-s3-10.0.5-merged.zip b/terraform/modules/kafka-event-streaming/plugins/confluentinc-kafka-connect-s3-10.0.5-merged.zip deleted file mode 100644 index de58fedbd..000000000 Binary files a/terraform/modules/kafka-event-streaming/plugins/confluentinc-kafka-connect-s3-10.0.5-merged.zip and /dev/null differ diff --git a/terraform/modules/kafka-event-streaming/plugins/confluentinc-kafka-connect-s3-10.0.5.zip b/terraform/modules/kafka-event-streaming/plugins/confluentinc-kafka-connect-s3-10.0.5.zip deleted file mode 100644 index 7d3ef5b0d..000000000 Binary files a/terraform/modules/kafka-event-streaming/plugins/confluentinc-kafka-connect-s3-10.0.5.zip and /dev/null differ diff --git a/terraform/modules/kafka-schema-registry/00-init.tf b/terraform/modules/kafka-schema-registry/00-init.tf deleted file mode 100644 index 4ebd8a477..000000000 --- a/terraform/modules/kafka-schema-registry/00-init.tf +++ /dev/null @@ -1,14 +0,0 @@ -/* This defines the configuration of Terraform and AWS required Terraform Providers. - As this is a module, we don't have any explicity Provider blocks declared, as these - will be inherited from the parent Terraform. -*/ -terraform { - required_version = ">= 0.14.3" - - required_providers { - aws = { - source = "hashicorp/aws" - version = ">= 3.72" - } - } -} diff --git a/terraform/modules/kafka-schema-registry/01-inputs-required.tf b/terraform/modules/kafka-schema-registry/01-inputs-required.tf deleted file mode 100644 index 09e1b2de4..000000000 --- a/terraform/modules/kafka-schema-registry/01-inputs-required.tf +++ /dev/null @@ -1,91 +0,0 @@ -variable "tags" { - type = map(string) -} - -variable "project" { - description = "The project name." - type = string -} - -variable "environment" { - description = "Environment e.g. Dev, Stg, Prod, Mgmt." - type = string -} - -variable "identifier_prefix" { - type = string -} - -variable "vpc_id" { - description = "VPC ID to deploy the kafta instance into" - type = string -} - -variable "subnet_ids" { - type = list(string) -} - -variable "bootstrap_servers" { - description = "One or more DNS names (or IP addresses) and port pairs." - type = string -} - -variable "bastion_private_key_ssm_parameter_name" { - description = "SSM paramater name where the bastion private key is stored" - type = string -} - -variable "bastion_instance_id" { - description = "Instance ID of the bastion" - type = string -} - -variable "topics" { - description = "List of kafka topics to manage schemas for" - type = list(string) -} - -variable "is_live_environment" { - description = "A flag indicting if we are running in a live environment for setting up automation" - type = bool -} - -variable "datahub_gms_security_group_id" { - description = "Security group id of Datahub GMS" - type = string -} - -variable "datahub_mae_consumer_security_group_id" { - description = "Security group id of Datahub MAE consumer" - type = string -} - -variable "datahub_mce_consumer_security_group_id" { - description = "Security group id of Datahub MCE consumer" - type = string -} - -variable "kafka_security_group_id" { - description = "Security group id of kafka" - type = string -} - -variable "housing_intra_account_ingress_cidr" { - description = "Cidr block for intra account ingress rules for housing" - type = list(string) -} - -variable "schema_registry_alb_security_group_id" { - description = "Security group id of schema registry ALB" - type = string -} - -variable "kafka_tester_lambda_security_group_id" { - description = "Security group id of the tester lambda" - type = string -} - -variable "datahub_actions_security_group_id" { - description = "security group id for GMS" - type = string -} diff --git a/terraform/modules/kafka-schema-registry/02-input-derived.tf b/terraform/modules/kafka-schema-registry/02-input-derived.tf deleted file mode 100644 index d78fce49c..000000000 --- a/terraform/modules/kafka-schema-registry/02-input-derived.tf +++ /dev/null @@ -1 +0,0 @@ -data "aws_caller_identity" "current" {} \ No newline at end of file diff --git a/terraform/modules/kafka-schema-registry/10-aws-alb.tf b/terraform/modules/kafka-schema-registry/10-aws-alb.tf deleted file mode 100644 index 2e3d95a51..000000000 --- a/terraform/modules/kafka-schema-registry/10-aws-alb.tf +++ /dev/null @@ -1,132 +0,0 @@ -data "aws_subnets" "subnet_ids" { - filter { - name = "vpc-id" - values = [var.vpc_id] - } -} - -data "aws_subnet" "subnets" { - count = length(data.aws_subnets.subnet_ids.ids) - id = tolist(data.aws_subnets.subnet_ids.ids)[count.index] -} - -resource "aws_security_group" "schema_registry_alb" { - name = "${var.identifier_prefix}schema-registry-alb" - description = "Restricts access to the Schema Registry Load Balancer" - vpc_id = var.vpc_id - revoke_rules_on_delete = true - - tags = merge(var.tags, { - "Name" : "Schema Registry Load Balancer" - }) -} - -resource "aws_security_group_rule" "datahub_actions_ingress" { - description = "Allows inbound traffic from Datahub Actions" - type = "ingress" - from_port = 8081 - to_port = 8081 - protocol = "TCP" - source_security_group_id = var.datahub_actions_security_group_id - security_group_id = aws_security_group.schema_registry_alb.id -} - -resource "aws_security_group_rule" "datahub_gms_ingress" { - description = "Allows inbound traffic from Datahub Generalized Metadata Service (GMS)" - type = "ingress" - from_port = 8081 - to_port = 8081 - protocol = "TCP" - source_security_group_id = var.datahub_gms_security_group_id - security_group_id = aws_security_group.schema_registry_alb.id -} - -resource "aws_security_group_rule" "datahub_mae_ingress" { - description = "Allows inbound traffic from Datahub Metadata Audit Event (MAE)" - type = "ingress" - from_port = 8081 - to_port = 8081 - protocol = "TCP" - source_security_group_id = var.datahub_mae_consumer_security_group_id - security_group_id = aws_security_group.schema_registry_alb.id -} - -resource "aws_security_group_rule" "datahub_mce_ingress" { - description = "Allows inbound traffic from Datahub Metadata Change Event (MCE)" - type = "ingress" - from_port = 8081 - to_port = 8081 - protocol = "TCP" - source_security_group_id = var.datahub_mce_consumer_security_group_id - security_group_id = aws_security_group.schema_registry_alb.id -} - -resource "aws_security_group_rule" "housing_listener_ingress" { - description = "Allow inbound traffic from housing account subnets that the reporting listener lambda is deployed to" - type = "ingress" - from_port = 8081 - to_port = 8081 - protocol = "TCP" - cidr_blocks = var.housing_intra_account_ingress_cidr - security_group_id = aws_security_group.schema_registry_alb.id -} - -resource "aws_security_group_rule" "allow_all_outbound_traffic" { - description = "Allow all outbound traffic" - security_group_id = aws_security_group.schema_registry_alb.id - protocol = "-1" - from_port = 0 - to_port = 0 - type = "egress" - cidr_blocks = ["0.0.0.0/0"] - ipv6_cidr_blocks = ["::/0"] -} - -resource "aws_security_group_rule" "kafka_tester_ingress" { - count = lower(var.environment) != "prod" ? 1 : 0 - - description = "Allows inbound traffic from the tester lambda on dev and pre-prod" - type = "ingress" - from_port = 8081 - to_port = 8081 - protocol = "TCP" - source_security_group_id = var.kafka_tester_lambda_security_group_id - security_group_id = aws_security_group.schema_registry_alb.id -} - -resource "aws_security_group_rule" "allow_kafka_ingress" { - description = "Allows inbound traffic from Kafka" - type = "ingress" - from_port = 8081 - to_port = 8081 - protocol = "TCP" - source_security_group_id = var.kafka_security_group_id - security_group_id = aws_security_group.schema_registry_alb.id -} - -resource "aws_alb_target_group" "schema_registry" { - name = "${var.identifier_prefix}schema-registry" - port = 8081 - protocol = "HTTP" - vpc_id = var.vpc_id - target_type = "ip" -} - -resource "aws_alb" "schema_registry" { - name = "${var.identifier_prefix}schema-registry-alb" - internal = true - load_balancer_type = "application" - security_groups = [aws_security_group.schema_registry_alb.id] - subnets = data.aws_subnet.subnets.*.id -} - -resource "aws_alb_listener" "schema_registry_http" { - load_balancer_arn = aws_alb.schema_registry.arn - port = "8081" - protocol = "HTTP" - - default_action { - type = "forward" - target_group_arn = aws_alb_target_group.schema_registry.arn - } -} diff --git a/terraform/modules/kafka-schema-registry/20-aws-ecs.tf b/terraform/modules/kafka-schema-registry/20-aws-ecs.tf deleted file mode 100644 index 7cd356b03..000000000 --- a/terraform/modules/kafka-schema-registry/20-aws-ecs.tf +++ /dev/null @@ -1,168 +0,0 @@ -locals { - taskCommand = < ecs.json; -export HOST_IP=$(python -c "import json; f = open('ecs.json').read(); data = json.loads(f); print(data['Networks'][0]['IPv4Addresses'][0])"); -export HOST_PORT=$(python -c "import json; f = open('ecs.json').read(); data = json.loads(f); print(data['Ports'][0]['HostPort'])"); -export SCHEMA_REGISTRY_HOST_NAME=$HOSTNAME; -export SCHEMA_REGISTRY_LISTENERS="http://$HOST_IP:8081"; -/etc/confluent/docker/run -COMMAND -} - -resource "aws_ecs_cluster" "schema_registry" { - name = "${var.identifier_prefix}kafka-schema-registry" - tags = var.tags -} - -data "aws_iam_policy_document" "schema_registry" { - statement { - actions = [ - "sts:AssumeRole" - ] - principals { - identifiers = [ - "ecs-tasks.amazonaws.com" - ] - type = "Service" - } - } -} - -resource "aws_iam_role" "schema_registry" { - tags = var.tags - - name = lower("${var.identifier_prefix}kafka-schema-registry") - assume_role_policy = data.aws_iam_policy_document.schema_registry.json -} - -resource "aws_iam_role_policy_attachment" "schema_registry" { - role = aws_iam_role.schema_registry.name - policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy" -} - -resource "aws_ecs_task_definition" "schema_registry" { - family = "${var.identifier_prefix}kafka-schema-registry" - requires_compatibilities = ["FARGATE"] - network_mode = "awsvpc" - cpu = 256 - memory = 1024 - //task_role_arn = aws_iam_role.schema_registry.arn - execution_role_arn = aws_iam_role.schema_registry.arn - - container_definitions = jsonencode([ - { - name = "schema-registry" - image = "confluentinc/cp-schema-registry:5.3.0" - essential = true - entryPoint = ["sh", "-c"] - command = [local.taskCommand] - environment = [ - { name = "SCHEMA_REGISTRY_ACCESS_CONTROL_ALLOW_METHODS", value = "GET,POST,PUT,OPTIONS" }, - { name = "SCHEMA_REGISTRY_ACCESS_CONTROL_ALLOW_ORIGIN", value = "*" }, - { name = "SCHEMA_REGISTRY_DEBUG", value = "true" }, - { name = "SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS", value = var.bootstrap_servers }, - { name = "SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL", value = "SSL" } - ] - portMappings = [ - { - containerPort = 8081 - hostPort = 8081 - } - ], - logConfiguration = { - logDriver = "awslogs", - options = { - "awslogs-group" = aws_cloudwatch_log_group.kafka_schema_registry.name, - "awslogs-region" = "eu-west-2", - "awslogs-stream-prefix" = "ecs" - } - } - } - ]) -} - -resource "aws_security_group" "schema_registry_service" { - name = "${var.identifier_prefix}schema-registry-service" - description = "Restricts access to the Schema Registry ECS Service" - vpc_id = var.vpc_id - revoke_rules_on_delete = true - - egress { - description = "Allow all outbound traffic" - from_port = 0 - to_port = 0 - protocol = "-1" - cidr_blocks = ["0.0.0.0/0"] - ipv6_cidr_blocks = ["::/0"] - } - - ingress { - description = "Allows inbound traffic from Datahub Generalized Metadata Service (GMS)" - from_port = 8081 - to_port = 8081 - protocol = "TCP" - security_groups = [var.datahub_gms_security_group_id] - } - - ingress { - description = "Allows inbound traffic from Datahub Metadata Audit Event (MAE)" - from_port = 8081 - to_port = 8081 - protocol = "TCP" - security_groups = [var.datahub_mae_consumer_security_group_id] - } - - ingress { - description = "Allows inbound traffic from Datahub Metadata Change Event (MCE)" - from_port = 8081 - to_port = 8081 - protocol = "TCP" - security_groups = [var.datahub_mce_consumer_security_group_id] - } - - ingress { - description = "Allows inbound traffic from Kafka" - from_port = 8081 - to_port = 8081 - protocol = "TCP" - security_groups = [var.kafka_security_group_id] - } - - ingress { - description = "Allow inbound traffic from schema registry ALB" - from_port = 8081 - to_port = 8081 - protocol = "TCP" - security_groups = [var.schema_registry_alb_security_group_id] - } - - tags = merge(var.tags, { - "Name" : "Schema Registry ECS Service" - }) -} - -resource "aws_ecs_service" "schema_registry" { - name = "${var.identifier_prefix}kafka-schema-registry" - cluster = aws_ecs_cluster.schema_registry.id - task_definition = aws_ecs_task_definition.schema_registry.arn - desired_count = 2 - //iam_role = aws_iam_role.schema_registry.arn - - launch_type = "FARGATE" - - network_configuration { - subnets = data.aws_subnet.subnets.*.id - security_groups = [aws_security_group.schema_registry_service.id] - assign_public_ip = false - } - - load_balancer { - target_group_arn = aws_alb_target_group.schema_registry.arn - container_name = "schema-registry" - container_port = 8081 - } - - lifecycle { - ignore_changes = [desired_count] - } -} diff --git a/terraform/modules/kafka-schema-registry/30-register-schemas.tf b/terraform/modules/kafka-schema-registry/30-register-schemas.tf deleted file mode 100644 index 70840db66..000000000 --- a/terraform/modules/kafka-schema-registry/30-register-schemas.tf +++ /dev/null @@ -1,21 +0,0 @@ -resource "null_resource" "register_schemas" { - for_each = toset(var.topics) - triggers = { - shell_hash = filesha256("${path.module}/schemas/${each.value}.json") - } - - provisioner "local-exec" { - interpreter = ["bash", "-c"] - command = < $key_file -chmod 400 $key_file -schema_string=$(jq -c . $path_to_schema_file | jq -R) - - -ssh -4 -i $key_file -f -M \ - -L 8081:${schema_registry_url//\"}:8081 \ - -o "UserKnownHostsFile=/dev/null" \ - -o "StrictHostKeyChecking=no" \ - -o ProxyCommand="aws ssm start-session --target %h --document AWS-StartSSHSession --parameters portNumber=%p --region=eu-west-2" \ - -o ExitOnForwardFailure=yes \ - ec2-user@${instance_id//\"} \ - sleep 10 - -curl -X POST -H "Content-Type: application/vnd.schemaregistry.v1+json" --data "{ \"schema\": ${schema_string} }" "http://localhost:8081/subjects/$topic_name-value/versions" - -rm -f $key_file diff --git a/terraform/modules/kafka-test-lambda/00-init.tf b/terraform/modules/kafka-test-lambda/00-init.tf deleted file mode 100644 index 71b8dc4a7..000000000 --- a/terraform/modules/kafka-test-lambda/00-init.tf +++ /dev/null @@ -1,14 +0,0 @@ -/* This defines the configuration of Terraform and AWS required Terraform Providers. - As this is a module, we don't have any explicity Provider blocks declared, as these - will be inherited from the parent Terraform. -*/ -terraform { - required_version = "~> 1.0" - - required_providers { - aws = { - source = "hashicorp/aws" - version = ">=4.0, <6.0" - } - } -} diff --git a/terraform/modules/kafka-test-lambda/01-inputs-required.tf b/terraform/modules/kafka-test-lambda/01-inputs-required.tf deleted file mode 100644 index 3bac76520..000000000 --- a/terraform/modules/kafka-test-lambda/01-inputs-required.tf +++ /dev/null @@ -1,52 +0,0 @@ -variable "tags" { - description = "AWS tags" - type = map(string) -} - -variable "identifier_prefix" { - description = "Project wide resource identifier prefix" - type = string -} - -variable "lambda_artefact_storage_bucket" { - type = string -} - -variable "kafka_cluster_arn" { - type = string -} - -variable "kafka_cluster_kms_key_arn" { - type = string -} - -variable "kafka_cluster_name" { - type = string -} - -variable "kafka_security_group_id" { - type = list(string) -} - -variable "lambda_name" { - type = string - - validation { - condition = length(var.lambda_name) <= 51 - error_message = "The lambda_name must be less than 51 characters long." - } -} - -variable "lambda_environment_variables" { - description = "An object containing environment variables to be used in the Lambda" - type = map(string) -} - -variable "vpc_id" { - description = "VPC ID to deploy the kafta instance into" - type = string -} - -variable "subnet_ids" { - type = list(string) -} diff --git a/terraform/modules/kafka-test-lambda/02-inputs-optional.tf b/terraform/modules/kafka-test-lambda/02-inputs-optional.tf deleted file mode 100644 index e69de29bb..000000000 diff --git a/terraform/modules/kafka-test-lambda/03-input-derived.tf b/terraform/modules/kafka-test-lambda/03-input-derived.tf deleted file mode 100644 index 8fc4b38cc..000000000 --- a/terraform/modules/kafka-test-lambda/03-input-derived.tf +++ /dev/null @@ -1 +0,0 @@ -data "aws_caller_identity" "current" {} diff --git a/terraform/modules/kafka-test-lambda/10-lambda.tf b/terraform/modules/kafka-test-lambda/10-lambda.tf deleted file mode 100644 index b00db83ca..000000000 --- a/terraform/modules/kafka-test-lambda/10-lambda.tf +++ /dev/null @@ -1,178 +0,0 @@ -data "aws_iam_policy_document" "lambda_assume_role" { - statement { - actions = [ - "sts:AssumeRole" - ] - principals { - identifiers = [ - "lambda.amazonaws.com", - "kafka.amazonaws.com" - ] - type = "Service" - } - } -} - -locals { - command = "make install-requirements" - confluent_kafka_command = "docker run -v \"$PWD\":/var/task \"lambci/lambda:build-python3.8\" /bin/sh -c \"pip install -r requirements.txt -t python/lib/python3.8/site-packages/; exit\"" - # This ensures that this data resource will not be evaluated until - # after the null_resource has been created. - lambda_exporter_id = null_resource.run_install_requirements.id - - # This value gives us something to implicitly depend on - # in the archive_file below. - source_dir = "../../lambdas/kafka_test" -} - -resource "aws_iam_role" "lambda" { - tags = var.tags - name = lower("${var.identifier_prefix}${var.lambda_name}") - assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json -} - -data "aws_iam_policy_document" "lambda" { - statement { - actions = [ - "logs:CreateLogGroup", - "logs:CreateLogStream", - "logs:PutLogEvents", - ] - effect = "Allow" - resources = [ - "*" - ] - } - statement { - effect = "Allow" - actions = [ - "ec2:CreateNetworkInterface", - "ec2:DescribeNetworkInterfaces", - "ec2:DescribeVpcs", - "ec2:DeleteNetworkInterface", - "ec2:DescribeSubnets", - "ec2:DescribeSecurityGroups" - ] - resources = ["*"] - } - statement { - effect = "Allow" - actions = [ - "kafka:DescribeCluster", - "kafka:DescribeClusterV2", - "kafka:GetBootstrapBrokers" - ] - resources = ["*"] - } - statement { - effect = "Allow" - actions = [ - "kafka-cluster:Connect", - "kafka-cluster:AlterCluster", - "kafka-cluster:DescribeCluster", - "kafka-cluster:DescribeGroup", - "kafka-cluster:AlterGroup", - "kafka-cluster:DescribeTopic", - "kafka-cluster:ReadData", - "kafka-cluster:WriteData", - "kafka-cluster:*Topic*", - "kafka-cluster:DescribeClusterDynamicConfiguration" - ] - resources = [ - var.kafka_cluster_arn, - "arn:aws:kafka:eu-west-2:${data.aws_caller_identity.current.account_id}:cluster/${var.kafka_cluster_name}/*", - "arn:aws:kafka:eu-west-2:${data.aws_caller_identity.current.account_id}:topic/${var.kafka_cluster_name}/*", - "arn:aws:kafka:eu-west-2:${data.aws_caller_identity.current.account_id}:group/${var.kafka_cluster_name}/*" - ] - } - - statement { - effect = "Allow" - actions = [ - "kms:Encrypt", - "kms:Decrypt", - "kms:ReEncrypt*", - "kms:GenerateDataKey*", - "kms:DescribeKey", - "kms:CreateGrant", - "kms:RetireGrant" - ] - resources = [ - var.kafka_cluster_kms_key_arn - ] - } -} - -resource "aws_iam_policy" "lambda" { - tags = var.tags - - name_prefix = lower("${var.identifier_prefix}${var.lambda_name}") - policy = data.aws_iam_policy_document.lambda.json -} - -resource "aws_iam_role_policy_attachment" "lambda" { - - role = aws_iam_role.lambda.name - policy_arn = aws_iam_policy.lambda.arn -} - - -resource "null_resource" "run_install_requirements" { - triggers = { - dir_sha1 = sha1(join("", [for f in fileset(path.module, "../../../lambdas/kafka_test/**") : filesha1("${path.module}/${f}")])) - } - - provisioner "local-exec" { - interpreter = ["bash", "-c"] - command = local.command - working_dir = "${path.module}/../../../lambdas/kafka_test/" - } -} - -data "archive_file" "lambda" { - type = "zip" - source_dir = local.source_dir - output_path = "../../lambdas/kafka_test.zip" - depends_on = [null_resource.run_install_requirements] - output_file_mode = "0666" -} - -resource "aws_s3_object" "lambda" { - bucket = var.lambda_artefact_storage_bucket - key = "kafka_test.zip" - source = data.archive_file.lambda.output_path - acl = "private" - source_hash = null_resource.run_install_requirements.triggers["dir_sha1"] - depends_on = [data.archive_file.lambda] -} - -resource "aws_lambda_function" "lambda" { - tags = var.tags - - role = aws_iam_role.lambda.arn - handler = "main.lambda_handler" - runtime = "python3.8" - function_name = lower("${var.identifier_prefix}${var.lambda_name}") - s3_bucket = var.lambda_artefact_storage_bucket - s3_key = aws_s3_object.lambda.key - source_code_hash = data.archive_file.lambda.output_base64sha256 - timeout = 900 - memory_size = 256 - - ephemeral_storage { - size = 512 - } - environment { - variables = var.lambda_environment_variables - } - - vpc_config { - security_group_ids = var.kafka_security_group_id - subnet_ids = var.subnet_ids - } - - depends_on = [ - null_resource.run_install_requirements, - aws_s3_object.lambda - ] -} diff --git a/terraform/modules/kafka-test-lambda/20-security-groups.tf b/terraform/modules/kafka-test-lambda/20-security-groups.tf deleted file mode 100644 index 1f66f6ea8..000000000 --- a/terraform/modules/kafka-test-lambda/20-security-groups.tf +++ /dev/null @@ -1,15 +0,0 @@ -resource "aws_security_group" "kafka-test" { - name = "${var.identifier_prefix}kafka-test" - tags = var.tags - vpc_id = var.vpc_id - description = "Specifies rules for traffic to the kafka-test lambda" - - egress { - description = "Allow all outbound traffic within the security group" - from_port = 0 - to_port = 0 - protocol = "-1" - cidr_blocks = ["0.0.0.0/0"] - ipv6_cidr_blocks = ["::/0"] - } -} diff --git a/terraform/modules/kafka-test-lambda/99-outputs.tf b/terraform/modules/kafka-test-lambda/99-outputs.tf deleted file mode 100644 index bcc484fe7..000000000 --- a/terraform/modules/kafka-test-lambda/99-outputs.tf +++ /dev/null @@ -1,3 +0,0 @@ -output "security_group_id" { - value = aws_security_group.kafka-test.id -}