diff --git a/CHANGELOG.md b/CHANGELOG.md index 6597b10..714ed54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,8 @@ Optional release notice. ## [Unreleased] - YYYY-MM-DD +- [Added] Transit Gateway egress mode for new VPCs: set `enable_transit_gateway = true` (+ `transit_gateway_id`) to route private-subnet egress through a Transit Gateway instead of NAT gateways; IPv6 egress is opt-in via `transit_gateway_ipv6_egress`. See [Transit Gateway egress](README.md#transit-gateway-egress) ([#115](https://github.com/quiltdata/iac/pull/115)) + ## [1.7.2] - 2026-06-08 - [Fixed] Bump `modules/cnames` AWS provider constraint from `~> 5.0` to `~> 6.0` so it resolves alongside the `vpc` module's `aws >= 6.28` requirement — using `quilt` + `cnames` in one root previously failed `terraform init` ([#117](https://github.com/quiltdata/iac/pull/117)) diff --git a/README.md b/README.md index 6cd7d3f..d40ca15 100644 --- a/README.md +++ b/README.md @@ -771,6 +771,67 @@ resource "aws_vpc_endpoint" "api_gateway_endpoint" { } ``` +### Transit Gateway egress + +By default a new Quilt VPC reaches the internet through Quilt-created NAT +gateways. If you operate a Transit Gateway (TGW) as your egress boundary, set +`enable_transit_gateway = true` (only with `create_new_vpc = true`). Quilt still +creates the VPC, subnets, and endpoints, but disables the NAT gateways +and the IPv6 egress-only IGW, attaches the VPC to your TGW (in the intra +subnets), and points each private route table's default route at the TGW. The +S3 gateway endpoint is unchanged, so bulk S3 traffic stays on the endpoint and +does **not** traverse the TGW — only genuinely external egress does. + +```hcl +module "quilt" { + # ...
+ create_new_vpc = true + enable_transit_gateway = true + transit_gateway_id = "tgw-0123456789abcdef0" # an existing TGW, or one created in this same config + # transit_gateway_ipv6_egress = true # only if your TGW carries IPv6 egress +} +``` + +`transit_gateway_id` may be a value known only after apply (e.g. a TGW you +create in the same Terraform configuration) — the toggle is the separate +`enable_transit_gateway` bool, so this does not break planning. + +**You must provide the egress path.** Quilt owns only the VPC→TGW leg. Before +apply, your TGW must: + +- be reachable from the deployment account — share it via + [AWS Resource Access Manager](https://aws.amazon.com/ram/) and accept the VPC + attachment (or enable auto-accept) if the TGW lives in another account; +- have route tables that forward the VPC's egress out to the internet (e.g. via + a central egress VPC / NAT) **and** route return traffic back to the VPC's + CIDR. + +**CIDR uniqueness:** a TGW cannot route between overlapping CIDRs, so any VPCs +attached to the same TGW must have non-overlapping ranges. Set `cidr` +accordingly if more than one Quilt stack shares a TGW (the default is +`10.0.0.0/16`). + +**IPv6** egress through the TGW is opt-in (`transit_gateway_ipv6_egress`, +default `false`). The VPC is dual-stack, so set this `true` **only if your TGW +actually carries IPv6 egress**: pointing `::/0` at a TGW that can't route IPv6 +black-holes those packets, and clients without +[Happy Eyeballs](https://en.wikipedia.org/wiki/Happy_Eyeballs) IPv6+IPv4 dual +stack support (e.g. Python's `requests`/`urllib3`) then stall on the connection +timeout before falling back +to IPv4. Left `false`, the new VPC has no IPv6 default route, so an IPv6 +attempt fails immediately (`ENETUNREACH`) and the client uses IPv4 with no +delay. + +**Reversibility:** removing `enable_transit_gateway` (or setting it `false`) +restores the NAT gateways and IPv6 egress-only IGW. 
Toggling it on or off for an +already-deployed VPC recreates/destroys NAT gateways and their Elastic IPs and +briefly interrupts egress, so do it in a maintenance window. Either direction +also **changes the stack's public egress IP** — disabling releases the NAT +Elastic IPs (AWS won't hand the same ones back), and enabling sends egress out +through the NAT behind the TGW instead — so anything that allowlists Quilt's egress +address (a license endpoint, a partner firewall, a SaaS IP allowlist) must be +updated, or it breaks silently. + ### Profile You may wish to set a specific AWS profile before executing `terraform` commands. diff --git a/VARIABLES.md b/VARIABLES.md index 6014c82..3e7c366 100644 --- a/VARIABLES.md +++ b/VARIABLES.md @@ -26,6 +26,9 @@ This document provides comprehensive documentation for all variables available i | `user_subnets` | `list(string)` | `null` | ALB subnet IDs (exactly 2 required for internal ALB with existing VPC) | | `user_security_group` | `string` | `null` | Security group ID for ALB access (required for existing VPC) | | `api_endpoint` | `string` | `null` | VPC endpoint ID for API Gateway (required for internal ALB with existing VPC) | +| `enable_transit_gateway` | `bool` | `false` | Route private-subnet egress through a Transit Gateway instead of NAT gateways (`create_new_vpc = true` only). Disables NAT + the IPv6 egress-only IGW; requires `transit_gateway_id`. See [Transit Gateway egress](README.md#transit-gateway-egress). | +| `transit_gateway_id` | `string` | `null` | Transit Gateway to attach to (required when `enable_transit_gateway = true`). May be a value known only after apply (e.g. a TGW created in the same configuration). | +| `transit_gateway_ipv6_egress` | `bool` | `false` | Also route IPv6 (`::/0`) egress through the TGW. Leave off unless the TGW carries IPv6 egress, otherwise IPv6 traffic would be black-holed.
| ### Database Configuration Variables diff --git a/examples/main.tf b/examples/main.tf index f9e8420..4fffab2 100644 --- a/examples/main.tf +++ b/examples/main.tf @@ -137,6 +137,9 @@ module "quilt" { # user_security_group = "sg-YOUR-SECURITY-GROUP" # For ALB access # user_subnets = ["subnet-YOUR-USER-1", "subnet-YOUR-USER-2"] # For ALB (if internal = true) # api_endpoint = "vpce-YOUR-VPC-ENDPOINT" # VPC endpoint (if internal = true) + # enable_transit_gateway = true # Route private-subnet egress via a TGW instead of NAT (create_new_vpc = true only) + # transit_gateway_id = "tgw-YOUR-TRANSIT-GATEWAY-ID" # Required when enable_transit_gateway = true; the TGW must route to the internet and back + # transit_gateway_ipv6_egress = true # Only if the TGW carries IPv6 egress; off = no IPv6 default route (clients use IPv4) # CloudFormation notifications (optional) # stack_notification_arns = ["arn:aws:sns:YOUR-AWS-REGION:YOUR-ACCOUNT-ID:quilt-notifications"] diff --git a/modules/quilt/main.tf b/modules/quilt/main.tf index 80e3648..d8bf322 100644 --- a/modules/quilt/main.tf +++ b/modules/quilt/main.tf @@ -14,6 +14,10 @@ module "vpc" { cidr = var.cidr internal = var.internal + enable_transit_gateway = var.enable_transit_gateway + transit_gateway_id = var.transit_gateway_id + transit_gateway_ipv6_egress = var.transit_gateway_ipv6_egress + create_new_vpc = var.create_new_vpc existing_api_endpoint = var.api_endpoint existing_vpc_id = var.vpc_id diff --git a/modules/quilt/tests/smoke/main.tf b/modules/quilt/tests/smoke/main.tf index 1f00705..c8c287c 100644 --- a/modules/quilt/tests/smoke/main.tf +++ b/modules/quilt/tests/smoke/main.tf @@ -66,14 +66,32 @@ variable "user_subnets" { default = null } +variable "enable_transit_gateway" { + type = bool + default = false +} + +variable "transit_gateway_id" { + type = string + default = null +} + +variable "transit_gateway_ipv6_egress" { + type = bool + default = false +} + # New inputs added to the quilt module must be threaded through 
here, or the # smoke coverage silently narrows (the new input is never exercised). module "quilt" { source = "../../" - name = "quilt-test" - parameters = {} - template_file = "${path.module}/fixtures/quilt.yaml" + name = "quilt-test" + parameters = {} + template_file = "${path.module}/fixtures/quilt.yaml" + enable_transit_gateway = var.enable_transit_gateway + transit_gateway_id = var.transit_gateway_id + transit_gateway_ipv6_egress = var.transit_gateway_ipv6_egress create_new_vpc = var.create_new_vpc internal = var.internal diff --git a/modules/quilt/tests/smoke/smoke.tftest.hcl b/modules/quilt/tests/smoke/smoke.tftest.hcl index 5df8a71..3c169ab 100644 --- a/modules/quilt/tests/smoke/smoke.tftest.hcl +++ b/modules/quilt/tests/smoke/smoke.tftest.hcl @@ -32,6 +32,37 @@ run "new_vpc_plans" { } } +run "new_vpc_transit_gateway_plans" { + command = plan + variables { + create_new_vpc = true + internal = false + enable_transit_gateway = true + transit_gateway_id = "tgw-00000000000000000" + } + # TGW egress mode on a new VPC must plan end-to-end through the public module. + assert { + condition = output.stack_name == "quilt-test" + error_message = "The CloudFormation stack must be named after var.name" + } +} + +run "new_vpc_transit_gateway_ipv6_plans" { + command = plan + variables { + create_new_vpc = true + internal = false + enable_transit_gateway = true + transit_gateway_id = "tgw-00000000000000000" + transit_gateway_ipv6_egress = true + } + # TGW egress with IPv6 opted in must also plan end-to-end. 
+ assert { + condition = output.stack_name == "quilt-test" + error_message = "The CloudFormation stack must be named after var.name" + } +} + run "new_vpc_internal_plans" { command = plan variables { diff --git a/modules/quilt/variables.tf b/modules/quilt/variables.tf index dad6263..7c3e1c9 100644 --- a/modules/quilt/variables.tf +++ b/modules/quilt/variables.tf @@ -29,6 +29,28 @@ variable "internal" { description = "If true create an inward ELBv2, else create an internet-facing ELBv2." } +variable "enable_transit_gateway" { + type = bool + default = false + description = "Route private subnet egress through a Transit Gateway instead of NAT gateways. Only supported when create_new_vpc == true. When true, transit_gateway_id is required, and NAT gateways and the IPv6 egress-only gateway are disabled. (Toggle is a separate bool so transit_gateway_id may be a value known only after apply, e.g. a TGW created in the same configuration.)" +} + +variable "transit_gateway_id" { + type = string + default = null + description = "Transit Gateway ID for private subnet egress. Required when enable_transit_gateway == true; may be a computed value (e.g. a TGW created in the same configuration)." + validation { + condition = var.transit_gateway_id == null || can(regex("^tgw-[0-9a-f]+$", var.transit_gateway_id)) + error_message = "transit_gateway_id must be null or a valid Transit Gateway ID (e.g. tgw-0123456789abcdef0)." + } +} + +variable "transit_gateway_ipv6_egress" { + type = bool + default = false + description = "When enable_transit_gateway is true, also route IPv6 (::/0) egress through the Transit Gateway. Set true only if the Transit Gateway carries IPv6 egress: pointing ::/0 at a TGW that can't route IPv6 black-holes the traffic and stalls clients without Happy Eyeballs (e.g. Python requests/urllib3) on the connection timeout. Left false (default), the VPC has no IPv6 default route, so IPv6 attempts fail immediately and clients use IPv4 with no delay. 
No effect when enable_transit_gateway is false." +} + variable "db_snapshot_identifier" { type = string nullable = true diff --git a/modules/vpc/main.tf b/modules/vpc/main.tf index e8c928a..c66d32f 100644 --- a/modules/vpc/main.tf +++ b/modules/vpc/main.tf @@ -17,6 +17,7 @@ locals { "user_security_group (required)" : var.existing_user_security_group != null, "user_subnets (required if var.internal == true and var.create_new_vpc == false, else must be null)" : (var.internal && !var.create_new_vpc) == (var.existing_user_subnets != null) "api_endpoint (required if var.internal == true, else must be null)" : var.internal == (var.existing_api_endpoint != null), + "enable_transit_gateway == false (TGW egress requires create_new_vpc == true)" : var.enable_transit_gateway == false, } new_network_requires = { "create_new_vpc == true" : var.create_new_vpc == true, @@ -32,6 +33,11 @@ locals { new_network_valid = alltrue(values(local.new_network_requires)) configuration_error = !local.existing_network_valid && !local.new_network_valid + # TGW egress is gated on the bool (not transit_gateway_id != null) so the + # resource counts stay known at plan time even when transit_gateway_id is a + # computed value (e.g. a TGW created in the same configuration). + transit_gateway_enabled = local.new_network_valid && var.enable_transit_gateway + azs = slice(data.aws_availability_zones.available.names, 0, 2) subnet_cidrs = [for k, v in local.azs : cidrsubnet(var.cidr, 1, k)] } @@ -71,8 +77,52 @@ module "vpc" { enable_dns_hostnames = true enable_dns_support = true - enable_nat_gateway = true - one_nat_gateway_per_az = true + enable_nat_gateway = !var.enable_transit_gateway + one_nat_gateway_per_az = !var.enable_transit_gateway + create_egress_only_igw = !var.enable_transit_gateway +} + +resource "aws_ec2_transit_gateway_vpc_attachment" "egress" { + count = local.transit_gateway_enabled ? 1 : 0 + + # Intra subnets only host the attachment ENIs (they have no internet route). 
+ # The egress default routes go in the private route tables below — don't move + # this to private_subnets. + subnet_ids = module.vpc.intra_subnets + transit_gateway_id = var.transit_gateway_id + vpc_id = module.vpc.vpc_id + ipv6_support = var.transit_gateway_ipv6_egress ? "enable" : "disable" + + tags = { + Name = "${var.name}-egress" + } + + lifecycle { + precondition { + condition = var.transit_gateway_id != null + error_message = "transit_gateway_id is required when enable_transit_gateway is true." + } + } +} + +resource "aws_route" "private_tgw_ipv4_egress" { + count = local.transit_gateway_enabled ? length(module.vpc.private_route_table_ids) : 0 + + route_table_id = module.vpc.private_route_table_ids[count.index] + destination_cidr_block = "0.0.0.0/0" + transit_gateway_id = var.transit_gateway_id + + depends_on = [aws_ec2_transit_gateway_vpc_attachment.egress] +} + +resource "aws_route" "private_tgw_ipv6_egress" { + count = local.transit_gateway_enabled && var.transit_gateway_ipv6_egress ? length(module.vpc.private_route_table_ids) : 0 + + route_table_id = module.vpc.private_route_table_ids[count.index] + destination_ipv6_cidr_block = "::/0" + transit_gateway_id = var.transit_gateway_id + + depends_on = [aws_ec2_transit_gateway_vpc_attachment.egress] } // Module name no longer accurate (see description); changing name causes tf apply to fail diff --git a/modules/vpc/tests/validation.tftest.hcl b/modules/vpc/tests/validation.tftest.hcl index 064bf0c..918d1fe 100644 --- a/modules/vpc/tests/validation.tftest.hcl +++ b/modules/vpc/tests/validation.tftest.hcl @@ -182,3 +182,187 @@ run "existing_vpc_missing_inputs_is_rejected" { # create_new_vpc = false without the required existing_* inputs is incomplete. 
expect_failures = [output.configuration_error] } + +# --- Transit Gateway egress mode -------------------------------------------- + +run "new_vpc_with_transit_gateway" { + command = plan + + variables { + create_new_vpc = true + internal = false + enable_transit_gateway = true + transit_gateway_id = "tgw-00000000000000000" + existing_vpc_id = null + existing_api_endpoint = null + existing_intra_subnets = null + existing_private_subnets = null + existing_public_subnets = null + existing_user_security_group = null + existing_user_subnets = null + } + + # A new VPC with enable_transit_gateway is the supported TGW egress mode and + # must plan cleanly. + assert { + condition = !strcontains(output.configuration_error, "❌") + error_message = "A new VPC with transit_gateway_id must satisfy every requirement" + } + + # The TGW attachment and the IPv4 default egress route must be planned, + # pointed at the supplied gateway. + assert { + condition = length(aws_ec2_transit_gateway_vpc_attachment.egress) == 1 + error_message = "Exactly one TGW VPC attachment must be created" + } + + assert { + condition = aws_ec2_transit_gateway_vpc_attachment.egress[0].transit_gateway_id == "tgw-00000000000000000" + error_message = "The TGW attachment must target the supplied transit_gateway_id" + } + + assert { + condition = length(aws_route.private_tgw_ipv4_egress) > 0 + error_message = "IPv4 default egress routes to the TGW must be planned" + } + + # IPv6 egress via the TGW is opt-in (transit_gateway_ipv6_egress, default + # false). With it off, no ::/0 route is created and the attachment does not + # advertise IPv6 support, so IPv6 traffic is not black-holed at the TGW. 
+ assert { + condition = length(aws_route.private_tgw_ipv6_egress) == 0 + error_message = "No IPv6 egress route should be planned when transit_gateway_ipv6_egress is false" + } + + assert { + condition = aws_ec2_transit_gateway_vpc_attachment.egress[0].ipv6_support == "disable" + error_message = "The TGW attachment must not advertise IPv6 support when transit_gateway_ipv6_egress is false" + } +} + +run "new_vpc_with_transit_gateway_ipv6_egress" { + command = plan + + variables { + create_new_vpc = true + internal = false + enable_transit_gateway = true + transit_gateway_id = "tgw-00000000000000000" + transit_gateway_ipv6_egress = true + existing_vpc_id = null + existing_api_endpoint = null + existing_intra_subnets = null + existing_private_subnets = null + existing_public_subnets = null + existing_user_security_group = null + existing_user_subnets = null + } + + # Opting in routes the IPv6 default route through the TGW and enables IPv6 + # support on the attachment. + assert { + condition = length(aws_route.private_tgw_ipv6_egress) > 0 + error_message = "IPv6 egress routes to the TGW must be planned when transit_gateway_ipv6_egress is true" + } + + assert { + condition = aws_ec2_transit_gateway_vpc_attachment.egress[0].ipv6_support == "enable" + error_message = "The TGW attachment must advertise IPv6 support when transit_gateway_ipv6_egress is true" + } +} + +run "existing_vpc_with_transit_gateway_is_rejected" { + command = plan + + variables { + create_new_vpc = false + internal = false + enable_transit_gateway = true + transit_gateway_id = "tgw-00000000000000000" + existing_vpc_id = "vpc-00000000000000000" + existing_api_endpoint = null + existing_intra_subnets = ["subnet-intra-a", "subnet-intra-b"] + existing_private_subnets = ["subnet-priv-a", "subnet-priv-b"] + existing_public_subnets = ["subnet-pub-a", "subnet-pub-b"] + existing_user_security_group = "sg-00000000000000000" + existing_user_subnets = null + } + + # enable_transit_gateway is only supported with 
create_new_vpc = true; + # combining it with an existing VPC must fail fast rather than silently + # ignore the request. + expect_failures = [output.configuration_error] +} + +run "transit_gateway_enabled_without_id_is_rejected" { + command = plan + + variables { + create_new_vpc = true + internal = false + enable_transit_gateway = true + transit_gateway_id = null + existing_vpc_id = null + existing_api_endpoint = null + existing_intra_subnets = null + existing_private_subnets = null + existing_public_subnets = null + existing_user_security_group = null + existing_user_subnets = null + } + + # enable_transit_gateway = true requires a transit_gateway_id; the attachment + # precondition must reject a null id. + expect_failures = [aws_ec2_transit_gateway_vpc_attachment.egress] +} + +run "transit_gateway_id_invalid_format_is_rejected" { + command = plan + + variables { + create_new_vpc = true + internal = false + enable_transit_gateway = true + transit_gateway_id = "not-a-tgw-id" + existing_vpc_id = null + existing_api_endpoint = null + existing_intra_subnets = null + existing_private_subnets = null + existing_public_subnets = null + existing_user_security_group = null + existing_user_subnets = null + } + + # A malformed transit_gateway_id must be rejected by the variable validation. + expect_failures = [var.transit_gateway_id] +} + +run "transit_gateway_id_without_enable_is_noop" { + command = plan + + variables { + create_new_vpc = true + internal = false + enable_transit_gateway = false + transit_gateway_id = "tgw-00000000000000000" + existing_vpc_id = null + existing_api_endpoint = null + existing_intra_subnets = null + existing_private_subnets = null + existing_public_subnets = null + existing_user_security_group = null + existing_user_subnets = null + } + + # transit_gateway_id is the value, enable_transit_gateway is the toggle: an id + # set without enabling the mode is a no-op — no attachment, no TGW routes. 
+ assert { + condition = length(aws_ec2_transit_gateway_vpc_attachment.egress) == 0 + error_message = "No TGW attachment should be created when enable_transit_gateway is false" + } + + assert { + condition = length(aws_route.private_tgw_ipv4_egress) == 0 && length(aws_route.private_tgw_ipv6_egress) == 0 + error_message = "No TGW egress routes should be created when enable_transit_gateway is false" + } +} diff --git a/modules/vpc/variables.tf b/modules/vpc/variables.tf index f05b3f3..2dfffcb 100644 --- a/modules/vpc/variables.tf +++ b/modules/vpc/variables.tf @@ -23,6 +23,28 @@ variable "internal" { nullable = false } +variable "enable_transit_gateway" { + type = bool + default = false + description = "Route private subnet egress through a Transit Gateway instead of NAT gateways. Only supported when create_new_vpc == true. When true, transit_gateway_id is required, and NAT gateways and the IPv6 egress-only gateway are disabled. (Toggle is a separate bool so transit_gateway_id may be a value known only after apply, e.g. a TGW created in the same configuration.)" +} + +variable "transit_gateway_id" { + type = string + default = null + description = "Transit Gateway ID for private subnet egress. Required when enable_transit_gateway == true; may be a computed value (e.g. a TGW created in the same configuration)." + validation { + condition = var.transit_gateway_id == null || can(regex("^tgw-[0-9a-f]+$", var.transit_gateway_id)) + error_message = "transit_gateway_id must be null or a valid Transit Gateway ID (e.g. tgw-0123456789abcdef0)." + } +} + +variable "transit_gateway_ipv6_egress" { + type = bool + default = false + description = "When enable_transit_gateway is true, also route IPv6 (::/0) egress through the Transit Gateway. Set true only if the Transit Gateway carries IPv6 egress: pointing ::/0 at a TGW that can't route IPv6 black-holes the traffic and stalls clients without Happy Eyeballs (e.g. Python requests/urllib3) on the connection timeout. 
Left false (default), the VPC has no IPv6 default route, so IPv6 attempts fail immediately and clients use IPv4 with no delay. No effect when enable_transit_gateway is false." +} + variable "existing_vpc_id" { type = string }