Skip to content

Commit 66b7829

Browse files
authored
enhancement: Add spot instance and managed node group support (#200)
* enhancement: Add spot instance and managed node group support * chore: fix references to map functions removed in tf 0.15 * fix: Pin the terraform version in the validation gha workflow because of issues with submodules when using TF 0.15
1 parent ad0b60b commit 66b7829

File tree

15 files changed

+151
-88
lines changed

15 files changed

+151
-88
lines changed

.github/workflows/terraform.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ jobs:
1313
steps:
1414
- uses: actions/checkout@v2
1515
- uses: hashicorp/setup-terraform@v1
16+
with:
17+
terraform_version: 0.14.8 # Required as of Apr 15 2021 because of breaking changes in tf 0.15
18+
1619
- name: Install Zero
1720
id: install_zero
1821
run: |

templates/kubernetes/terraform/environments/prod/main.tf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,4 +106,8 @@ module "kubernetes" {
106106
# Should not be less than 2 for production. 2 can handle a significant amount of traffic and should give a reasonable amount of redundancy in the case of
107107
# needing to do deployments of the controller or unexpected termination of a node with a controller pod on it.
108108
nginx_ingress_replicas = 2
109+
110+
# The Node Termination Handler should be enabled when using spot instances in your cluster, as it is responsible for gracefully draining a node that is due to be terminated.
111+
# It can also be used to cleanly handle scheduled maintenance events on On-Demand instances, though it runs as a daemonset, so will run 1 pod on each node in your cluster.
112+
enable_node_termination_handler = false
109113
}

templates/kubernetes/terraform/environments/stage/main.tf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,4 +103,8 @@ module "kubernetes" {
103103
cache_store = "<% index .Params `cacheStore` %>"
104104

105105
nginx_ingress_replicas = 1
106+
107+
# The Node Termination Handler should be enabled when using spot instances in your cluster, as it is responsible for gracefully draining a node that is due to be terminated.
108+
# It can also be used to cleanly handle scheduled maintenance events on On-Demand instances, though it runs as a daemonset, so will run 1 pod on each node in your cluster.
109+
enable_node_termination_handler = true
106110
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
locals {
2+
termination_handler_namespace = "kube-system"
3+
termination_handler_helm_values = {
4+
jsonLogging : true
5+
enablePrometheusServer : (var.metrics_type == "prometheus") ? 1 : 0
6+
7+
podMonitor : {
8+
create : (var.metrics_type == "prometheus")
9+
}
10+
}
11+
}
12+
13+
14+
resource "helm_release" "node_termination_handler" {
15+
count = var.enable_node_termination_handler ? 1 : 0
16+
name = "node-termination-handler"
17+
repository = "https://aws.github.io/eks-charts"
18+
chart = "aws-node-termination-handler"
19+
version = "0.15.0"
20+
namespace = local.termination_handler_namespace
21+
values = [jsonencode(local.termination_handler_helm_values)]
22+
}
23+

templates/kubernetes/terraform/modules/kubernetes/user_auth.tf

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,7 @@
1-
locals {
2-
# To prevent coupling to rds engine names
3-
type_map = {
4-
"postgres" : "postgres",
5-
"mysql" : "mysql",
6-
}
7-
db_type = local.type_map[data.aws_db_instance.database.engine]
8-
}
9-
101
module "user_auth" {
11-
count = length(var.user_auth)
12-
source = "commitdev/zero/aws//modules/user_auth"
13-
version = "0.1.21"
2+
count = length(var.user_auth)
3+
source = "commitdev/zero/aws//modules/user_auth"
4+
version = "0.1.21"
145

156
name = var.user_auth[count.index].name
167
auth_namespace = var.user_auth[count.index].auth_namespace

templates/kubernetes/terraform/modules/kubernetes/variables.tf

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ variable "metrics_type" {
7070

7171
variable "application_policy_list" {
7272
description = "Application policies"
73-
type = list
73+
type = list(any)
7474
default = []
7575
}
7676

@@ -153,3 +153,9 @@ variable "nginx_ingress_replicas" {
153153
type = number
154154
default = 2
155155
}
156+
157+
variable "enable_node_termination_handler" {
158+
description = "The Node Termination Handler should be enabled when using spot instances in your cluster, as it is responsible for gracefully draining a node that is due to be terminated. It can also be used to cleanly handle scheduled maintenance events on On-Demand instances, though it runs as a daemonset, so will run 1 pod on each node in your cluster"
159+
type = bool
160+
default = false
161+
}

templates/terraform/bootstrap/secrets/main.tf

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ module "rds_master_secret_stage" {
2020
name = "${local.project}-stage-rds-<% index .Params `randomSeed` %>"
2121
type = "random"
2222
random_length = 32
23-
tags = map("rds", "${local.project}-stage")
23+
tags = { rds: "${local.project}-stage" }
2424
}
2525

2626
module "rds_master_secret_prod" {
@@ -30,7 +30,7 @@ module "rds_master_secret_prod" {
3030
name = "${local.project}-prod-rds-<% index .Params `randomSeed` %>"
3131
type = "random"
3232
random_length = 32
33-
tags = map("rds", "${local.project}-prod")
33+
tags = { rds: "${local.project}-prod" }
3434
}
3535

3636
module "sendgrid_api_key" {
@@ -41,7 +41,7 @@ module "sendgrid_api_key" {
4141
name = "${local.project}-sendgrid-<% index .Params `randomSeed` %>"
4242
type = "string"
4343
value = var.sendgrid_api_key
44-
tags = map("sendgrid", local.project)
44+
tags = { sendgrid: local.project }
4545
}
4646

4747
module "slack_api_key" {
@@ -52,5 +52,5 @@ module "slack_api_key" {
5252
name = "${local.project}-slack-<% index .Params `randomSeed` %>"
5353
type = "string"
5454
value = var.slack_api_key
55-
tags = map("slack", local.project)
55+
tags = { slack: local.project }
5656
}

templates/terraform/environments/prod/main.tf

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ provider "aws" {
2929
allowed_account_ids = [local.account_id]
3030
}
3131

32-
# remote state of "shared"
32+
# remote state of "shared" - contains mostly IAM users that will be shared between environments
3333
data "terraform_remote_state" "shared" {
3434
backend = "s3"
3535
config = {
@@ -56,14 +56,11 @@ module "prod" {
5656
ecr_repositories = [] # Should be created by the staging environment
5757

5858
# EKS configuration
59-
eks_cluster_version = "1.18"
60-
eks_worker_instance_type = "t3.medium"
61-
eks_worker_asg_min_size = 2
62-
eks_worker_asg_max_size = 4
63-
64-
# EKS-Optimized AMI for your region: https://docs.aws.amazon.com/eks/latest/userguide/eks-optimized-ami.html
65-
# https://<% index .Params `region` %>.console.aws.amazon.com/systems-manager/parameters/%252Faws%252Fservice%252Feks%252Foptimized-ami%252F1.18%252Famazon-linux-2%252Frecommended%252Fimage_id/description?region=<% index .Params `region` %>
66-
eks_worker_ami = "<% index .Params `eksWorkerAMI` %>"
59+
eks_cluster_version = "1.19"
60+
eks_worker_instance_types = ["t3.medium"]
61+
eks_worker_asg_min_size = 2
62+
eks_worker_asg_max_size = 4
63+
eks_use_spot_instances = false
6764

6865
# Hosting configuration. Each domain will have a bucket created for it, but may have multiple aliases pointing to the same bucket.
6966
# Note that because of the way terraform handles lists, new records should be added to the end of the list.
@@ -101,11 +98,11 @@ module "prod" {
10198

10299
# Logging configuration
103100
logging_type = "<% index .Params `loggingType` %>"
104-
<% if ne (index .Params `loggingType`) "kibana" %># <% end %>logging_es_version = "7.9"
105-
<% if ne (index .Params `loggingType`) "kibana" %># <% end %>logging_az_count = "2"
106-
<% if ne (index .Params `loggingType`) "kibana" %># <% end %>logging_es_instance_type = "m5.large.elasticsearch"
107-
<% if ne (index .Params `loggingType`) "kibana" %># <% end %>logging_es_instance_count = "2" # Must be a mulitple of the az count
108-
<% if ne (index .Params `loggingType`) "kibana" %># <% end %>logging_volume_size_in_gb = "50" # Maximum value is limited by the instance type
101+
<% if ne (index .Params `loggingType`) "kibana" %># <% end %>logging_es_version = "7.9"
102+
<% if ne (index .Params `loggingType`) "kibana" %># <% end %>logging_az_count = "2"
103+
<% if ne (index .Params `loggingType`) "kibana" %># <% end %>logging_es_instance_type = "t2.medium.elasticsearch" # The next larger instance type is "m5.large.elasticsearch" - upgrading an existing cluster may require fully recreating though, as m5.large is the first instance size which supports disk encryption
104+
<% if ne (index .Params `loggingType`) "kibana" %># <% end %>logging_es_instance_count = "2"                   # Must be a multiple of the az count
105+
<% if ne (index .Params `loggingType`) "kibana" %># <% end %>logging_volume_size_in_gb = "35" # Maximum value is limited by the instance type
109106
<% if ne (index .Params `loggingType`) "kibana" %># <% end %>logging_create_service_role = false # If in the same AWS account, this would have already been created by the staging env
110107
# See https://docs.aws.amazon.com/elasticsearch-service/latest/developerguide/aes-limits.html
111108

@@ -118,9 +115,11 @@ module "prod" {
118115
## Check https://docs.aws.amazon.com/AmazonElastiCache/latest/mem-ug/SelectEngine.html to compare redis or memcached.
119116
cache_store = "<% index .Params `cacheStore` %>"
120117

118+
<% if ne (index .Params `cacheStore`) "none" %>
121119
## See how to define node and instance type: https://docs.aws.amazon.com/AmazonElastiCache/latest/mem-ug/nodes-select-size.html
122120
cache_cluster_size = 1
123121
cache_instance_type = "cache.r6g.large"
122+
<% end %>
124123

125124
# Roles configuration
126125
roles = [

templates/terraform/bootstrap/secrets/eks_creator_user.tf renamed to templates/terraform/environments/shared/eks_creator_user.tf

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ data "aws_iam_policy_document" "assumerole_root_only_policy" {
1818

1919
principals {
2020
type = "AWS"
21-
identifiers = [local.aws_account_id]
21+
identifiers = [local.account_id]
2222
}
2323
}
2424
}
@@ -40,6 +40,7 @@ resource "aws_iam_role_policy" "eks_cluster_creator" {
4040
# Allow the cluster creator role to create a cluster
4141
data "aws_iam_policy_document" "eks_manage" {
4242
statement {
43+
effect = "Allow"
4344
actions = [
4445
"eks:*",
4546
"ec2:*",
@@ -60,6 +61,7 @@ data "aws_iam_policy_document" "eks_manage" {
6061
}
6162

6263
statement {
64+
effect = "Allow"
6365
actions = [
6466
"iam:GetRole",
6567
"iam:PassRole",
@@ -70,11 +72,41 @@ data "aws_iam_policy_document" "eks_manage" {
7072
"iam:AttachRolePolicy",
7173
"iam:DetachRolePolicy",
7274
"iam:ListAttachedRolePolicies",
73-
"iam:ListRolePolicies"
75+
"iam:ListRolePolicies",
76+
"iam:CreatePolicy",
77+
"iam:GetPolicy",
78+
"iam:DeletePolicy",
79+
"iam:GetPolicyVersion",
80+
"iam:ListPolicyVersions",
7481
]
7582
resources = [
76-
"arn:aws:iam::${local.aws_account_id}:role/${local.project}-*",
77-
"arn:aws:iam::${local.aws_account_id}:role/k8s-${local.project}-*",
83+
"arn:aws:iam::${local.account_id}:role/${local.project}-*",
84+
"arn:aws:iam::${local.account_id}:role/k8s-${local.project}-*",
85+
"arn:aws:iam::${local.account_id}:policy/${local.project}-*",
7886
]
7987
}
88+
89+
statement {
90+
effect = "Allow"
91+
actions = ["iam:GetRole"]
92+
resources = ["arn:aws:iam::${local.account_id}:role/*"]
93+
}
94+
95+
statement {
96+
effect = "Allow"
97+
actions = ["iam:CreateServiceLinkedRole"]
98+
resources = ["*"]
99+
100+
condition {
101+
test = "StringEquals"
102+
variable = "iam:AWSServiceName"
103+
104+
values = [
105+
"eks.amazonaws.com",
106+
"eks-nodegroup.amazonaws.com",
107+
"eks-fargate.amazonaws.com",
108+
]
109+
}
110+
}
111+
80112
}

templates/terraform/environments/shared/main.tf

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,14 @@ locals {
2626
# Users configuration
2727
ci_user_name = "${local.project}-ci-user"
2828
users = [
29-
{
30-
name = local.ci_user_name
31-
roles = [
32-
{ name = "deployer", environments = ["stage", "prod"] }
33-
]
34-
global_roles = []
35-
create_access_keys = true
36-
# },
29+
{
30+
name = local.ci_user_name
31+
roles = [
32+
{ name = "deployer", environments = ["stage", "prod"] }
33+
]
34+
global_roles = []
35+
create_access_keys = true
36+
},
3737
# {
3838
# name = "dev1"
3939
# roles = [
@@ -58,7 +58,7 @@ locals {
5858
# ]
5959
# global_roles = ["mfa-required", "console-allowed"]
6060
# create_access_keys = false
61-
},
61+
# },
6262
]
6363
}
6464

@@ -85,7 +85,7 @@ resource "aws_iam_group_membership" "mfa_required_group" {
8585

8686
group = aws_iam_group.mfa_required.name
8787

88-
depends_on = [ aws_iam_user.access_user ]
88+
depends_on = [aws_iam_user.access_user]
8989
}
9090

9191
resource "aws_iam_group_membership" "console_allowed_group" {
@@ -97,35 +97,38 @@ resource "aws_iam_group_membership" "console_allowed_group" {
9797

9898
group = aws_iam_group.console_allowed.name
9999

100-
depends_on = [ aws_iam_user.access_user ]
100+
depends_on = [aws_iam_user.access_user]
101101
}
102102

103103
## Create access/secret key pair and save to secret manager
104104
resource "aws_iam_access_key" "access_user" {
105-
for_each = { for u in local.users : u.name => u.roles if u.create_access_keys}
105+
for_each = { for u in local.users : u.name => u.roles if u.create_access_keys }
106106

107107
user = aws_iam_user.access_user[each.key].name
108108

109-
depends_on = [ aws_iam_user.access_user ]
109+
depends_on = [aws_iam_user.access_user]
110110
}
111111

112112
module "secret_keys" {
113113
source = "commitdev/zero/aws//modules/secret"
114114
version = "0.0.2"
115115

116-
for_each = { for u in local.users : u.name => u.roles if u.create_access_keys}
116+
for_each = { for u in local.users : u.name => u.roles if u.create_access_keys }
117117

118-
name = "${each.key}-aws-keys${local.random_seed}"
119-
type = "map"
120-
values = map("access_key_id", aws_iam_access_key.access_user[each.key].id, "secret_key", aws_iam_access_key.access_user[each.key].secret)
121-
tags = map("project", local.project)
118+
name = "${each.key}-aws-keys${local.random_seed}"
119+
type = "map"
120+
values = {
121+
access_key_id : aws_iam_access_key.access_user[each.key].id,
122+
secret_key : aws_iam_access_key.access_user[each.key].secret
123+
}
124+
tags = { project : local.project }
122125

123-
depends_on = [ aws_iam_access_key.access_user ]
126+
depends_on = [aws_iam_access_key.access_user]
124127
}
125128

126129
# Enable AWS CloudTrail to help you audit governance, compliance, and operational risk of your AWS account, with logs stored in S3 bucket.
127130
module "cloudtrail" {
128-
source = "commitdev/zero/aws//modules/cloudtrail"
131+
source = "commitdev/zero/aws//modules/cloudtrail"
129132
version = "0.1.10"
130133

131134
project = local.project
@@ -141,7 +144,7 @@ output "iam_users" {
141144

142145
output "user_role_mapping" {
143146
value = [
144-
for u in local.users: {
147+
for u in local.users : {
145148
name = u.name
146149
roles = u.roles
147150
}

0 commit comments

Comments
 (0)