diff --git a/deployment/terraform/environments/private-osv/.terraform.lock.hcl b/deployment/terraform/environments/private-osv/.terraform.lock.hcl
new file mode 100644
index 00000000000..afd187e452c
--- /dev/null
+++ b/deployment/terraform/environments/private-osv/.terraform.lock.hcl
@@ -0,0 +1,42 @@
+# This file is maintained automatically by "terraform init".
+# Manual edits may be lost in future updates.
+
+provider "registry.terraform.io/hashicorp/google" {
+  version     = "7.35.0"
+  constraints = "~> 7.35.0"
+  hashes = [
+    "h1:reD5VR1Je0YElmqhBkluWjtVwILyx6bbeU1RvI2PFHM=",
+    "zh:02c62a2fdf8f9b268054a7a7c3478760e5149889e0c47572940a5503291bb8a5",
+    "zh:1ca325734f7c4a0f39c86caef38d618db64ca2d9b052f763f469af4e41fb8ea6",
+    "zh:5777b1dd32e3705735743c3749ccf826ebd2ca3ab774f912379fee2ad235e242",
+    "zh:61ea1eb889bd037ccf39d5108d686aff67474c1696496567eaf10c4f583e5a3d",
+    "zh:77308f5d2e1923dab36e320aa9774e8c09e1e4d0185d68f36eedeacd176c7a43",
+    "zh:841c40ba2141654aa17ab22c3690fea6fd7c2be0cdb96e519ea6360cab20a54f",
+    "zh:8bea49dabe822f3a852d22e30cd2faf233437b56fa102f9087cfd40b026c2fca",
+    "zh:8d94331d0dd2b200594aade652f78647377d79d863bcb0e17e3bf4b0a8fe3b73",
+    "zh:a1c8a93728b0bf7072e69846380bebee985ebc40e0f6a9277f6ba0c3b9541137",
+    "zh:bee175415263afe9953a5af2180db5cbba653505707d6f88e6d8dd0b04c990b5",
+    "zh:cd63ba21833277871da2390865fcec8317bc3513cf91bd2d5ef6144192207b76",
+    "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
+  ]
+}
+
+provider "registry.terraform.io/hashicorp/google-beta" {
+  version     = "7.35.0"
+  constraints = "~> 7.35.0"
+  hashes = [
+    "h1:3Q7l97vzTchvzkgB3kcAmv81C+SygZV5ep/nTx/r+g0=",
+    "zh:1abd0d66802037d02536e1e1a4c99f2767157e18c56737cdf818aad5501c7d80",
+    "zh:1c13c1152e65eb7a86a89975546385d80752371e8d02c61c5343f7fce3ec1a8e",
+    "zh:2d06b2381a7d338ee6fa72330434673a15b1735b08c84bb998f53376c3003b06",
+    "zh:4b1151b1d94c5596258d928bbfc16197b898ee032bbe0ee2a626ba1a713523b4",
+    "zh:5602c7f6101fcdc8782a5154b27014304773ed72095176636cbd96185c720d2a",
+    "zh:668348f9a2925f0e8820c756d90476a937349fe8ed92550863fc9f681a44c04d",
+    "zh:737a15ac77b069689e073cd8c6a81085d683cab66d46eb9b6dca55e80ea19ab2",
+    "zh:88916a0d3cf8eff075c3a18d626001ec9f7ac4f2eb3e215a56133a44ab956b54",
+    "zh:b6191da5295f766f05558245a5bbf7b60ebc53cd868b66110d47acae89933267",
+    "zh:c8e777a057dc76996447d7b8c45199407a689aafc93817d7a2a21f115c010ca9",
+    "zh:d311d9790ff6de41f4efd9deb062f2363478f9a2b720b7eebaaf463950cd7cf8",
+    "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c",
+  ]
+}
diff --git a/deployment/terraform/environments/private-osv/main.tf b/deployment/terraform/environments/private-osv/main.tf
new file mode 100644
index 00000000000..e4d015916af
--- /dev/null
+++ b/deployment/terraform/environments/private-osv/main.tf
@@ -0,0 +1,42 @@
+module "osv_pipeline" {
+  source = "../../modules/osv_pipeline" # shared OSV pipeline module; this environment passes "-private"/"private-" resource names
+
+  project_id                                 = "oss-vdb-test"
+  datastore_name                             = "datastore-private"
+  worker_service_account_id                  = "worker-private"
+  vulnerabilities_export_bucket              = "osv-test-vulnerabilities-private"
+  affected_commits_backups_bucket            = "osv-test-affected-commits-private"
+  pubsub_topic_name                          = "private-tasks"
+  pubsub_topic_failed_tasks_name             = "failed-private-tasks"
+  pubsub_subscription_default_work_pool_name = "private-default-pool"
+  pubsub_subscription_recovery_name          = "private-recovery"
+  cluster_name                               = "workers-private"
+  cluster_location                           = "us-central1-f"
+  cluster_master_cidr                        = "172.16.0.80/28"
+  gitter_disk_name                           = "gitter-disk-private"
+  gitter_disk_size_gb                        = 6144 # GiB (= 6 TiB)
+  importer_reconciler_git_cache_disk_name    = "importer-reconciler-git-cache-private"
+  importer_reconciler_git_cache_size_gb      = 200 # GiB
+  subnet_name                                = "my-subnet-0-private"
+  subnet_cidr                                = "10.45.80.0/22"
+  router_name                                = "router-private"
+  nat_name                                   = "nat-config-private"
+}
+
+
+terraform {
+  backend "gcs" {
+    bucket = "oss-vdb-tf"
+    prefix = "private-osv" # state for this environment is stored under this prefix in the bucket
+  }
+  required_providers {
+    google = {
+      source  = "hashicorp/google"
+      version = "~> 7.35.0" # same constraint as recorded in .terraform.lock.hcl
+    }
+    google-beta = {
+      source  = "hashicorp/google-beta"
+      version = "~> 7.35.0" # same constraint as recorded in .terraform.lock.hcl
+    }
+  }
+}
diff --git a/deployment/terraform/modules/osv_pipeline/database.tf b/deployment/terraform/modules/osv_pipeline/database.tf
new file mode 100644
index 00000000000..3c4fd3fd76d
--- /dev/null
+++ b/deployment/terraform/modules/osv_pipeline/database.tf
@@ -0,0 +1,55 @@
+# Firestore database running in Datastore mode; its name comes from var.datastore_name.
+resource "google_firestore_database" "datastore" {
+  project     = var.project_id
+  name        = var.datastore_name
+  location_id = "us-west2"
+  type        = "DATASTORE_MODE"
+}
+
+# GCS bucket where protos and full JSON exports are stored (object versioning is enabled).
+resource "google_storage_bucket" "osv_vulnerabilities_export" {
+  project                     = var.project_id
+  name                        = var.vulnerabilities_export_bucket
+  location                    = "US"
+  uniform_bucket_level_access = true
+
+  versioning {
+    enabled = true
+  }
+
+  lifecycle_rule {
+    action {
+      type = "Delete"
+    }
+    condition {
+      num_newer_versions = 673        # delete a version once at least 673 newer versions of the object exist
+      with_state         = "ARCHIVED" # applies to noncurrent (archived) versions only
+    }
+  }
+
+  lifecycle_rule {
+    action {
+      type = "Delete"
+    }
+    condition {
+      days_since_noncurrent_time = 7     # delete 7 days after the version became noncurrent
+      with_state                 = "ANY" # rule is not restricted to live or archived state
+    }
+  }
+}
+
+# GCS bucket where affected commits are backed up; objects are deleted once they reach the configured retention age.
+resource "google_storage_bucket" "affected_commits_backups_bucket" {
+  project                     = var.project_id
+  name                        = var.affected_commits_backups_bucket
+  location                    = "US"
+  uniform_bucket_level_access = true
+  lifecycle_rule {
+    action {
+      type = "Delete"
+    }
+    condition {
+      age = var.affected_commits_backups_bucket_retention_days # object age, in days, at which backups are deleted
+    }
+  }
+}
diff --git a/deployment/terraform/modules/osv_pipeline/gke.tf b/deployment/terraform/modules/osv_pipeline/gke.tf
new file mode 100644
index 00000000000..641a3944d55
--- /dev/null
+++ b/deployment/terraform/modules/osv_pipeline/gke.tf
@@ -0,0 +1,215 @@
+# GKE "workers" cluster, its separately managed node pools, and the persistent disks used by workloads
+
+resource "google_container_cluster" "workers" {
+  project    = var.project_id
+  name       = var.cluster_name
+  location   = var.cluster_location
+  subnetwork = google_compute_subnetwork.my_subnet_0.self_link
+
+  private_cluster_config {
+    enable_private_endpoint = false # the control plane endpoint stays publicly reachable
+    enable_private_nodes    = true  # nodes only get private IPs
+    master_ipv4_cidr_block  = var.cluster_master_cidr
+  }
+
+  # We need to define this for private clusters, but all fields are optional.
+  ip_allocation_policy {}
+
+  addons_config {
+    gce_persistent_disk_csi_driver_config {
+      enabled = true
+    }
+    gcp_filestore_csi_driver_config {
+      enabled = true
+    }
+  }
+
+  # We can't create a cluster with no node pool defined, but we want to only use
+  # separately managed node pools. So we create the smallest possible default
+  # node pool and immediately delete it.
+  remove_default_node_pool = true
+  initial_node_count       = 1
+  lifecycle {
+    ignore_changes = [
+      # Importing from oss-vdb has initial_node_count set to 0, which is actually not a valid configuration for creating a cluster.
+      # Updating this value in Terraform forces a replacement, even though the default pool is destroyed. Ignore it to prevent disruption.
+      initial_node_count,
+    ]
+  }
+
+  monitoring_config {
+    managed_prometheus {
+      enabled = true
+    }
+  }
+}
+
+resource "google_container_node_pool" "default_pool" {
+  project  = var.project_id
+  name     = "default-pool"
+  cluster  = google_container_cluster.workers.name
+  location = google_container_cluster.workers.location
+
+  lifecycle {
+    # Terraform doesn't automatically know to recreate node pools when the cluster is recreated.
+    replace_triggered_by = [
+      google_container_cluster.workers.id,
+    ]
+  }
+
+  autoscaling {
+    min_node_count  = 1
+    max_node_count  = 1000
+    location_policy = "BALANCED"
+  }
+
+
+  node_config {
+    service_account = google_service_account.worker_sa.email
+    machine_type    = "n1-highmem-2"
+    disk_type       = "pd-ssd"
+    disk_size_gb    = 64
+    local_ssd_count = 1
+
+    oauth_scopes = ["https://www.googleapis.com/auth/cloud-platform"] # broad scope; effective access is limited by the service account's IAM roles
+
+  }
+}
+
+resource "google_container_node_pool" "highend" {
+  project  = var.project_id
+  name     = "highend"
+  cluster  = google_container_cluster.workers.name
+  location = google_container_cluster.workers.location
+  # For using the ephemeral storage local ssd config
+  provider = google-beta
+
+  lifecycle {
+    # Terraform doesn't automatically know to recreate node pools when the cluster is recreated.
+    replace_triggered_by = [
+      google_container_cluster.workers.id,
+    ]
+  }
+
+  autoscaling {
+    min_node_count  = 0
+    max_node_count  = 100
+    location_policy = "BALANCED"
+  }
+
+
+  node_config {
+    service_account = google_service_account.worker_sa.email
+    machine_type    = "n2-highmem-32"
+    disk_type       = "pd-ssd"
+    disk_size_gb    = 100
+    ephemeral_storage_config { # This is used for emptyDir storage in Kubernetes
+      # Minimum is 4 SSDs for n2-highmem-32, for 375GB * 4 = 1.5TB of storage
+      local_ssd_count = 4
+    }
+
+    oauth_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
+
+    labels = {
+      workloadType = "highend"
+    }
+
+    taint {
+      effect = "NO_EXECUTE" # only pods tolerating workloadType=highend may run on these nodes
+      key    = "workloadType"
+      value  = "highend"
+    }
+
+  }
+}
+
+resource "google_container_node_pool" "importer_pool" {
+  project    = var.project_id
+  name       = "importer-pool"
+  cluster    = google_container_cluster.workers.name
+  location   = google_container_cluster.workers.location
+  node_count = 1 # fixed size; this pool has no autoscaling block
+
+  lifecycle {
+    # Terraform doesn't automatically know to recreate node pools when the cluster is recreated.
+    replace_triggered_by = [
+      google_container_cluster.workers.id,
+    ]
+  }
+
+  node_config {
+    service_account = google_service_account.worker_sa.email
+    machine_type    = "n2-highmem-4"
+    disk_type       = "pd-ssd"
+    disk_size_gb    = 64
+    local_ssd_count = 1
+
+    oauth_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
+
+    labels = {
+      workloadType = "importer-pool"
+    }
+
+    taint {
+      effect = "NO_EXECUTE" # only pods tolerating workloadType=importer-pool may run on these nodes
+      key    = "workloadType"
+      value  = "importer-pool"
+    }
+  }
+}
+
+# Will deal with this properly when we unify test and prod.
+# resource "google_container_node_pool" "worker_pool_temp" {
+#   count    = var.project_id == "oss-vdb-test" ? 1 : 0
+#   project  = var.project_id
+#   name     = "worker-pool-temp"
+#   cluster  = google_container_cluster.workers.name
+#   location = google_container_cluster.workers.location
+#
+#   lifecycle {
+#     replace_triggered_by = [
+#       google_container_cluster.workers.id,
+#     ]
+#   }
+#
+#   autoscaling {
+#     min_node_count  = 0
+#     max_node_count  = 250
+#     location_policy = "BALANCED"
+#   }
+#
+#   node_config {
+#     service_account = google_service_account.worker_sa.email
+#     machine_type    = "n4-highcpu-2"
+#     disk_type       = "hyperdisk-balanced"
+#
+#     oauth_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
+#
+#     labels = {
+#       workloadType = "worker-pool"
+#     }
+#     taint {
+#       effect = "NO_EXECUTE"
+#       key    = "workloadType"
+#       value  = "worker-pool"
+#     }
+#   }
+# }
+
+# SSD persistent disk used by the gitter caching service (size from var.gitter_disk_size_gb; module default is 6 TiB)
+resource "google_compute_disk" "gitter_disk" {
+  project = var.project_id
+  name    = var.gitter_disk_name
+  type    = "pd-ssd"
+  zone    = google_container_cluster.workers.location # relies on the cluster location being a zone
+  size    = var.gitter_disk_size_gb
+}
+
+# SSD persistent disk for the Importer Reconciler Git Cache
+resource "google_compute_disk" "importer_reconciler_git_cache" {
+  project = var.project_id
+  name    = var.importer_reconciler_git_cache_disk_name
+  type    = "pd-ssd"
+  zone    = google_container_cluster.workers.location # relies on the cluster location being a zone
+  size    = var.importer_reconciler_git_cache_size_gb
+}
diff --git a/deployment/terraform/modules/osv_pipeline/iam.tf b/deployment/terraform/modules/osv_pipeline/iam.tf
new file mode 100644
index 00000000000..8ae8399b24c
--- /dev/null
+++ b/deployment/terraform/modules/osv_pipeline/iam.tf
@@ -0,0 +1,67 @@
+# GKE Worker Service Account and secure least-privilege IAM permissions
+
+# Dedicated GKE Worker Service Account (used as the node service account by every node pool in gke.tf)
+resource "google_service_account" "worker_sa" {
+  project      = var.project_id
+  account_id   = var.worker_service_account_id
+  display_name = "OSV GKE Worker Service Account"
+}
+
+# Datastore roles with database-specific IAM conditions for multi-tenant isolation
+resource "google_project_iam_member" "worker_datastore_roles" {
+  for_each = toset([
+    "roles/datastore.user",
+    "roles/datastore.importExportAdmin"
+  ])
+
+  project = var.project_id
+  role    = each.value
+  member  = "serviceAccount:${google_service_account.worker_sa.email}"
+
+  condition {
+    title       = "Database Isolation"
+    description = "Restricts this service account to only access the created Datastore database."
+    expression  = "resource.name == '${google_firestore_database.datastore.id}'"
+  }
+}
+
+# Cloud Monitoring roles at the project level (unconditional)
+resource "google_project_iam_member" "worker_monitoring_roles" {
+  for_each = toset([
+    "roles/monitoring.metricWriter",
+    "roles/monitoring.viewer"
+  ])
+
+  project = var.project_id
+  role    = each.value
+  member  = "serviceAccount:${google_service_account.worker_sa.email}"
+}
+
+# Bucket-level GCS access to secure vulnerability exports and backups
+resource "google_storage_bucket_iam_member" "worker_export_bucket" {
+  bucket = google_storage_bucket.osv_vulnerabilities_export.name
+  role   = "roles/storage.objectAdmin"
+  member = "serviceAccount:${google_service_account.worker_sa.email}"
+}
+
+resource "google_storage_bucket_iam_member" "worker_backup_bucket" {
+  bucket = google_storage_bucket.affected_commits_backups_bucket.name
+  role   = "roles/storage.objectAdmin"
+  member = "serviceAccount:${google_service_account.worker_sa.email}"
+}
+
+# Subscription-level Pub/Sub access to prevent queue cross-talk/task-stealing
+resource "google_pubsub_subscription_iam_member" "worker_subscriber" {
+  project      = var.project_id
+  subscription = google_pubsub_subscription.default_work.name
+  role         = "roles/pubsub.subscriber"
+  member       = "serviceAccount:${google_service_account.worker_sa.email}"
+}
+
+resource "google_pubsub_subscription_iam_member" "worker_extra_subscribers" {
+  for_each     = toset(var.extra_work_pools) # one binding per extra work pool subscription
+  project      = var.project_id
+  subscription = google_pubsub_subscription.work_pools[each.value].name
+  role         = "roles/pubsub.subscriber"
+  member       = "serviceAccount:${google_service_account.worker_sa.email}"
+}
diff --git a/deployment/terraform/modules/osv_pipeline/network.tf b/deployment/terraform/modules/osv_pipeline/network.tf
new file mode 100644
index 00000000000..d56c6d9143b
--- /dev/null
+++ b/deployment/terraform/modules/osv_pipeline/network.tf
@@ -0,0 +1,44 @@
+# Network configuration used by GKE worker nodes
+
+# Private Subnetwork inside the "default" VPC network, in the same region as the GKE cluster.
+# GKE nodes will be provisioned here and assigned private IPs.
+resource "google_compute_subnetwork" "my_subnet_0" {
+  project                  = var.project_id
+  name                     = var.subnet_name
+  network                  = "default"
+  ip_cidr_range            = var.subnet_cidr
+  private_ip_google_access = true
+  region                   = join("-", slice(split("-", var.cluster_location), 0, 2)) # region part of var.cluster_location, e.g. "us-central1-f" -> "us-central1"
+
+  lifecycle {
+    ignore_changes = [
+      description,
+    ]
+  }
+}
+
+# Cloud Router
+# Required to route traffic for GKE nodes running on private IPs.
+resource "google_compute_router" "router" {
+  project = var.project_id
+  name    = var.router_name
+  network = "default"
+  region  = google_compute_subnetwork.my_subnet_0.region # keep the router in the same region as the node subnetwork
+}
+
+# Cloud NAT
+# Allows private GKE nodes to securely access the public internet.
+resource "google_compute_router_nat" "nat_config" {
+  project                             = var.project_id
+  name                                = var.nat_name
+  router                              = google_compute_router.router.name
+  source_subnetwork_ip_ranges_to_nat  = "ALL_SUBNETWORKS_ALL_IP_RANGES"
+  nat_ip_allocate_option              = "AUTO_ONLY"
+  region                              = google_compute_router.router.region # NAT lives in the router's region
+  enable_endpoint_independent_mapping = false
+
+  log_config {
+    enable = false # NAT logging is turned off
+    filter = "ALL"
+  }
+}
diff --git a/deployment/terraform/modules/osv_pipeline/pubsub_tasks.tf b/deployment/terraform/modules/osv_pipeline/pubsub_tasks.tf
new file mode 100644
index 00000000000..5afdd4c7036
--- /dev/null
+++ b/deployment/terraform/modules/osv_pipeline/pubsub_tasks.tf
@@ -0,0 +1,111 @@
+# Pub/Sub worker task topics, work pool subscriptions, and dead-letter (failed task) handling
+
+resource "google_pubsub_topic" "tasks" {
+  project = var.project_id
+  name    = var.pubsub_topic_name
+
+  labels = {
+    goog-dm = "pubsub"
+  }
+}
+
+resource "google_pubsub_topic" "failed_tasks" {
+  project = var.project_id
+  name    = var.pubsub_topic_failed_tasks_name
+}
+
+resource "google_pubsub_subscription" "default_work" {
+  project                    = var.project_id
+  name                       = var.pubsub_subscription_default_work_pool_name
+  topic                      = google_pubsub_topic.tasks.id
+  message_retention_duration = "604800s" # 7 days
+  ack_deadline_seconds       = 600       # 10 minutes
+
+  dead_letter_policy {
+    dead_letter_topic     = google_pubsub_topic.failed_tasks.id
+    max_delivery_attempts = 5
+  }
+
+  expiration_policy {
+    ttl = "" # never expires
+  }
+
+  labels = {
+    goog-dm = "pubsub"
+  }
+
+  filter = "attributes.work_pool = \"default\""
+}
+
+resource "google_pubsub_subscription" "work_pools" {
+  for_each                   = toset(var.extra_work_pools) # one subscription per extra pool, named after the pool
+  project                    = var.project_id
+  name                       = each.value
+  topic                      = google_pubsub_topic.tasks.id
+  message_retention_duration = "604800s" # 7 days
+  ack_deadline_seconds       = 600       # 10 minutes
+
+  dead_letter_policy {
+    dead_letter_topic     = google_pubsub_topic.failed_tasks.id
+    max_delivery_attempts = 5
+  }
+
+  expiration_policy {
+    ttl = "" # never expires
+  }
+
+  labels = {
+    goog-dm = "pubsub"
+  }
+
+  filter = "attributes.work_pool = \"${each.value}\""
+}
+
+# Dead Letter Queue (DLQ) permissions
+# Pub/Sub requires its system service account to have publisher rights on the DLQ
+# topic and subscriber rights on the subscriptions to forward failing tasks.
+
+# Pub/Sub system service identity for this project
+resource "google_project_service_identity" "pubsub" {
+  provider = google-beta
+  project  = var.project_id
+  service  = "pubsub.googleapis.com"
+}
+
+# Allow Pub/Sub to pull/acknowledge messages from the default subscription
+resource "google_pubsub_subscription_iam_member" "default_work_service_subscriber" {
+  project      = var.project_id
+  subscription = google_pubsub_subscription.default_work.name
+  role         = "roles/pubsub.subscriber"
+  member       = "serviceAccount:${google_project_service_identity.pubsub.email}"
+}
+
+# Allow Pub/Sub to pull/acknowledge messages from the dynamic work pool subscriptions
+resource "google_pubsub_subscription_iam_member" "work_pools_service_subscriber" {
+  for_each     = toset(var.extra_work_pools)
+  project      = var.project_id
+  subscription = google_pubsub_subscription.work_pools[each.value].name
+  role         = "roles/pubsub.subscriber"
+  member       = "serviceAccount:${google_project_service_identity.pubsub.email}"
+}
+
+# Allow Pub/Sub to publish failed tasks to the DLQ failed-tasks topic
+resource "google_pubsub_topic_iam_member" "failed_tasks_service_publisher" {
+  project = var.project_id
+  topic   = google_pubsub_topic.failed_tasks.name
+  role    = "roles/pubsub.publisher"
+  member  = "serviceAccount:${google_project_service_identity.pubsub.email}"
+}
+
+
+resource "google_pubsub_subscription" "recovery" {
+  project                    = var.project_id
+  name                       = var.pubsub_subscription_recovery_name
+  topic                      = google_pubsub_topic.failed_tasks.id # reads dead-lettered tasks from the failed-tasks topic
+  message_retention_duration = "604800s"                           # 7 days
+  ack_deadline_seconds       = 600                                 # 10 minutes
+
+  expiration_policy {
+    ttl = "" # never expires
+  }
+}
diff --git a/deployment/terraform/modules/osv_pipeline/variables.tf b/deployment/terraform/modules/osv_pipeline/variables.tf
new file mode 100644
index 00000000000..395048dd82d
--- /dev/null
+++ b/deployment/terraform/modules/osv_pipeline/variables.tf
@@ -0,0 +1,140 @@
+# Global & Database
+variable "project_id" {
+  type        = string
+  description = "The GCP Project ID where resources will be provisioned."
+}
+
+variable "datastore_name" {
+  type        = string
+  description = "The name of the Datastore database instance. Default is '(default)'."
+  default     = "(default)"
+}
+
+# Identity & Security
+variable "worker_service_account_id" {
+  type        = string
+  description = "The ID to use for the GKE worker service account (max 30 characters)."
+  default     = "osv-worker"
+
+  validation {
+    condition     = can(regex("^[a-z][-a-z0-9]{4,28}[a-z0-9]$", var.worker_service_account_id))
+    error_message = "The worker_service_account_id must be 6-30 characters of lowercase letters, digits and hyphens, starting with a letter and not ending with a hyphen."
+  }
+}
+
+# Storage
+variable "vulnerabilities_export_bucket" {
+  type        = string
+  description = "The name of the GCS bucket where vulnerability JSON and proto exports are stored."
+}
+
+variable "affected_commits_backups_bucket" {
+  type        = string
+  description = "The name of the GCS bucket where AffectedCommits database backups are stored."
+}
+
+variable "affected_commits_backups_bucket_retention_days" {
+  type        = number
+  description = "The number of days to retain GCS backups of AffectedCommits."
+  default     = 30
+}
+
+# Messaging
+variable "pubsub_topic_name" {
+  type        = string
+  description = "The name of the primary worker Pub/Sub task topic."
+  default     = "tasks"
+}
+
+variable "pubsub_topic_failed_tasks_name" {
+  type        = string
+  description = "The name of the Pub/Sub topic for failed tasks (DLQ)."
+  default     = "failed-tasks"
+}
+
+variable "pubsub_subscription_default_work_pool_name" {
+  type        = string
+  description = "The name of the default work pool Pub/Sub subscription."
+  default     = "default-pool"
+}
+
+variable "pubsub_subscription_recovery_name" {
+  type        = string
+  description = "The name of the Pub/Sub subscription for task recovery."
+  default     = "recovery"
+}
+
+variable "extra_work_pools" {
+  type        = list(string)
+  description = "Additional dynamic Pub/Sub worker pool subscriptions to create (e.g., reimport, cves)."
+  default     = []
+}
+
+# Compute
+variable "cluster_name" {
+  type        = string
+  description = "The name of the GKE cluster."
+  default     = "workers"
+}
+
+variable "cluster_location" {
+  type        = string
+  description = "The GCP zone where the GKE cluster will be provisioned."
+  default     = "us-central1-f"
+}
+
+variable "cluster_master_cidr" {
+  type        = string
+  description = "The private /28 IP range to allocate for the GKE master control plane peering."
+  default     = "172.16.0.32/28"
+}
+
+variable "gitter_disk_name" {
+  type        = string
+  description = "The name of the persistent SSD disk for the gitter caching daemon."
+  default     = "gitter-disk"
+}
+
+variable "gitter_disk_size_gb" {
+  type        = number
+  description = "The size in GiB of the persistent SSD disk used by the gitter caching daemon."
+  default     = 6144 # 6TiB
+}
+
+variable "importer_reconciler_git_cache_disk_name" {
+  type        = string
+  description = "The name of the persistent SSD disk for the importer reconciler git cache."
+  default     = "importer-reconciler-git-cache"
+}
+
+variable "importer_reconciler_git_cache_size_gb" {
+  type        = number
+  description = "The size in GiB of the persistent SSD disk used by the importer reconciler git cache."
+  default     = 200
+}
+
+# Networking
+variable "subnet_name" {
+  type        = string
+  description = "The name of the private subnet to create for GKE nodes."
+  default     = "my-subnet-0"
+}
+
+variable "subnet_cidr" {
+  type        = string
+  description = "The IP range (CIDR) of the GKE private subnet. Must not overlap in the VPC."
+  default     = "10.45.32.0/22"
+}
+
+variable "router_name" {
+  type        = string
+  description = "The name of the Cloud Router to create for GKE outbound traffic."
+  default     = "router"
+}
+
+variable "nat_name" {
+  type        = string
+  description = "The name of the Cloud NAT configuration to create for GKE outbound traffic."
+  default     = "nat-config"
+}
+