From be5ef08eb55c1ca242c7f1fa083cb2286485c28e Mon Sep 17 00:00:00 2001 From: Luke Curley Date: Tue, 9 Dec 2025 11:49:33 +1100 Subject: [PATCH 1/2] Switch to cdn.moq.dev --- .env.production | 2 +- README.md | 2 +- src/pages/blog/first-cdn.mdx | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.env.production b/.env.production index f4a5b45..118291d 100644 --- a/.env.production +++ b/.env.production @@ -1,4 +1,4 @@ -PUBLIC_RELAY_URL="https://relay.moq.dev" +PUBLIC_RELAY_URL="https://cdn.moq.dev" # Generate with: cargo run --bin moq-token -- --key root.jwk sign --root "demo" --subscribe "" PUBLIC_RELAY_TOKEN="eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJyb290IjoiZGVtbyIsImdldCI6WyIiXSwiZXhwIjpudWxsLCJpYXQiOm51bGx9.6EoN-Y1Ouj35_qV5FokcdcdderrE2navNbYQjJyR2Ac" PUBLIC_CLOUDFLARE_URL="https://relay.cloudflare.mediaoverquic.com" diff --git a/README.md b/README.md index b18bdfa..d67cb5e 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ This repository contains the code for [moq.dev](https://moq.dev). This is a client only. -You'll either need to run a local server using [moq-rs](https://github.com/kixelated/moq-rs) or use a public server such as [relay.moq.dev](https://moq.dev/relay). +You'll either need to run a local server using [moq](https://github.com/kixelated/moq) or use a public server such as `cdn.moq.dev`. Join the [Discord](https://discord.gg/FCYF3p99mr) for updates and discussion. diff --git a/src/pages/blog/first-cdn.mdx b/src/pages/blog/first-cdn.mdx index e18c1f6..28e9bd0 100644 --- a/src/pages/blog/first-cdn.mdx +++ b/src/pages/blog/first-cdn.mdx @@ -90,8 +90,8 @@ If any of these are deal breakers, then you could always run your own [moq-relay I've been adding new features and fixing a bunch of stuff *after* Cloudflare smashed that fork button. For example, authentication (via JWT) and a WebSocket fallback for Safari/TCP support. -There's even a [terraform module](https://github.com/kixelated/moq.dev/blob/main/infra/relay.tf) that powers `relay.moq.dev`. -You too can run your own "global" CDN with 3 nodes and pay GCP a boatload of money for the privilege. +There's even a [terraform module](https://github.com/kixelated/moq/blob/main/cdn/relay.tf) that powers `cdn.moq.dev`. +You too can run your own "global" CDN with 3 nodes and pay ~GCP~ Linode a boatload of money for the privilege. It's not *quite* as good as Cloudflare's network, currently available for free...
From 90bbdf11f70e27f755e544cdee8ad45e437819f3 Mon Sep 17 00:00:00 2001 From: Luke Curley Date: Tue, 9 Dec 2025 12:01:04 +1100 Subject: [PATCH 2/2] Remove the infra stuff. --- .dockerignore | 1 - infra/.gitignore | 38 ---------- infra/.terraform.lock.hcl | 75 -------------------- infra/deploy.tf | 29 -------- infra/domain.tf | 37 ---------- infra/iam.tf | 7 -- infra/input.tf | 67 ----------------- infra/internal.tf | 25 ------- infra/main.tf | 49 ------------- infra/output.tf | 4 -- infra/pub.tf | 53 -------------- infra/pub.yml.tpl | 92 ------------------------ infra/relay-lb.tf | 82 --------------------- infra/relay.tf | 145 ------------------------------------- infra/relay.yml.tpl | 146 -------------------------------------- 15 files changed, 850 deletions(-) delete mode 100644 infra/.gitignore delete mode 100644 infra/.terraform.lock.hcl delete mode 100644 infra/deploy.tf delete mode 100644 infra/domain.tf delete mode 100644 infra/iam.tf delete mode 100644 infra/input.tf delete mode 100644 infra/internal.tf delete mode 100644 infra/main.tf delete mode 100644 infra/output.tf delete mode 100644 infra/pub.tf delete mode 100644 infra/pub.yml.tpl delete mode 100644 infra/relay-lb.tf delete mode 100644 infra/relay.tf delete mode 100644 infra/relay.yml.tpl diff --git a/.dockerignore b/.dockerignore index 0871710..85dcc16 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,3 +1,2 @@ .git node_modules -infra diff --git a/infra/.gitignore b/infra/.gitignore deleted file mode 100644 index f49e471..0000000 --- a/infra/.gitignore +++ /dev/null @@ -1,38 +0,0 @@ -# Local .terraform directories -**/.terraform/* - -# .tfstate files -*.tfstate -*.tfstate.* - -# Crash log files -crash.log -crash.*.log - -# Exclude all .tfvars files, which are likely to contain sensitive data, such as -# password, private keys, and other secrets. These should not be part of version -# control as they are data points which are potentially sensitive and subject -# to change depending on the environment. -*.tfvars -*.tfvars.json - -# Ignore override files as they are usually used to override resources locally and so -# are not checked in -override.tf -override.tf.json -*_override.tf -*_override.tf.json - -# Include override files you do wish to add to version control using negated pattern -# !example_override.tf - -# Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan -# example: *tfplan* - -# Ignore CLI configuration files -.terraformrc -terraform.rc - -# OSX pls -.DS_Store -node_modules diff --git a/infra/.terraform.lock.hcl b/infra/.terraform.lock.hcl deleted file mode 100644 index e6ad5ed..0000000 --- a/infra/.terraform.lock.hcl +++ /dev/null @@ -1,75 +0,0 @@ -# This file is maintained automatically by "tofu init". -# Manual edits may be lost in future updates. - -provider "registry.opentofu.org/hashicorp/google" { - version = "5.45.2" - constraints = "~> 5.0" - hashes = [ - "h1:YEQOp7Ou1+GtpcKyCX6Cr/mAGqKIogpi85MX51GuG4s=", - "zh:0931f08e81f220ae3132169cfa4ed8e9d8d2045f29ca914afd8ee9e3e9cf56e0", - "zh:31afa45a4c8a0fd4abff564ecff8b69a97ac1813ead61c12f5f0bf5d33cec7f1", - "zh:536979e437aad59ba41465c9398d8e3d7d3702bfe2a51d80571862d48c817959", - "zh:748e14614be32350ece4e9249e09bc1d20e54421983734ded3a0df6d6674ea71", - "zh:7c8fe641666603aad6693207c8eaac679b9be15246d77090c73a1a84326d6084", - "zh:8095a513a0662323d99c25466b5a291c80b2b0c1857c7c7a7b1159f25dbe4439", - "zh:9453db86d14611cab26dba30daf56d1cfef929918207e9e3e78b58299fc8c4fe", - "zh:adaa5df5d40060409b6b66136c0ac37b99fb35ac2cf554c584649c236a18d95b", - "zh:af2f659b4bd1f44e578f203830bdab829b5e635fcf2a59ffa7e997c16e6611ad", - "zh:b75184fe5c162821b0524fa941d6a934c452e815d82e62675bb21bbdc9046dfc", - ] -} - -provider "registry.opentofu.org/hashicorp/google-beta" { - version = "5.45.2" - hashes = [ - "h1:D157MMCsE8DIpK60goSgECYMsWfJb+E2ZH7bDSLSVG4=", - "zh:2df6e40591ceee7ee77d429ea072c9d51fef2dd04015b2604ff332a2af4ac819", - "zh:4096af21991ba76ab81c8cb00c0eb0bd4f22619f7e491d60023fb10b8b33bfb1", - "zh:44ded286956fff5668f1acbf152b62ca8e6a03abc8df12c5c181bc2ca05b4df7", - "zh:7ae19e1b53a0e26bea0acb9a96b4b44038d7c182c3fdd496148fd20e40aa78e1", - "zh:81c9812823b78fd1b12bc0acd6dae35bc573944950e09eaf237b2e83b6b587d7", - "zh:9db6101421b53b9533807928c651e779f5b8129f4a57ff892bf256c84ba6ed29", - "zh:b779729cb08829f621a718ecdfdb503c310ef5411e694996c7cfda7227221134", - "zh:c43edb31aee354317a6181272a961965b93722fd18637f38c395af013aa65617", - "zh:dbb93970a85f2fe84f650b6a4da694ecb1023a99c3b9bbf6953dccd074fa49ce", - "zh:df9d13853269e98651d495571b4d58c883b4386247d0b9c5495c2e82ef721f45", - ] -} - -provider "registry.opentofu.org/hashicorp/tls" { - version = "4.0.6" - hashes = [ - "h1:0oXaBUFJ5bA0ED7OCajAOa9YSGTTpe8FyUFJP+zO6A4=", - "zh:4b53b372767e5068d9bbfc89199201c1ae4283dde2f0c301974f8abb4215791f", - "zh:5b4c308bd074c6d0bd560220e6ee10a9859ca9a1f29a59367b0477a740ff265e", - "zh:674dd6bc85597677e160ee601d88b21c5a974759a658769812d2904bd94bc042", - "zh:6ccc1c448349b56677ba66112aec7e0a58eb827f66209ca5f4077b81cce240fb", - "zh:8aa6e13a5d722b74230937ea21e8b4994e53340d95b5691cf6cf3518b9f38e6e", - "zh:8b27e55e4c7fa887774860113b95c8f7f68804b002fa47f0eb8e3a485997287e", - "zh:a430b5a3e8753d8f61784de49e538ac4abed19fb665fccd8a10b55402fe9f076", - "zh:b07c978c335ae9fc12f9c221629610775e4ae36691ed4e7ba258d275dd58a243", - "zh:bbec8cb1efc84ee3026c793956a4a4cd0ece20b89d2d4f7d954c68e7f6d596d0", - "zh:e684e247424188dc3b500a543b1a8046d1c0ec08c2a90aedca0c4f6bb56bedbd", - ] -} - -provider "registry.opentofu.org/vancluever/acme" { - version = "2.34.0" - constraints = "~> 2.0" - hashes = [ - "h1:vdAZOh8B/0betvZVI6K8YosSFYXtWW6ieToULzAaPu4=", - "zh:05aa6ef3396f289bd5a02af8d4fdbff8c3d8753197a3da5539267c6c34398c45", - "zh:1bd9e282192633589e157c570ece83fe59ccb8d33f5c5f05099bf1ac32e9f53b", - "zh:245056f38759621164fed4adbbbab378ddff15563b0d2eb5a7e388cc7dda12c6", - "zh:29a3f390fbd803a71b6b6018b36e507065268ce8164f14e63e978d6a467607c1", - "zh:3b097f93081efc1a92d53715e4f1304777dfbff31b30fff1ad1a33a9ec487a27", - "zh:3e48fd49611ac97d87839534ee6763467ef3afdf959c124df015da3043f9e644", - "zh:4919d5b193309ec9460fc8bb5f4ceff358616957b38a269aa03c63bbe63a2d1d", - "zh:4bdcba536c5b626f96b2c91ae85fc2c0cbac9bc1c3c1d91d64ccfc6136c58565", - "zh:5ef97da03f9229b38fd63ebf03a158d63f040c590b8f82dd90cdec99952a45e8", - "zh:9e8b14add8ce6329fd103456a0890d34ce9cf7445bc7f8d31c87412f3f03983a", - "zh:b262b1b6b9e839b364f7ac3fd47fe18bbf7a077043c902a03215534c883050b6", - "zh:e9a4636292a29d2cb398fae5c362862e387c622af7b8020fa2b9506e37cbecba", - "zh:f5e5ee870fb58bedc08cd3e06b11fa26673f48c07cd669d62028590f75656ee4", - ] -} diff --git a/infra/deploy.tf b/infra/deploy.tf deleted file mode 100644 index 0787498..0000000 --- a/infra/deploy.tf +++ /dev/null @@ -1,29 +0,0 @@ -// Get the default service account used by compute instances -data "google_service_account" "compute_default" { - account_id = "${data.google_project.current.number}-compute@developer.gserviceaccount.com" -} - -// Create a service account that has deploy permission. -resource "google_service_account" "deploy" { - account_id = "deploy" -} - -// Allow it to login with a token -resource "google_project_iam_member" "deploy_token" { - project = var.project - role = "roles/iam.serviceAccountTokenCreator" - member = "serviceAccount:${google_service_account.deploy.email}" -} - -// Allow it deploy to Cloud Run -resource "google_project_iam_member" "deploy_run" { - project = var.project - role = "roles/run.admin" - member = "serviceAccount:${google_service_account.deploy.email}" -} - -resource "google_service_account_iam_binding" "deploy_run" { - service_account_id = data.google_service_account.compute_default.id - role = "roles/iam.serviceAccountUser" - members = ["serviceAccount:${google_service_account.deploy.email}"] -} diff --git a/infra/domain.tf b/infra/domain.tf deleted file mode 100644 index 6e44067..0000000 --- a/infra/domain.tf +++ /dev/null @@ -1,37 +0,0 @@ -// Set up a DNS zone for our domain. -resource "google_dns_managed_zone" "relay" { - name = "relay" - dns_name = "relay.${var.domain}." -} - -// We also need an unmanaged certificate for the relay, since there's no QUIC LBs available yet. -provider "acme" { - server_url = "https://acme-v02.api.letsencrypt.org/directory" -} - -resource "tls_private_key" "relay" { - algorithm = "ECDSA" - ecdsa_curve = "P256" -} - -resource "acme_registration" "relay" { - account_key_pem = tls_private_key.relay.private_key_pem - email_address = var.email -} - -resource "acme_certificate" "relay" { - account_key_pem = acme_registration.relay.account_key_pem - common_name = "relay.${var.domain}" - subject_alternative_names = ["*.relay.${var.domain}"] - key_type = tls_private_key.relay.ecdsa_curve - - revoke_certificate_on_destroy = false - - dns_challenge { - provider = "gcloud" - config = { - GCE_PROJECT = var.project - GCE_ZONE_ID = google_dns_managed_zone.relay.name - } - } -} diff --git a/infra/iam.tf b/infra/iam.tf deleted file mode 100644 index bcda95c..0000000 --- a/infra/iam.tf +++ /dev/null @@ -1,7 +0,0 @@ -# Disable authentication -data "google_iam_policy" "noauth" { - binding { - role = "roles/run.invoker" - members = ["allUsers"] - } -} diff --git a/infra/input.tf b/infra/input.tf deleted file mode 100644 index 0d7343a..0000000 --- a/infra/input.tf +++ /dev/null @@ -1,67 +0,0 @@ -variable "project" { - description = "project id" - default = "quic-video" -} - -variable "region" { - description = "region" - default = "us-central1" -} - -variable "zone" { - description = "zone" - default = "us-central1-a" -} - -variable "email" { - description = "Your email address, used for LetsEncrypt" - default = "admin@moq.dev" -} - -variable "domain" { - description = "domain name" - default = "moq.dev" -} - -variable "docker_relay" { - description = "Docker image for moq-relay" - default = "docker.io/kixelated/moq-relay:0.9.4" -} - -variable "docker_hang" { - description = "Docker image for hang" - default = "docker.io/kixelated/hang:0.2.11" -} - -# Too complicated to specify via flags, so do it here. -locals { - relays = { - us-central = { # Iowa - region = "us-central1" - zone = "us-central1-a", - machine = "t2d-standard-1", - image = "cos-cloud/cos-stable", - }, - europe-west = { # Netherlands - region = "europe-west4", - zone = "europe-west4-b", - machine = "t2d-standard-1", - image = "cos-cloud/cos-stable", - }, - asia-southeast = { # Singapore - region = "asia-southeast1", - zone = "asia-southeast1-c", - machine = "t2d-standard-1", - image = "cos-cloud/cos-stable", - }, - } - pub = { - region = "us-central1" - zone = "us-central1-a", - machine = "t2d-standard-1", - image = "cos-cloud/cos-stable", - } - # To save an instance, we use a relay as a root - # In the future we should have a dedicated instance/cluster for this. - root = "relay-us-central.us-central1-a" -} diff --git a/infra/internal.tf b/infra/internal.tf deleted file mode 100644 index 4363ab1..0000000 --- a/infra/internal.tf +++ /dev/null @@ -1,25 +0,0 @@ -// Create a root CA to use for all internal traffic. -// This doesn't need to be secure, but it's required for QUIC. - -resource "tls_private_key" "internal" { - algorithm = "ECDSA" - ecdsa_curve = "P256" -} - -resource "tls_self_signed_cert" "internal" { - private_key_pem = tls_private_key.internal.private_key_pem - - subject { - common_name = "${var.project}.internal" - } - - validity_period_hours = 8760 # 1 year - - is_ca_certificate = true - - allowed_uses = [ - "digital_signature", - "cert_signing", - "crl_signing", - ] -} diff --git a/infra/main.tf b/infra/main.tf deleted file mode 100644 index 99bd500..0000000 --- a/infra/main.tf +++ /dev/null @@ -1,49 +0,0 @@ -terraform { - required_providers { - google = { - source = "hashicorp/google" - version = "~> 5.0" - } - - acme = { - source = "vancluever/acme" - version = "~> 2.0" - } - } - - backend "gcs" { - bucket = "quic-video-tfstate" - prefix = "terraform/state" - } - - required_version = ">= 1.5" -} - -provider "google" { - project = var.project - region = var.region - zone = var.zone -} - -variable "gcp_service_list" { - description = "The list of apis necessary for the project" - type = list(string) - default = [ - "domains.googleapis.com", - "storage.googleapis.com", - "compute.googleapis.com", - "dns.googleapis.com", - "iamcredentials.googleapis.com", - ] -} - -resource "google_project_service" "all" { - for_each = toset(var.gcp_service_list) - service = each.key - disable_dependent_services = false - disable_on_destroy = false -} - -data "google_project" "current" { - project_id = var.project -} diff --git a/infra/output.tf b/infra/output.tf deleted file mode 100644 index b7c7be2..0000000 --- a/infra/output.tf +++ /dev/null @@ -1,4 +0,0 @@ - output "relay_nameservers" { - value = google_dns_managed_zone.relay.name_servers - description = "Add these as NS records in Cloudflare for relay.moq.dev" - } diff --git a/infra/pub.tf b/infra/pub.tf deleted file mode 100644 index e439d79..0000000 --- a/infra/pub.tf +++ /dev/null @@ -1,53 +0,0 @@ -resource "google_compute_instance" "pub" { - name = "pub-${local.pub.region}" - - machine_type = local.pub.machine - zone = local.pub.zone - - boot_disk { - initialize_params { - image = local.pub.image - - size = 64 # GB - type = "pd-standard" - } - } - - network_interface { - network = "default" - - # Give the instance a public IP address so it can download from the internet. - # If we add more instances, it's probably cheaper to set up a NAT instead. - access_config { - nat_ip = google_compute_address.pub.address - network_tier = "STANDARD" - } - } - - metadata = { - # cloud-init template - user-data = templatefile("${path.module}/pub.yml.tpl", { - addr = "relay.${var.domain}" - docker_image = var.docker_hang - region = local.pub.region - - # A token used to publish demo/bbb.hang - # cargo run --bin moq-token -- --key root.jwk sign --root "demo" --publish "" > demo-pub.jwt - demo_token = trimspace(file("demo-pub.jwt")) - }) - } - - service_account { - scopes = ["cloud-platform"] - } - - allow_stopping_for_update = true -} - -# Create an IP address just so we can access the internet without a NAT. -resource "google_compute_address" "pub" { - name = "pub-${local.pub.region}" - region = local.pub.region - address_type = "EXTERNAL" - network_tier = "STANDARD" -} diff --git a/infra/pub.yml.tpl b/infra/pub.yml.tpl deleted file mode 100644 index 1869aab..0000000 --- a/infra/pub.yml.tpl +++ /dev/null @@ -1,92 +0,0 @@ -#cloud-config - -write_files: - - path: /etc/systemd/system/hang-bbb-prepare.service - permissions: 0644 - owner: root - content: | - [Unit] - Description=Prepare video for hang-bbb - Requires=docker.service - After=docker.service - Before=hang-bbb.service - - [Service] - Type=oneshot - RemainAfterExit=yes - WorkingDirectory=/tmp - ExecStart=/bin/bash -c '\ - # Download the video \ - docker run --rm -v /tmp:/tmp alpine:latest \ - wget -nv "$${URL:-http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4}" \ - -O /tmp/tmp.mp4 && \ - # Fragment the video \ - docker run --rm -v /tmp:/tmp linuxserver/ffmpeg:latest \ - -y -loglevel error -i /tmp/tmp.mp4 \ - -c copy \ - -f mp4 -movflags cmaf+separate_moof+delay_moov+skip_trailer+frag_every_frame \ - /tmp/fragmented.mp4 && \ - rm -f /tmp/tmp.mp4' - - - path: /etc/systemd/system/hang-bbb.service - permissions: 0644 - owner: root - content: | - [Unit] - Description=Run hang-bbb via docker - Requires=docker.service hang-bbb-prepare.service - After=docker.service hang-bbb-prepare.service - - [Service] - ExecStart=/bin/bash -c '\ - docker run --rm -v /tmp:/tmp:ro linuxserver/ffmpeg:latest \ - -stream_loop -1 \ - -hide_banner \ - -v quiet \ - -re \ - -i /tmp/fragmented.mp4 \ - -c copy \ - -f mp4 \ - -movflags cmaf+separate_moof+delay_moov+skip_trailer+frag_every_frame \ - - | \ - docker run --rm -i \ - --name hang-bbb \ - --network="host" \ - --pull=always \ - --cap-add=SYS_PTRACE \ - -e RUST_LOG=debug -e RUST_BACKTRACE=1 \ - ${docker_image} \ - publish --url https://relay.moq.dev/demo?jwt=${demo_token} --name bbb' - - ExecStop=docker stop hang-bbb - - # Take longer and longer to restart the process. - Restart=always - RestartSec=10s - - - path: /etc/docker/daemon.json - content: | - { "mtu": 1460 } - - - path: /etc/systemd/journald.conf - content: | - [Journal] - SystemMaxUse=500M - SystemKeepFree=1G - MaxFileSec=1hour - MaxRetentionSec=1day - - # Delete docker images and containers that are no longer in use - - path: /etc/cron.weekly/docker-cleanup - permissions: "0755" - owner: root - content: | - #!/bin/sh - docker system prune -af - -runcmd: - - systemctl daemon-reload - - systemctl restart docker - - systemctl start node-problem-detector - - systemctl start hang-bbb-prepare - - systemctl start hang-bbb diff --git a/infra/relay-lb.tf b/infra/relay-lb.tf deleted file mode 100644 index 265d091..0000000 --- a/infra/relay-lb.tf +++ /dev/null @@ -1,82 +0,0 @@ -# Global Geo DNS, routing to the closest region. -resource "google_dns_record_set" "relay_global" { - name = google_dns_managed_zone.relay.dns_name - managed_zone = google_dns_managed_zone.relay.name - type = "A" - ttl = 60 - - routing_policy { - dynamic "geo" { - for_each = local.relays - - content { - location = geo.value.region - rrdatas = [ - google_compute_address.relay[geo.key].address - ] - } - } - } -} - -# Unfortunately GCP doesn't support global UDP load balancing despite their marketing. -# oof there goes a few hours; here's my progress for posterity: -# TODO We could still use this for regional load balancing. - -/* -# Get a domain name for the anycast address. -resource "google_dns_record_set" "relay_lb" { - name = "relay.moq.dev." - type = "A" - ttl = 300 - managed_zone = google_dns_managed_zone.relay.name - rrdatas = [google_compute_global_forwarding_rule.relay_lb.ip_address] -} - -# Get an anycast address. -resource "google_compute_global_address" "relay_lb" { - name = "relay-lb" - ip_version = "IPV4" - address_type = "EXTERNAL" -} - -# Set up a global forwarding rule -resource "google_compute_global_forwarding_rule" "relay_lb" { - name = "relay-lb" - ip_protocol = "UDP" - port_range = "443" - load_balancing_scheme = "EXTERNAL" - target = data.google_compute_backend_service.relay_lb.self_link - ip_address = google_compute_global_address.relay_lb.address -} - -# Register the list of possible instances to use -resource "google_compute_backend_service" "relay_lb" { - name = "relay-lb" - load_balancing_scheme = "EXTERNAL" - protocol = "UDP" # This is the problem; it's not supported - port_name = "quic" - - dynamic "backend" { - for_each = local.regions - - content { - group = google_compute_instance_group.relay_lb[backend.key].self_link - balancing_mode = "CONNECTION" # apparently required? - } - } - - health_checks = [ - google_compute_http_health_check.relay.self_link - ] -} - - -# We must use a legacy health check for the UDP load balancer -resource "google_compute_http_health_check" "relay" { - name = "relay" - request_path = "/health" - check_interval_sec = 5 - timeout_sec = 5 -} -*/ diff --git a/infra/relay.tf b/infra/relay.tf deleted file mode 100644 index 748e303..0000000 --- a/infra/relay.tf +++ /dev/null @@ -1,145 +0,0 @@ -resource "google_compute_instance" "relay" { - for_each = local.relays - - name = "relay-${each.key}" - - // https://cloud.google.com/compute/docs/general-purpose-machines#t2a_machine_types - // The relay uses virtually no CPU, so we can use a cheap ARM host. - // We should increase the instance size until network is the bottleneck. - // Then we scale out to more instances instead. - machine_type = each.value.machine - zone = each.value.zone - - boot_disk { - initialize_params { - image = each.value.image - size = 50 # 50 GB - type = "pd-standard" - } - } - - network_interface { - network = "default" - - access_config { - nat_ip = google_compute_address.relay[each.key].address - } - } - - metadata = { - # cloud-init template - user-data = templatefile("${path.module}/relay.yml.tpl", { - docker_image = var.docker_relay - - # The external address and certs - public_host = var.domain - public_cert = "${acme_certificate.relay.certificate_pem}${acme_certificate.relay.issuer_pem}" - public_key = acme_certificate.relay.private_key_pem - - # Certs used for internal traffic - internal_cert = "${tls_locally_signed_cert.relay_internal[each.key].cert_pem}${tls_self_signed_cert.internal.cert_pem}" - internal_key = tls_private_key.relay_internal[each.key].private_key_pem - internal_ca = tls_self_signed_cert.internal.cert_pem - - # The name we're using for clustering - # We reuse the GCE provided DNS: VM_NAME.ZONE.c.PROJECT_ID.internal - # See: https://cloud.google.com/compute/docs/internal-dns - cluster_node = "relay-${each.key}.${each.value.zone}.c.${var.project}.internal" - cluster_root = "${local.root}.c.${var.project}.internal" - - # The root key and token, used to authenticate nodes - # cargo run --bin moq-token -- --key root.jwk generate > root.jwk - root_key = trimspace(file("root.jwk")) - - # cargo run --bin moq-token -- --key root.jwk sign --publish "" --subscribe "" --cluster > cluster.jwt - cluster_token = trimspace(file("cluster.jwt")) - }) - } - - service_account { - scopes = ["cloud-platform"] - } - - # For the firewall - tags = ["relay"] - - lifecycle { - # There seems to be a terraform bug causing this to be recreated on every apply - ignore_changes = [boot_disk] - } - - allow_stopping_for_update = true -} - -resource "google_compute_address" "relay" { - for_each = local.relays - - name = "relay-${each.key}" - region = each.value.region -} - -# Create a DNS entry for each node. -resource "google_dns_record_set" "relay" { - for_each = local.relays - - name = "${each.key}.${google_dns_managed_zone.relay.dns_name}" - managed_zone = google_dns_managed_zone.relay.name - type = "A" - ttl = 300 - rrdatas = [google_compute_address.relay[each.key].address] -} - -# Allow port 443 -resource "google_compute_firewall" "relay" { - name = "relay" - network = "default" - - allow { - protocol = "udp" - ports = ["443"] - } - - allow { - protocol = "tcp" - ports = ["443"] - } - - source_ranges = ["0.0.0.0/0"] - target_tags = ["relay"] -} - -# Create an internal TLS certificate for the relay -resource "tls_private_key" "relay_internal" { - for_each = local.relays - - algorithm = "ECDSA" - ecdsa_curve = "P256" -} - -resource "tls_cert_request" "relay_internal" { - for_each = local.relays - private_key_pem = tls_private_key.relay_internal[each.key].private_key_pem - - subject { - common_name = "relay-${each.key}" - } - - # Valid for the default Google DNS entry - dns_names = ["relay-${each.key}.${each.value.zone}.c.${var.project}.internal"] -} - -resource "tls_locally_signed_cert" "relay_internal" { - for_each = local.relays - - cert_request_pem = tls_cert_request.relay_internal[each.key].cert_request_pem - ca_private_key_pem = tls_private_key.internal.private_key_pem - ca_cert_pem = tls_self_signed_cert.internal.cert_pem - - validity_period_hours = 8760 # 1 year - - allowed_uses = [ - "key_encipherment", - "digital_signature", - "server_auth" - ] -} diff --git a/infra/relay.yml.tpl b/infra/relay.yml.tpl deleted file mode 100644 index 210c504..0000000 --- a/infra/relay.yml.tpl +++ /dev/null @@ -1,146 +0,0 @@ -#cloud-config - -write_files: - # Write the certificate to disk - - path: /etc/cert/${public_host}.crt - content: | - ${indent(6, public_cert)} - permissions: "0644" - owner: root - - # Write the private key to disk - - path: /etc/cert/${public_host}.key - content: | - ${indent(6, public_key)} - permissions: "0600" - owner: root - - # Write the internal certificate to disk - - path: /etc/cert/${cluster_node}.crt - content: | - ${indent(6, internal_cert)} - permissions: "0644" - owner: root - - # Write the internal private key to disk - - path: /etc/cert/${cluster_node}.key - content: | - ${indent(6, internal_key)} - permissions: "0600" - owner: root - - # Write our internal CA to disk - # Unfortuantely, cos-cloud doesn't seem to support the ca_certs module - - path: /etc/cert/internal.ca - content: | - ${indent(6, internal_ca)} - permissions: "0644" - owner: root - - # Write the root key to disk - - path: /etc/moq/root.jwk - content: | - ${indent(6, root_key)} - permissions: "0644" - owner: root - - # Write the root token to disk - - path: /etc/moq/cluster.jwt - content: | - ${indent(6, cluster_token)} - permissions: "0644" - owner: root - - - path: /etc/moq/relay.toml - content: | - [server] - listen = "0.0.0.0:443" - - tls.cert = [ "/etc/cert/${cluster_node}.crt", "/etc/cert/${public_host}.crt" ] - tls.key = [ "/etc/cert/${cluster_node}.key", "/etc/cert/${public_host}.key" ] - - [client] - tls.root = [ "/etc/cert/internal.ca" ] - - [web.https] - listen = "0.0.0.0:443" - cert = "/etc/cert/${public_host}.crt" - key = "/etc/cert/${public_host}.key" - - [cluster] - connect = "${cluster_root}" - advertise = "${cluster_node}" - token = "/etc/moq/cluster.jwt" - - [auth] - key = "/etc/moq/root.jwk" - public = "anon" - - permissions: "0644" - owner: root - - # Create a systemd service to run the docker image - - path: /etc/systemd/system/moq-relay.service - permissions: "0644" - owner: root - content: | - [Unit] - Description=Run moq-relay via docker - After=docker.service allow-quic.service - Wants=docker.service allow-quic.service - - [Service] - Restart=on-failure - RestartSec=5s - ExecStart=docker run --rm \ - --name moq-relay \ - --network="host" \ - --pull=always \ - --cap-add=SYS_PTRACE \ - -v "/etc/cert:/etc/cert:ro" \ - -v "/etc/moq:/etc/moq:ro" \ - -e RUST_LOG=debug -e RUST_BACKTRACE=1 \ - ${docker_image} /etc/moq/relay.toml - ExecStop=docker stop moq-relay - - # GCP configures a firewall by default that blocks all UDP traffic - - path: /etc/systemd/system/allow-quic.service - permissions: "0644" - owner: root - content: | - [Unit] - Description=Allow QUIC traffic through the host firewall - - [Service] - Type=oneshot - RemainAfterExit=true - ExecStart=iptables -A INPUT -p udp --dport 443 -j ACCEPT - ExecStart=iptables -A INPUT -p tcp --dport 443 -j ACCEPT - - # There's a mismatch between the GCP network MTU and the docker MTU - - path: /etc/docker/daemon.json - content: | - { "mtu": 1460 } - - # Clear out the logs after a week - - path: /etc/systemd/journald.conf - content: | - [Journal] - SystemMaxUse=500M - SystemKeepFree=1G - MaxFileSec=1hour - MaxRetentionSec=1day - - # Delete docker images and containers that are no longer in use - - path: /etc/cron.weekly/docker-cleanup - permissions: "0755" - owner: root - content: | - #!/bin/sh - docker system prune -af - -runcmd: - - systemctl daemon-reload - - systemctl restart docker - - systemctl start node-problem-detector - - systemctl start moq-relay