From 9e1b3e8bf57d674b132707ecdd0685b290cdc38d Mon Sep 17 00:00:00 2001
From: findias
Date: Wed, 6 May 2026 15:45:01 +0300
Subject: [PATCH] relay: add DNS-01 cert acquisition method via Cloudflare API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Multi-IP RU round-robin DNS makes the existing webroot HTTP-01 challenge
unreliable: LE may pick whichever apex A-record IP it likes, and only one
of the round-robin RUs has the challenge file. The second RU's cert ends
up rsynced from the first, which goes stale after 90 days unless we run
cron-based sync — fragile.

DNS-01 via Cloudflare API works regardless of where DNS resolves because
the challenge is a TXT record, not an HTTP file. Each RU auto-renews
independently against its own LE account; no inter-RU coordination needed.

Changes:
  * defaults/main.yml: relay_certbot_method (default 'webroot' for
    backwards compat) and relay_certbot_dns_propagation_seconds.
  * defaults/secrets.yml.example: documents relay_cloudflare_api_token
    vault var with the CF token-creation recipe (Zone:DNS:Edit on apex).
  * tasks/install.yml: snap-install certbot-dns-cloudflare plugin and
    connect it via snap interface, gated on method=dns-cloudflare.
    trust-plugin-with-root must be set explicitly because the plugin
    needs root to write into /etc/letsencrypt.
  * tasks/certbot.yml: validates token presence, deploys
    /etc/letsencrypt/cloudflare.ini (mode 600, no_log), branches the
    certbot certonly command on method. Existing webroot path retained
    unchanged for hosts that don't opt in.

Idempotency bug fixed in the same commit:
The pre-existing 'cert already covers domain' check parsed
`certbot certificates ... | grep 'Domains:'`, but snap certbot 3.x
renamed that line to 'Identifiers:'. The grep returned empty, the
'cert doesn't exist' branch fired on every run, and certbot tried to
re-issue. LE's small dedup window masked it for a few minutes, but a
sustained re-run loop would burn rate budget.
Updated the grep to accept both labels:

    grep -E '^[[:space:]]*(Domains|Identifiers):'

Both certbot 2.x (apt distro) and 3.x+ (snap) parse correctly now.

Review follow-ups folded in (not yet re-run on the RU hosts):
  * tasks/certbot.yml: assert that relay_certbot_method is 'webroot' or
    'dns-cloudflare', so a misspelled value fails loudly instead of
    skipping both issue tasks.
  * tasks/certbot.yml: create /etc/letsencrypt before writing
    cloudflare.ini; the copy module does not create parent directories.
  * defaults/main.yml: document relay_certbot_dns_propagation_seconds.

Tested:
  * Manual DNS-01 setup on vm_my_ru and vm_my_ru2 from earlier session.
  * Re-ran the role with --tags relay_install,relay_certbot — first run
    reported changed=2 due to the broken grep (false positive); after
    the fix, idempotent re-run reports changed=0 on both hosts.
  * certbot renew --cert-name zirgate.com --dry-run on both hosts:
    "Congratulations, all simulated renewals succeeded".

Signed-off-by: findias
---
 roles/relay/defaults/main.yml            | 11 ++++
 roles/relay/defaults/secrets.yml.example |  8 +++
 roles/relay/tasks/certbot.yml            | 71 +++++++++++++++++++++++-
 roles/relay/tasks/install.yml            | 20 +++++++
 4 files changed, 107 insertions(+), 3 deletions(-)

diff --git a/roles/relay/defaults/main.yml b/roles/relay/defaults/main.yml
index 0cff86d..dcaea1c 100644
--- a/roles/relay/defaults/main.yml
+++ b/roles/relay/defaults/main.yml
@@ -44,6 +44,17 @@ relay_access_log: false
 # ── Certbot ───────────────────────────────────────────────────────────────────
 relay_certbot_email: "" # Set in secrets.yml: relay_certbot_email: "admin@example.com"
 
+# Cert acquisition method for the apex domain.
+# - webroot: HTTP-01 challenge served from {{ relay_webroot }}/.well-known/acme-challenge.
+#   Requires apex DNS to point at this host. Breaks for second-IP/round-robin DNS
+#   (LE may pick the wrong server and 404 the challenge).
+# - dns-cloudflare: DNS-01 challenge via Cloudflare API. Requires
+#   relay_cloudflare_api_token in vault (Zone:DNS:Edit scope on the apex zone).
+#   Works regardless of where DNS resolves — required for multi-IP RU setups.
+relay_certbot_method: webroot
+# Seconds certbot waits after creating the TXT record before validation (DNS-01 only).
+relay_certbot_dns_propagation_seconds: 30
+
 # ── Stub site ─────────────────────────────────────────────────────────────────
 relay_stub_title: "Welcome"
 relay_stub_description: "Personal website"
diff --git a/roles/relay/defaults/secrets.yml.example b/roles/relay/defaults/secrets.yml.example
index c3e6fa5..25bc8c1 100644
--- a/roles/relay/defaults/secrets.yml.example
+++ b/roles/relay/defaults/secrets.yml.example
@@ -7,6 +7,14 @@ relay_sub_my: "my.your-ru-domain.com" # Subdomain for Raven relay (A record
 relay_upstream_host: "1.2.3.4" # EU server IP address
 relay_certbot_email: "admin@example.com" # Email for Let's Encrypt notifications
 
+# Cloudflare API token, ONLY required when relay_certbot_method=dns-cloudflare.
+# Create at CF dashboard → My Profile → API Tokens → Create Token (Custom):
+#   Permissions: Zone → DNS → Edit
+#   Zone Resources: Include → Specific zone → your apex zone (relay_domain)
+# Token is used by certbot-dns-cloudflare snap plugin for DNS-01 challenge.
+# Strongly recommended for multi-IP RU setups where webroot HTTP-01 breaks.
+# relay_cloudflare_api_token: "..."
+
 # Optional: Cloudflare Origin CA cert for {{ relay_sub_my }} (CF-proxied subdomain).
 # When set, nginx serves this cert instead of LE for the my.* server block.
 # Issued from CF dashboard → SSL/TLS → Origin Server with SAN: *. + .
diff --git a/roles/relay/tasks/certbot.yml b/roles/relay/tasks/certbot.yml
index 4ea5bed..4145b13 100644
--- a/roles/relay/tasks/certbot.yml
+++ b/roles/relay/tasks/certbot.yml
@@ -3,16 +3,60 @@
 # and uses CF Origin CA cert (deployed in nginx_ssl.yml when relay_cf_origin_cert
 # is set), so it doesn't need a publicly-trusted cert and stays out of CT logs
 # (closes M6).
+- name: Relay | Validate relay_certbot_method
+  # A misspelled method would otherwise skip both issue tasks silently.
+  ansible.builtin.assert:
+    that:
+      - relay_certbot_method in ['webroot', 'dns-cloudflare']
+    fail_msg: >-
+      relay_certbot_method must be 'webroot' or 'dns-cloudflare'
+      (got '{{ relay_certbot_method }}').
+
+- name: Relay | Validate dns-cloudflare prerequisites
+  ansible.builtin.assert:
+    that:
+      - relay_cloudflare_api_token is defined
+      - relay_cloudflare_api_token | length > 0
+    fail_msg: >-
+      relay_certbot_method=dns-cloudflare requires relay_cloudflare_api_token
+      in vault (Zone:DNS:Edit on apex zone). See defaults/secrets.yml.example.
+  when: relay_certbot_method == 'dns-cloudflare'
+
+- name: Relay | Ensure certbot config directory exists (DNS-01 only)
+  # ansible.builtin.copy does not create missing parent directories, and
+  # /etc/letsencrypt may be absent on a host where certbot has never run.
+  ansible.builtin.file:
+    path: /etc/letsencrypt
+    state: directory
+    owner: root
+    group: root
+    mode: "0755"
+  when: relay_certbot_method == 'dns-cloudflare'
+
+- name: Relay | Deploy Cloudflare credentials file (DNS-01 only)
+  # The file holds the API token: root-only mode, and no_log keeps the
+  # rendered content out of Ansible output.
+  ansible.builtin.copy:
+    content: "dns_cloudflare_api_token = {{ relay_cloudflare_api_token }}\n"
+    dest: /etc/letsencrypt/cloudflare.ini
+    owner: root
+    group: root
+    mode: "0600"
+  when: relay_certbot_method == 'dns-cloudflare'
+  no_log: true
+
 - name: Relay | Check if certificate already covers apex only
+  # certbot ≤2.x prints "Domains:", certbot ≥3.x prints "Identifiers:" — accept both.
   ansible.builtin.shell:
     cmd: |
       certbot certificates --cert-name {{ relay_domain }} 2>/dev/null \
-        | grep -E 'Domains:' | awk -F': ' '{print $2}' | xargs -n1 | sort | tr '\n' ' '
+        | grep -E '^[[:space:]]*(Domains|Identifiers):' \
+        | awk -F': ' '{print $2}' | xargs -n1 | sort | tr '\n' ' '
   register: relay_cert_domains
   changed_when: false
   failed_when: false
 
-- name: Relay | Obtain or update Let's Encrypt certificate (apex only)
+- name: Relay | Obtain or update Let's Encrypt certificate (webroot)
   ansible.builtin.command:
     cmd: >
       certbot certonly --webroot
@@ -22,7 +66,28 @@
       --cert-name {{ relay_domain }}
       --email {{ relay_certbot_email }}
       -d {{ relay_domain }}
-  when: relay_cert_domains.stdout | default('') | trim != relay_domain
+  when:
+    - relay_certbot_method == 'webroot'
+    - relay_cert_domains.stdout | default('') | trim != relay_domain
+  notify: Reload nginx
+
+- name: Relay | Obtain or update Let's Encrypt certificate (DNS-01 via Cloudflare)
+  # NOTE(review): both issue tasks are skipped once the existing cert already
+  # covers exactly relay_domain, so changing relay_certbot_method on a
+  # provisioned host never reaches certbot — confirm how such hosts move to DNS-01.
+  ansible.builtin.command:
+    cmd: >
+      certbot certonly --dns-cloudflare
+      --dns-cloudflare-credentials /etc/letsencrypt/cloudflare.ini
+      --dns-cloudflare-propagation-seconds {{ relay_certbot_dns_propagation_seconds }}
+      --non-interactive
+      --agree-tos
+      --cert-name {{ relay_domain }}
+      --email {{ relay_certbot_email }}
+      -d {{ relay_domain }}
+  when:
+    - relay_certbot_method == 'dns-cloudflare'
+    - relay_cert_domains.stdout | default('') | trim != relay_domain
   notify: Reload nginx
 
 - name: Relay | Ensure certbot renewal timer is enabled
diff --git a/roles/relay/tasks/install.yml b/roles/relay/tasks/install.yml
index d66398a..cc2da4e 100644
--- a/roles/relay/tasks/install.yml
+++ b/roles/relay/tasks/install.yml
@@ -62,3 +62,23 @@
     name: nginx
     enabled: true
     state: started
+
+# certbot-dns-cloudflare plugin. Snap requires explicit trust before installing
+# any plugin that runs as root; the trust flag is idempotent so we just always
+# set it when method=dns-cloudflare. Plugin install is gated by `creates`, and
+# `snap connect` is idempotent on its own.
+- name: Relay | Allow certbot snap plugins with root access
+  ansible.builtin.command: snap set certbot trust-plugin-with-root=ok
+  changed_when: false
+  when: relay_certbot_method == 'dns-cloudflare'
+
+- name: Relay | Install certbot-dns-cloudflare snap plugin
+  ansible.builtin.command:
+    cmd: snap install certbot-dns-cloudflare
+    creates: /snap/certbot-dns-cloudflare
+  when: relay_certbot_method == 'dns-cloudflare'
+
+- name: Relay | Connect certbot:plugin to certbot-dns-cloudflare
+  ansible.builtin.command: snap connect certbot:plugin certbot-dns-cloudflare
+  changed_when: false
+  when: relay_certbot_method == 'dns-cloudflare'