diff --git a/.gitignore b/.gitignore index 2a7a69f..c3379e2 100644 --- a/.gitignore +++ b/.gitignore @@ -9,12 +9,15 @@ venv/ .venv/ .claude/ -# Ignore all files in deploy/inventory/host_vars/ and deploy/inventory/group_vars/ +# Ignore all files in deploy/inventory/host_vars/ and deploy/inventory/group_vars/ # but allow .template files deploy/inventory/host_vars/* -deploy/inventory/group_vars/* !deploy/inventory/host_vars/*.template +deploy/inventory/group_vars/* !deploy/inventory/group_vars/*.template +!deploy/inventory/group_vars/all/ +deploy/inventory/group_vars/all/* +!deploy/inventory/group_vars/all/vars.yml deploy/filter_plugins/__pycache__/ # Testing artifacts diff --git a/deploy/ansible.cfg b/deploy/ansible.cfg index 53ace9a..eafe242 100644 --- a/deploy/ansible.cfg +++ b/deploy/ansible.cfg @@ -6,6 +6,11 @@ host_key_checking = False # log_path=./deploy.log remote_tmp = /tmp/ local_tmp = ~/.ansible/tmp +# Explicit so sub-playbooks under playbooks/ resolve roles and custom +# filters/library regardless of which playbook is invoked. +roles_path = ./roles +filter_plugins = ./filter_plugins +library = ./library # callbacks_enabled = timer, profile_tasks # stdout_callback = yaml diff --git a/deploy/dhis2.yml b/deploy/dhis2.yml index ec4b36a..4c7331a 100644 --- a/deploy/dhis2.yml +++ b/deploy/dhis2.yml @@ -10,7 +10,7 @@ gather_facts: false force_handlers: true become: true - hosts: all:!127.0.0.1 + hosts: "all:!127.0.0.1:!{{ wireguard_hub_inventory_hostname | default('wireguard') }}" vars_files: - vars/vars.yml roles: @@ -44,5 +44,19 @@ roles: - role: backups +- name: WireGuard VPN bring-up + ansible.builtin.import_playbook: playbooks/wireguard.yml + +# Lockdown is auto-imported. Every play inside gates on wireguard_enabled, +# so this is a no-op when WireGuard is disabled. To deploy the mesh without +# locking services down (e.g. 
during initial cut-over while not all admins +# are on the VPN yet), run: +# ansible-playbook dhis2.yml --skip-tags wireguard-lockdown +# To revert a specific component without disabling WireGuard, use the +# per-component skip-tags listed in docs/WireGuard-VPN.md (lockdown-proxy, +# lockdown-monitor, lockdown-postgres, lockdown-instances). +- name: WireGuard VPN service lockdown + ansible.builtin.import_playbook: playbooks/wireguard-lockdown.yml + - import_playbook: playbooks/delete-dhis2-instance.yml tags: [never, delete-instance] diff --git a/deploy/inventory/group_vars/all.template b/deploy/inventory/group_vars/all.template deleted file mode 100644 index eaf4d13..0000000 --- a/deploy/inventory/group_vars/all.template +++ /dev/null @@ -1,2 +0,0 @@ ---- -# This is just template. Ansible hosts are members of groups, in our tools we have 3 most commong groups, [web], [databases], [monitoring] diff --git a/deploy/inventory/group_vars/all/vars.yml b/deploy/inventory/group_vars/all/vars.yml new file mode 100644 index 0000000..e80c5b4 --- /dev/null +++ b/deploy/inventory/group_vars/all/vars.yml @@ -0,0 +1,29 @@ +--- +# WireGuard human/admin peers (laptops, home machines, sysadmins). +# App containers (proxy/postgres/dhis/monitor) are auto-derived from the +# inventory's inline `wireguard_ip` and must NOT be listed here. +# The hub itself uses 10.0.0.1; assign human peers from 10.0.0.6 upward +# (10.0.0.2-.5 are reserved for app containers in the default inventory). + +# +# Optional pg_access: a list of entries that produce per-peer pg_hba.conf +# rules on the databases host. Peers without pg_access have no PostgreSQL +# access. Each entry is one of: +# - { instance: HOSTNAME } preferred - derives both database and user +# from the DHIS2 instance's LXD container name +# (db name == role == owner == container name). +# Reference a host from the [instances] group so +# the rule tracks the instance instead of being +# hardcoded. 
+# - { database: all, user: all } explicit - for wildcard/superuser access. +wireguard_peers: + - name: sysadmin + allowed_ips: "10.0.0.6/32" + # public_key: "" # only needed if wireguard_auto_generate_keys: false + # preshared_key: "" # optional - for post-quantum resistance + pg_access: + - { instance: dhis } # access the 'dhis' instance's database as its own role + # - name: superuser + # allowed_ips: "10.0.0.7/32" + # pg_access: + # - { database: all, user: all } # superuser-equivalent - allows access to all databases as any user diff --git a/deploy/inventory/hosts.template b/deploy/inventory/hosts.template index 4ea0893..da52ae5 100644 --- a/deploy/inventory/hosts.template +++ b/deploy/inventory/hosts.template @@ -3,22 +3,27 @@ # proxy [web] -proxy ansible_host=172.19.2.2 +proxy ansible_host=172.19.2.2 wireguard_ip=10.0.0.2 # database hosts [databases] -postgres ansible_host=172.19.2.20 +postgres ansible_host=172.19.2.20 wireguard_ip=10.0.0.3 # dhis2 hosts [instances] -dhis ansible_host=172.19.2.11 database_host=postgres dhis2_version=2.42 proxy_rewrite=True +dhis ansible_host=172.19.2.11 database_host=postgres dhis2_version=2.42 proxy_rewrite=True wireguard_ip=10.0.0.4 # monitoring hosts [monitoring] -monitor ansible_host=172.19.2.30 +monitor ansible_host=172.19.2.30 wireguard_ip=10.0.0.5 + +# WireGuard hub container. wireguard_ip must match wireguard_server_ip. +# Group is wireguard_hub (not wireguard) to avoid Ansible's "host and group share name" warning. +[wireguard_hub] +wireguard ansible_host=172.19.2.200 wireguard_ip=10.0.0.1 [backup_servers] backup ansible_host=172.19.2.100 @@ -50,6 +55,11 @@ postgresql_version=16 server_monitoring=munin app_monitoring=glowroot +# WireGuard VPN: restricts Grafana, Prometheus, Munin, Glowroot and PostgreSQL to VPN-only access. +# App containers above are auto-derived as WG peers via inline wireguard_ip. +# Human/admin peers (laptops, home machines) live in group_vars/all/vars.yml. 
+wireguard_enabled=false + # lxd lxd_network=172.19.2.1/24 diff --git a/deploy/playbooks/wireguard-lockdown.yml b/deploy/playbooks/wireguard-lockdown.yml new file mode 100644 index 0000000..3186ae7 --- /dev/null +++ b/deploy/playbooks/wireguard-lockdown.yml @@ -0,0 +1,102 @@ +--- +# WireGuard VPN lockdown. +# +# Restricts Grafana, Prometheus, Munin, Glowroot and PostgreSQL access to +# the WireGuard subnet only. Auto-imported by dhis2.yml immediately after +# playbooks/wireguard.yml, so a single `deploy.sh` / `dhis2.yml` run with +# `wireguard_enabled=true` brings up the mesh AND locks services down in +# one go. May also be run standalone to (re-)apply lockdown after manual +# UFW changes: +# ansible-playbook playbooks/wireguard-lockdown.yml +# +# Every play below gates on wireguard_enabled, so this is a no-op when +# WireGuard is disabled - that's what lets dhis2.yml import it +# unconditionally. +# +# Selective skip via --skip-tags (works whether run via dhis2.yml or +# standalone): +# ansible-playbook dhis2.yml --skip-tags lockdown-postgres +# ansible-playbook dhis2.yml --skip-tags wireguard-lockdown # all of it + +- name: WireGuard lockdown | Pre-flight + hosts: 127.0.0.1 + connection: local + become: true + gather_facts: false + tags: + - wireguard + - wireguard-lockdown + tasks: + # Informational only - surfaces "lockdown is about to run" in the play + # output. When wireguard_enabled is false the rest of this playbook + # short-circuits via per-play `when:` guards. + - name: Lockdown | Announce service hardening + ansible.builtin.debug: + msg: >- + WireGuard service lockdown is about to apply: monitoring, + PostgreSQL, and Glowroot will be restricted to the + {{ wireguard_network | default('10.0.0.0/24') }} VPN subnet. + Skip with --skip-tags wireguard-lockdown (whole phase) or + --skip-tags lockdown-proxy,lockdown-monitor,lockdown-postgres,lockdown-instances + (per component). 
+ when: wireguard_enabled | default(false) | bool + +- name: WireGuard lockdown | Proxy + hosts: web + become: true + gather_facts: false + tags: + - wireguard + - wireguard-lockdown + - lockdown-proxy + tasks: + - name: Lockdown | Proxy + ansible.builtin.include_role: + name: wireguard + tasks_from: lockdown_proxy.yml + when: wireguard_enabled | default(false) | bool + +- name: WireGuard lockdown | Monitoring + hosts: monitoring + become: true + gather_facts: false + tags: + - wireguard + - wireguard-lockdown + - lockdown-monitor + tasks: + - name: Lockdown | Monitor + ansible.builtin.include_role: + name: wireguard + tasks_from: lockdown_monitor.yml + when: wireguard_enabled | default(false) | bool + +- name: WireGuard lockdown | PostgreSQL + hosts: databases + become: true + gather_facts: false + tags: + - wireguard + - wireguard-lockdown + - lockdown-postgres + tasks: + - name: Lockdown | Postgres + ansible.builtin.include_role: + name: wireguard + tasks_from: lockdown_postgres.yml + when: wireguard_enabled | default(false) | bool + +- name: WireGuard lockdown | Instances + hosts: instances + become: true + gather_facts: false + tags: + - wireguard + - wireguard-lockdown + - lockdown-instances + tasks: + - name: Lockdown | Instances + ansible.builtin.include_role: + name: wireguard + tasks_from: lockdown_instances.yml + when: wireguard_enabled | default(false) | bool diff --git a/deploy/playbooks/wireguard.yml b/deploy/playbooks/wireguard.yml new file mode 100644 index 0000000..ca6e3e5 --- /dev/null +++ b/deploy/playbooks/wireguard.yml @@ -0,0 +1,122 @@ +--- +# WireGuard VPN bring-up - standalone playbook. +# +# Brings up the WireGuard mesh in three independent stages so each can be +# debugged or re-run on its own: +# 1. host_portforward - provision the hub LXD container and the lxc network +# forward into it (LXD deployments only; SSH/distributed setups expose +# the hub VM directly via the operator's firewall). +# 2. 
hub - install/configure WireGuard server inside the hub. +# 3. peer - install/configure WireGuard on every app host. +# +# This playbook does NOT alter Grafana/Prometheus/Munin/Glowroot/PostgreSQL +# access rules; playbooks/wireguard-lockdown.yml does. dhis2.yml imports that +# right after this one; use --skip-tags wireguard-lockdown to test the mesh first. +# +# Always imported by dhis2.yml; role includes below gate on wireguard_enabled. +# May also be run directly: +# ansible-playbook playbooks/wireguard.yml + +- name: WireGuard | Provision hub on localhost + hosts: 127.0.0.1 + become: true + gather_facts: false + tags: + - wireguard + - wireguard-bring-up + tasks: + # Capture the inventory-level deployment mode. We must read it from the + # wireguard hub host's hostvars (not from 127.0.0.1's), because when this + # playbook is imported by dhis2.yml the pre-install role has already + # set_fact'd ansible_connection=local on 127.0.0.1, which would make + # this read 'local' instead of the inventory's 'lxd'. The hub host's + # ansible_connection comes straight from [all:vars] and is never + # clobbered. 
+ - name: WireGuard | Capture deployment mode (lxd vs ssh) + vars: + ansible_connection: local + ansible.builtin.set_fact: + wireguard_deploy_mode: >- + {{ hostvars[wireguard_hub_inventory_hostname | default('wireguard')]['ansible_connection'] + | default('lxd') }} + + - name: WireGuard | Set Ansible connection to local + vars: + ansible_connection: local + ansible.builtin.set_fact: + ansible_connection: local + + - name: WireGuard | Gather network facts on host + ansible.builtin.setup: + gather_subset: + - network + - '!min' + + - name: WireGuard | Validate configuration + ansible.builtin.include_role: + name: wireguard + tasks_from: validate.yml + when: wireguard_enabled | default(false) | bool + + - name: WireGuard | Provision hub LXD container (LXD only) + ansible.builtin.include_role: + name: wireguard + tasks_from: lxd_container.yml + when: + - wireguard_enabled | default(false) | bool + - wireguard_deploy_mode == 'lxd' + + - name: WireGuard | LXD host port-forward (LXD only) + ansible.builtin.include_role: + name: wireguard + tasks_from: host_portforward.yml + when: + - wireguard_enabled | default(false) | bool + - wireguard_deploy_mode == 'lxd' + +- name: WireGuard | Configure hub server + hosts: "{{ wireguard_hub_inventory_hostname | default('wireguard') }}" + become: true + gather_facts: false + tags: + - wireguard + - wireguard-bring-up + tasks: + - name: WireGuard | Hub + ansible.builtin.include_role: + name: wireguard + tasks_from: hub.yml + when: wireguard_enabled | default(false) | bool + +- name: WireGuard | Configure peers across app hosts + hosts: "all:!127.0.0.1:!{{ wireguard_hub_inventory_hostname | default('wireguard') }}" + become: true + gather_facts: false + tags: + - wireguard + - wireguard-bring-up + tasks: + # The backups role runs earlier in dhis2.yml on the databases host and + # does `set_fact: ansible_connection=local`, which persists for that host + # across every later play (set_fact is sticky). 
Without this re-assert the + # postgres peer's WireGuard tasks would run on the LXD host instead of + # inside the container, so its wg0 never comes up. Re-derive the real + # connection from the hub host's ansible_connection, which comes straight + # from [all:vars] and is never clobbered (same authoritative source used to + # capture wireguard_deploy_mode in the bring-up play above). No-op for + # SSH deployments and for peers that never ran the backups role. + - name: WireGuard | Restore peer connection (undo backups ansible_connection leak) + ansible.builtin.set_fact: + ansible_connection: >- + {{ hostvars[wireguard_hub_inventory_hostname | default('wireguard')]['ansible_connection'] + | default('lxd') }} + when: wireguard_enabled | default(false) | bool + + - name: WireGuard | Peer + ansible.builtin.include_role: + name: wireguard + tasks_from: peer.yml + when: + - wireguard_enabled | default(false) | bool + - hostvars[inventory_hostname].wireguard_ip is defined + - hostvars[inventory_hostname].wireguard_ip | length > 0 diff --git a/deploy/roles/create-instance/templates/apache2/instance.j2 b/deploy/roles/create-instance/templates/apache2/instance.j2 index 93f8fdf..08c1204 100644 --- a/deploy/roles/create-instance/templates/apache2/instance.j2 +++ b/deploy/roles/create-instance/templates/apache2/instance.j2 @@ -1,3 +1,5 @@ +{% set _glowroot_enabled = app_monitoring is defined and app_monitoring | trim == 'glowroot' %} +{% set _glowroot_locked = wireguard_enabled | default(false) | bool and wireguard_lockdown_monitoring | default(false) | bool %} {% if hostvars[item]['dhis2_base_path'] | default(item) | to_fixed_string == "ROOT" %} Require all granted @@ -5,7 +7,7 @@ ProxyPassReverse "http://{{ hostvars[item]['ansible_host']+':8080'}}/" -{% if app_monitoring is defined and app_monitoring | trim == 'glowroot' %} +{% if _glowroot_enabled and not _glowroot_locked %} Require all granted ProxyPass "http://{{ hostvars[item]['ansible_host']+':4000' }}/glowroot" @@ 
-19,7 +21,7 @@ ProxyPassReverse "http://{{ hostvars[item]['ansible_host']+':8080'}}/{{ hostvars[item]['dhis2_base_path'] | default(item) | to_fixed_string }}" -{% if app_monitoring is defined and app_monitoring | trim == 'glowroot' %} +{% if _glowroot_enabled and not _glowroot_locked %} Require all granted ProxyPass "http://{{ hostvars[item]['ansible_host']+':4000' }}/{{ hostvars[item]['dhis2_base_path'] | default(item) | to_fixed_string }}-glowroot" diff --git a/deploy/roles/create-instance/templates/nginx/instance.j2 b/deploy/roles/create-instance/templates/nginx/instance.j2 index e06e979..559b50f 100644 --- a/deploy/roles/create-instance/templates/nginx/instance.j2 +++ b/deploy/roles/create-instance/templates/nginx/instance.j2 @@ -1,3 +1,5 @@ +{% set _glowroot_enabled = app_monitoring is defined and app_monitoring | trim == 'glowroot' %} +{% set _glowroot_locked = wireguard_enabled | default(false) | bool and wireguard_lockdown_monitoring | default(false) | bool %} {% if hostvars[item]['dhis2_base_path'] | default(item) | to_fixed_string == "ROOT" %} location / { proxy_pass http://{{hostvars[item]['ansible_host']+':8080' }}; @@ -10,7 +12,7 @@ location / { proxy_hide_header X-Powered-By; proxy_hide_header Server; } -{% if app_monitoring is defined and app_monitoring | trim == 'glowroot' %} +{% if _glowroot_enabled and not _glowroot_locked %} {# glowroot location block configs #} location /glowroot/ { proxy_pass http://{{ hostvars[item]['ansible_host']+':4000' }}/glowroot/; @@ -37,7 +39,7 @@ location /{{ hostvars[item]['dhis2_base_path'] | default(item) | to_fixed_string proxy_hide_header X-Powered-By; proxy_hide_header Server; } -{% if app_monitoring is defined and app_monitoring | trim == 'glowroot' %} +{% if _glowroot_enabled and not _glowroot_locked %} {# glowroot location block configs #} location /{{ hostvars[item]['dhis2_base_path'] | default(item) | to_fixed_string }}-glowroot { proxy_pass http://{{ hostvars[item]['ansible_host']+':4000' }}/{{ 
hostvars[item]['dhis2_base_path'] | default(item) | to_fixed_string }}-glowroot; diff --git a/deploy/roles/wireguard/defaults/main.yml b/deploy/roles/wireguard/defaults/main.yml new file mode 100644 index 0000000..7870084 --- /dev/null +++ b/deploy/roles/wireguard/defaults/main.yml @@ -0,0 +1,39 @@ +--- +wireguard_enabled: false + +wireguard_network: "10.0.0.0/24" +wireguard_server_ip: "10.0.0.1" +wireguard_port: 51820 +wireguard_interface: wg0 + +# Hub container - dedicated LXD container running the WireGuard server. +wireguard_hub_inventory_hostname: "wireguard" +wireguard_hub_lxd_ip: "172.19.2.200" + +# Empty = auto-detect ansible_default_ipv4.address. On cloud VMs behind +# 1:1 NAT this is the private primary IP, not the public IP. +wireguard_endpoint_listen: "" + +# Falls back to wireguard_endpoint_listen when empty. Must be set explicitly +# on cloud VMs with 1:1 NAT or home peers receive an unroutable Endpoint. +wireguard_endpoint_public: "" + +wireguard_stage: "" + +# Cross-role shared variables consumed by proxy/postgres/monitoring/pre-install. +# Renaming with a wireguard_ prefix would break that contract. 
+lxd_bridge_interface: "lxdbr1" # noqa: var-naming[no-role-prefix] +lxd_network: "172.19.2.0/24" # noqa: var-naming[no-role-prefix] +lxd_gateway_ip: "172.19.2.1" # noqa: var-naming[no-role-prefix] + +grafana_port: 3000 # noqa: var-naming[no-role-prefix] + +wireguard_peers: [] + +wireguard_auto_generate_keys: true +wireguard_auto_generate_psk: false +wireguard_prune_orphans: false +wireguard_client_config_dir: /etc/wireguard/clients +wireguard_client_key_dir: /etc/wireguard/clients/keys + +wireguard_lockdown_monitoring: false diff --git a/deploy/roles/wireguard/handlers/main.yml b/deploy/roles/wireguard/handlers/main.yml new file mode 100644 index 0000000..198167a --- /dev/null +++ b/deploy/roles/wireguard/handlers/main.yml @@ -0,0 +1,42 @@ +--- +- name: Restart WireGuard + ansible.builtin.systemd: + name: "wg-quick@{{ wireguard_interface }}" + state: restarted + daemon_reload: true + +- name: Sync WireGuard peers + ansible.builtin.shell: | + wg syncconf {{ wireguard_interface }} <(wg-quick strip /etc/wireguard/{{ wireguard_interface }}.conf) + args: + executable: /bin/bash + changed_when: true + +- name: Reload UFW + community.general.ufw: + state: reloaded + +- name: Reload Nginx + ansible.builtin.service: + name: nginx + state: reloaded + +- name: Reload Apache2 + ansible.builtin.service: + name: apache2 + state: reloaded + +- name: Restart Grafana + ansible.builtin.service: + name: grafana-server + state: restarted + +- name: Restart Apache2 + ansible.builtin.service: + name: apache2 + state: restarted + +- name: Reload PostgreSQL + ansible.builtin.service: + name: postgresql + state: reloaded diff --git a/deploy/roles/wireguard/meta/argument_specs.yml b/deploy/roles/wireguard/meta/argument_specs.yml new file mode 100644 index 0000000..6d4e640 --- /dev/null +++ b/deploy/roles/wireguard/meta/argument_specs.yml @@ -0,0 +1,143 @@ +--- +argument_specs: + main: + short_description: "WireGuard VPN for securing DHIS2 monitoring and database access" + description: + - 
Deploys a WireGuard VPN hub inside a dedicated LXD container. + - Each app container (proxy/postgres/dhis/monitor) joins as a WG peer. + - Optionally locks down monitoring dashboards and PostgreSQL to VPN-only access. + options: + wireguard_enabled: + description: "Master switch - enable the WireGuard VPN role" + type: bool + default: false + wireguard_stage: + description: >- + Legacy per-play stage selector retained for backward compatibility + when the role is invoked the old way + (roles: [{ role: wireguard, vars: { wireguard_stage: ... } }]). + The current playbooks (playbooks/wireguard.yml, + playbooks/wireguard-lockdown.yml) call task files directly via + include_role tasks_from: and ignore this variable. + One of: host_portforward, hub, peer, or empty. + type: str + default: "" + choices: ["", "host_portforward", "hub", "peer"] + wireguard_network: + description: "VPN subnet in CIDR notation" + type: str + default: "10.0.0.0/24" + wireguard_server_ip: + description: "Hub IP address on the VPN subnet" + type: str + default: "10.0.0.1" + wireguard_hub_inventory_hostname: + description: "Inventory hostname of the wireguard hub container" + type: str + default: "wireguard" + wireguard_hub_lxd_ip: + description: "Static LXD IP assigned to the wireguard hub container" + type: str + default: "172.19.2.200" + wireguard_endpoint_listen: + description: >- + Listen address used by `lxc network forward` on the LXD host. + Must be an IP owned by the host itself. Empty = auto-detect via + ansible_default_ipv4.address. On cloud VMs behind 1:1 NAT this + is the private primary IP, NOT the public IP. + type: str + default: "" + wireguard_endpoint_public: + description: >- + Public IP/hostname advertised to home/admin peers as their + Endpoint = line. Empty falls back to wireguard_endpoint_listen. + On cloud VMs with 1:1 NAT this MUST be set explicitly to the + public IP/hostname or home peers will receive an unroutable + Endpoint. 
+ type: str + default: "" + wireguard_port: + description: "UDP listen port for WireGuard" + type: int + default: 51820 + wireguard_interface: + description: "WireGuard network interface name" + type: str + default: "wg0" + lxd_bridge_interface: + description: "LXD bridge interface name (must match the LXD network)" + type: str + default: "lxdbr1" + lxd_network: + description: "LXD container subnet in CIDR notation" + type: str + default: "172.19.2.0/24" + lxd_gateway_ip: + description: "Host-side IP of the LXD bridge" + type: str + default: "172.19.2.1" + wireguard_peers: + description: >- + List of human/admin VPN peers (laptops, home machines). + App containers are auto-derived from inventory wireguard_ip; do NOT + list them here. Each entry requires name and allowed_ips. + public_key is required only when wireguard_auto_generate_keys is false. + Optional: preshared_key. + Optional: peer_ip (single CIDR ending in /32) - used for per-peer + pg_hba.conf rules when allowed_ips routes additional networks. If + omitted, the first CIDR in allowed_ips is used. + Optional: pg_access - generates per-peer pg_hba.conf entries on the + databases host. Each entry is either { instance: HOSTNAME }, which + derives database and user from the named [instances] host's LXD + container name (db == role == owner == container name), or explicit + { database, user } for wildcards such as { database: all, user: all }. + Resolved database/user names must match ^[a-zA-Z0-9_]+$ (PostgreSQL + keyword 'all' is allowed). Peers without pg_access have no PostgreSQL + access. + type: list + elements: dict + default: [] + wireguard_auto_generate_keys: + description: >- + When true, generate peer keypairs hub-side and produce + complete, ready-to-import client configs. When false, peers + must supply their own public_key. 
+ type: bool + default: true + wireguard_auto_generate_psk: + description: >- + When true, generate a pre-shared key (PSK) for each peer that does + not supply an explicit preshared_key, and embed it into both the + hub and peer configs. Adds a symmetric post-quantum hedge. + Off by default - enabling on an existing deployment forces all + affected clients to re-import their .conf. + type: bool + default: false + wireguard_client_config_dir: + description: "Directory on the hub for generated peer config files" + type: str + default: "/etc/wireguard/clients" + wireguard_client_key_dir: + description: "Directory on the hub for generated peer key files" + type: str + default: "/etc/wireguard/clients/keys" + wireguard_prune_orphans: + description: >- + When true, remove peer key/PSK/config files for peers no longer + present in inventory or wireguard_peers. Off by default to avoid + accidental destruction. + type: bool + default: false + wireguard_lockdown_monitoring: + description: >- + Gates the `/glowroot` proxy block in + roles/create-instance/templates/{nginx,apache2}/instance.j2. + lockdown_proxy.yml sets this true to re-render the proxy configs + without `/glowroot`. Use `--skip-tags lockdown-proxy` to keep + `/glowroot` reachable while the VPN is enabled. 
+ type: bool + default: false + grafana_port: + description: "Grafana HTTP port - used for VPN-only UFW rules" + type: int + default: 3000 diff --git a/deploy/roles/wireguard/meta/main.yml b/deploy/roles/wireguard/meta/main.yml new file mode 100644 index 0000000..e06ba22 --- /dev/null +++ b/deploy/roles/wireguard/meta/main.yml @@ -0,0 +1,18 @@ +--- +galaxy_info: + author: DHIS2 + description: WireGuard VPN for securing DHIS2 monitoring and database access + license: BSD-3-Clause + min_ansible_version: "2.14" + platforms: + - name: Ubuntu + versions: + - jammy # 22.04 + - noble # 24.04 + galaxy_tags: + - wireguard + - vpn + - security + - dhis2 + +dependencies: [] diff --git a/deploy/roles/wireguard/tasks/generate_client_keys.yml b/deploy/roles/wireguard/tasks/generate_client_keys.yml new file mode 100644 index 0000000..ff00e7f --- /dev/null +++ b/deploy/roles/wireguard/tasks/generate_client_keys.yml @@ -0,0 +1,180 @@ +--- +# Hub-side keypair + PSK generator. +# Caller passes `wg_keygen_peers` - a list of normalized peer dicts (name, +# allowed_ips, wireguard_ip, endpoint_kind, optional public_key/preshared_key). +# Produces wireguard_peers_resolved enriched with public_key, _private_key, +# and _preshared_key per entry. 
+ +- name: KeyGen | Ensure client key directory exists + ansible.builtin.file: + path: '{{ wireguard_client_key_dir }}' + state: directory + owner: root + group: root + mode: '0700' + +- name: KeyGen | Check for existing peer private keys + ansible.builtin.stat: + path: '{{ wireguard_client_key_dir }}/{{ item.name }}.key' + loop: '{{ wg_keygen_peers }}' + loop_control: + label: '{{ item.name }}' + register: wireguard_client_key_stats + when: item.public_key is not defined + +- name: KeyGen | Generate peer keypair (private + public) + ansible.builtin.shell: | + set -o pipefail + umask 077 + wg genkey | tee "{{ wireguard_client_key_dir }}/{{ item.item.name }}.key" | \ + wg pubkey > "{{ wireguard_client_key_dir }}/{{ item.item.name }}.pub" + args: + executable: /bin/bash + loop: '{{ wireguard_client_key_stats.results }}' + loop_control: + label: '{{ item.item.name }}' + when: + - item.stat is defined + - not item.stat.exists + changed_when: true + no_log: true + +- name: KeyGen | Read generated peer public keys + ansible.builtin.slurp: + src: '{{ wireguard_client_key_dir }}/{{ item.name }}.pub' + loop: '{{ wg_keygen_peers }}' + loop_control: + label: '{{ item.name }}' + register: wireguard_client_public_keys + when: item.public_key is not defined + no_log: true + +- name: KeyGen | Read generated peer private keys + ansible.builtin.slurp: + src: '{{ wireguard_client_key_dir }}/{{ item.name }}.key' + loop: '{{ wg_keygen_peers }}' + loop_control: + label: '{{ item.name }}' + register: wireguard_client_private_keys + when: item.public_key is not defined + no_log: true + +- name: KeyGen | Check for existing peer PSKs + ansible.builtin.stat: + path: '{{ wireguard_client_key_dir }}/{{ item.name }}.psk' + loop: '{{ wg_keygen_peers }}' + loop_control: + label: '{{ item.name }}' + register: wireguard_client_psk_stats + when: + - wireguard_auto_generate_psk | bool + - item.preshared_key is not defined + +- name: KeyGen | Generate peer PSK + ansible.builtin.shell: | + set -o 
pipefail + umask 077 + wg genpsk > "{{ wireguard_client_key_dir }}/{{ item.item.name }}.psk" + args: + executable: /bin/bash + loop: '{{ wireguard_client_psk_stats.results | default([]) }}' + loop_control: + label: "{{ item.item.name | default('(skipped)') }}" + when: + - wireguard_auto_generate_psk | bool + - item.stat is defined + - not item.stat.exists + changed_when: true + no_log: true + +- name: KeyGen | Read generated peer PSKs + ansible.builtin.slurp: + src: '{{ wireguard_client_key_dir }}/{{ item.name }}.psk' + loop: '{{ wg_keygen_peers }}' + loop_control: + label: '{{ item.name }}' + register: wireguard_client_psks + when: + - wireguard_auto_generate_psk | bool + - item.preshared_key is not defined + no_log: true + +- name: KeyGen | Build wireguard_peers_resolved with keys and PSKs + ansible.builtin.set_fact: + wireguard_peers_resolved: >- + {{ wireguard_peers_resolved | default([]) + [ + item | combine({ + 'public_key': (wireguard_client_public_keys.results[idx].content | b64decode | trim) + if item.public_key is not defined + else item.public_key, + '_private_key': (wireguard_client_private_keys.results[idx].content | b64decode | trim) + if item.public_key is not defined + else none, + '_preshared_key': item.preshared_key + if item.preshared_key is defined + else ((wireguard_client_psks.results[idx].content | b64decode | trim) + if (wireguard_auto_generate_psk | bool) + else none) + }) + ] }} + loop: '{{ wg_keygen_peers }}' + loop_control: + index_var: idx + label: '{{ item.name }}' + no_log: true + +- name: KeyGen | Find all peer key files for permission enforcement + ansible.builtin.find: + paths: '{{ wireguard_client_key_dir }}' + patterns: '*.key,*.pub,*.psk' + register: wireguard_key_files_for_perms + +- name: KeyGen | Enforce 0600 on peer key files + ansible.builtin.file: + path: '{{ item.path }}' + owner: root + group: root + mode: '0600' + loop: '{{ wireguard_key_files_for_perms.files | default([]) }}' + loop_control: + label: '{{ item.path | 
basename }}' + +- name: KeyGen | Find orphan peer key files + ansible.builtin.find: + paths: '{{ wireguard_client_key_dir }}' + patterns: '*.key,*.pub,*.psk' + register: wireguard_existing_key_files + when: wireguard_prune_orphans | bool + +- name: KeyGen | Remove orphan peer key files + ansible.builtin.file: + path: '{{ item.path }}' + state: absent + loop: '{{ wireguard_existing_key_files.files | default([]) }}' + loop_control: + label: '{{ item.path | basename }}' + when: + - wireguard_prune_orphans | bool + - (item.path | basename | regex_replace('\\.(key|pub|psk)$', '')) + not in (wg_keygen_peers | map(attribute='name') | list) + no_log: true + +- name: KeyGen | Find orphan peer config files + ansible.builtin.find: + paths: '{{ wireguard_client_config_dir }}' + patterns: '*.conf' + register: wireguard_existing_conf_files + when: wireguard_prune_orphans | bool + +- name: KeyGen | Remove orphan peer config files + ansible.builtin.file: + path: '{{ item.path }}' + state: absent + loop: '{{ wireguard_existing_conf_files.files | default([]) }}' + loop_control: + label: '{{ item.path | basename }}' + when: + - wireguard_prune_orphans | bool + - (item.path | basename | regex_replace('\\.conf$', '')) + not in (wg_keygen_peers | map(attribute='name') | list) + no_log: true diff --git a/deploy/roles/wireguard/tasks/host_portforward.yml b/deploy/roles/wireguard/tasks/host_portforward.yml new file mode 100644 index 0000000..0f1e5df --- /dev/null +++ b/deploy/roles/wireguard/tasks/host_portforward.yml @@ -0,0 +1,57 @@ +--- +# Forwards UDP {{ wireguard_port }} from a host-owned IP into the hub +# container via `lxc network forward`. +# +# wireguard_endpoint_listen is the host-side bind IP (LXD forwards bind on +# host-side only). wireguard_endpoint_public is what home peers dial; it's +# resolved in hub.yml. On cloud VMs with 1:1 NAT (e.g. AWS EIP) the listen +# IP is the private primary and public IP must be set explicitly. 
+
+- name: WireGuard | Resolve listen address for `lxc network forward`
+  ansible.builtin.set_fact:
+    wireguard_endpoint_listen_resolved: >-
+      {{ wireguard_endpoint_listen
+         if (wireguard_endpoint_listen | length > 0)
+         else ansible_default_ipv4.address }}
+
+- name: WireGuard | Resolve public endpoint advertised to home peers
+  ansible.builtin.set_fact:
+    wireguard_endpoint_public_resolved: >-
+      {{ wireguard_endpoint_public
+         if (wireguard_endpoint_public | length > 0)
+         else wireguard_endpoint_listen_resolved }}
+
+- name: WireGuard | List existing LXD network forwards
+  vars:
+    ansible_connection: local
+  ansible.builtin.command:
+    cmd: lxc network forward list {{ lxd_bridge_interface }} --format json
+  register: wireguard_lxd_forwards
+  changed_when: false  # listing forwards never modifies anything
+
+- name: WireGuard | Ensure LXD network forward exists for listen address
+  vars:
+    ansible_connection: local
+    _listen_addrs: >-
+      {{ wireguard_lxd_forwards.stdout | from_json | map(attribute='listen_address') | list }}
+  ansible.builtin.command:
+    cmd: >-
+      lxc network forward create {{ lxd_bridge_interface }}
+      {{ wireguard_endpoint_listen_resolved }}
+  changed_when: true
+  when: wireguard_endpoint_listen_resolved not in _listen_addrs  # create only when no forward listens on this address yet
+
+- name: WireGuard | Add UDP forward to hub container
+  vars:
+    ansible_connection: local
+  ansible.builtin.command:
+    cmd: >-
+      lxc network forward port add {{ lxd_bridge_interface }}
+      {{ wireguard_endpoint_listen_resolved }} udp {{ wireguard_port }}
+      {{ wireguard_hub_lxd_ip }} {{ wireguard_port }}
+  register: wireguard_port_fwd
+  changed_when: wireguard_port_fwd.rc == 0
+  failed_when:  # all three must hold: a non-zero rc whose stderr says the port already exists is tolerated
+    - wireguard_port_fwd.rc != 0
+    - "'already exists' not in wireguard_port_fwd.stderr"
+    - "'Duplicate' not in wireguard_port_fwd.stderr"
diff --git a/deploy/roles/wireguard/tasks/hub.yml b/deploy/roles/wireguard/tasks/hub.yml
new file mode 100644
index 0000000..ec45a07
--- /dev/null
+++ b/deploy/roles/wireguard/tasks/hub.yml
@@ -0,0 +1,210 @@
+---
+- name: Hub | Wait for LXD connection to be usable
+  ansible.builtin.wait_for_connection:
+    timeout: 60
+    sleep: 3
+
+- name: Hub | Install WireGuard packages
+  ansible.builtin.apt:
+    name:
+      - wireguard
+      - wireguard-tools
+    state: present
+    update_cache: true
+    cache_valid_time: 3600
+
+- name: Hub | Ensure /etc/wireguard exists with secure permissions
+  ansible.builtin.file:
+    path: /etc/wireguard
+    state: directory
+    owner: root
+    group: root
+    mode: '0700'
+
+- name: Hub | Ensure clients config directory exists
+  ansible.builtin.file:
+    path: '{{ wireguard_client_config_dir }}'
+    state: directory
+    owner: root
+    group: root
+    mode: '0700'
+
+- name: Hub | Check if hub private key exists
+  ansible.builtin.stat:
+    path: /etc/wireguard/server_private.key
+  register: wireguard_private_key_stat  # also selects which "Render wg0.conf" task runs below
+
+- name: Hub | Generate hub keypair
+  ansible.builtin.shell: |
+    set -o pipefail
+    umask 077
+    wg genkey | tee /etc/wireguard/server_private.key | wg pubkey > /etc/wireguard/server_public.key
+  args:
+    executable: /bin/bash
+  when: not wireguard_private_key_stat.stat.exists  # an existing hub key is never regenerated
+  changed_when: true
+
+- name: Hub | Set restrictive permissions on hub key files
+  ansible.builtin.file:
+    path: '/etc/wireguard/{{ item }}'
+    owner: root
+    group: root
+    mode: '0600'
+  loop:
+    - server_private.key
+    - server_public.key
+
+- name: Hub | Read hub private key
+  ansible.builtin.slurp:
+    src: /etc/wireguard/server_private.key
+  register: wireguard_server_private_key_data
+  no_log: true
+
+- name: Hub | Read hub public key
+  ansible.builtin.slurp:
+    src: /etc/wireguard/server_public.key
+  register: wireguard_server_public_key_data
+
+- name: Hub | Display hub public key
+  ansible.builtin.debug:
+    msg: >-
+      WireGuard Hub Public Key:
+      {{ wireguard_server_public_key_data.content | b64decode | trim }}
+
+- name: Hub | Enable IPv4 forwarding (spoke to spoke routing)
+  ansible.posix.sysctl:
+    name: net.ipv4.ip_forward
+    value: '1'
+    state: present
+    reload: true
+    sysctl_set: true
+
+- name: Hub | Allow wg0 to wg0 FORWARD (mesh relay)
+  ansible.builtin.iptables:
+    chain: FORWARD
+    in_interface: '{{ wireguard_interface }}'
+    out_interface: '{{ wireguard_interface }}'
+    jump: ACCEPT
+    state: present
+
+# Normalize app-container and human peers into one shape so downstream
+# templates and key generation can iterate uniformly.
+- name: Hub | Build app-container peer entries from inventory
+  ansible.builtin.set_fact:  # appends one normalized entry per loop iteration
+    _wireguard_app_peers_normalized: >-
+      {{ _wireguard_app_peers_normalized | default([]) + [
+           {
+             'name': item,
+             'allowed_ips': hostvars[item].wireguard_ip ~ '/32',
+             'wireguard_ip': hostvars[item].wireguard_ip,
+             'endpoint_kind': 'internal'
+           }
+         ] }}
+  loop: "{{ groups['all'] | difference(['127.0.0.1', wireguard_hub_inventory_hostname]) }}"
+  loop_control:
+    label: "{{ item }}"
+  when: hostvars[item].wireguard_ip is defined
+
+- name: Hub | Normalize human peers from wireguard_peers
+  ansible.builtin.set_fact:  # wireguard_ip = first allowed_ips entry with its /mask stripped
+    _wireguard_human_peers_normalized: >-
+      {{ _wireguard_human_peers_normalized | default([]) + [
+           item | combine({
+             'wireguard_ip': item.allowed_ips.split(',')[0] | trim | regex_replace('/[0-9]+$', ''),
+             'endpoint_kind': 'external'
+           })
+         ] }}
+  loop: "{{ wireguard_peers }}"
+  loop_control:
+    label: "{{ item.name }}"
+
+- name: Hub | Combine app + human peers into unified list
+  ansible.builtin.set_fact:
+    _wireguard_unified_peers: >-
+      {{ (_wireguard_app_peers_normalized | default([]))
+         + (_wireguard_human_peers_normalized | default([])) }}
+
+- name: Hub | Resolve public endpoint for home/admin peers
+  ansible.builtin.set_fact:  # falls back through the facts set on 127.0.0.1, ending at the literal '127.0.0.1'
+    wireguard_endpoint_public_resolved: >-
+      {{ wireguard_endpoint_public
+         if (wireguard_endpoint_public | length > 0)
+         else hostvars['127.0.0.1']['wireguard_endpoint_public_resolved']
+              | default(hostvars['127.0.0.1']['wireguard_endpoint_listen_resolved']
+                        | default(hostvars['127.0.0.1']['ansible_default_ipv4']['address']
+                                  | default('127.0.0.1'))) }}
+
+- name: Hub | Generate keys for all peers (auto-generate mode)
+  ansible.builtin.include_tasks: generate_client_keys.yml
+  vars:
+    wg_keygen_peers: "{{ _wireguard_unified_peers }}"
+  when:
+    - wireguard_auto_generate_keys | bool
+    - _wireguard_unified_peers | length > 0
+
+- name: Hub | Use unified peer list as resolved (no auto-generate)
+  ansible.builtin.set_fact:
+    wireguard_peers_resolved: '{{ _wireguard_unified_peers }}'
+  when: not (wireguard_auto_generate_keys | bool) or _wireguard_unified_peers | length == 0
+
+# Two render paths so live VPN sessions survive peer changes: first run
+# starts fresh from the file; subsequent runs notify `wg syncconf` instead
+# of restarting the unit.
+- name: Hub | Render wg0.conf
+  ansible.builtin.template:
+    src: wg0.conf.j2
+    dest: '/etc/wireguard/{{ wireguard_interface }}.conf'
+    owner: root
+    group: root
+    mode: '0600'
+  register: wireguard_hub_conf
+  notify: Sync WireGuard peers
+  when: wireguard_private_key_stat.stat.exists
+
+- name: Hub | Render wg0.conf (first run)
+  ansible.builtin.template:
+    src: wg0.conf.j2
+    dest: '/etc/wireguard/{{ wireguard_interface }}.conf'
+    owner: root
+    group: root
+    mode: '0600'
+  when: not wireguard_private_key_stat.stat.exists
+
+- name: Hub | Enable and start WireGuard service
+  ansible.builtin.systemd:
+    name: 'wg-quick@{{ wireguard_interface }}'
+    state: started
+    enabled: true
+    daemon_reload: true
+
+- name: Hub | Warn when no peers are configured
+  ansible.builtin.debug:
+    msg: >-
+      WARNING: no WireGuard peers found (no app containers with wireguard_ip
+      and no entries in wireguard_peers). The hub will start but nothing can
+      connect.
+  when: _wireguard_unified_peers | length == 0
+
+- name: Hub | Render per-peer client configs
+  ansible.builtin.template:
+    src: client.conf.j2
+    dest: '{{ wireguard_client_config_dir }}/{{ item.name }}.conf'
+    owner: root
+    group: root
+    mode: '0600'
+  no_log: true
+  loop: '{{ wireguard_peers_resolved }}'
+  loop_control:
+    label: '{{ item.name }} ({{ item.endpoint_kind }})'
+  when: item._private_key | default('', true) | length > 0  # None-safe: peers that bring their own public_key carry _private_key=None
+
+- name: Hub | Display peer config retrieval instructions
+  ansible.builtin.debug:
+    msg: |
+      {{ wireguard_peers_resolved | length }} peer config(s) generated on the hub.
+
+      App-container configs are pulled automatically by the peer stage.
+
+      Human peer configs ({{ _wireguard_human_peers_normalized | default([]) | length }}):
+        sudo lxc exec {{ wireguard_hub_inventory_hostname }} -- cat {{ wireguard_client_config_dir }}/<peer-name>.conf
+  when: wireguard_peers_resolved | default([]) | length > 0
diff --git a/deploy/roles/wireguard/tasks/lockdown_instances.yml b/deploy/roles/wireguard/tasks/lockdown_instances.yml
new file mode 100644
index 0000000..d526d6e
--- /dev/null
+++ b/deploy/roles/wireguard/tasks/lockdown_instances.yml
@@ -0,0 +1,29 @@
+---
+- name: Check if munin-node is installed
+  ansible.builtin.stat:
+    path: /etc/munin/munin-node.conf
+  register: wireguard_munin_node_stat
+
+- name: Lockdown | Allow Glowroot (4000/tcp) from VPN subnet
+  community.general.ufw:
+    rule: allow
+    port: '4000'
+    src: '{{ wireguard_network }}'
+    proto: tcp
+    state: enabled
+    comment: 'Glowroot APM - VPN access only'
+  when:
+    - app_monitoring is defined
+    - app_monitoring | trim == 'glowroot'
+
+- name: Lockdown | Allow munin-node (4949/tcp) from monitor container only
+  community.general.ufw:
+    rule: allow
+    port: '4949'
+    src: "{{ hostvars[groups['monitoring'][0]]['ansible_host'] }}"
+    proto: tcp
+    state: enabled
+    comment: 'munin-node - monitor container access only'
+  when:
+    - wireguard_munin_node_stat.stat.exists
+    - groups.get('monitoring', []) | length > 0  # src above indexes the first [monitoring] host
diff --git a/deploy/roles/wireguard/tasks/lockdown_monitor.yml b/deploy/roles/wireguard/tasks/lockdown_monitor.yml
new file mode 100644
index 0000000..e306012
--- /dev/null
+++ b/deploy/roles/wireguard/tasks/lockdown_monitor.yml
@@ -0,0 +1,105 @@
+---
+- name: Check if Grafana is installed
+  ansible.builtin.stat:
+    path: /etc/grafana/grafana.ini
+  register: wireguard_grafana_ini_stat  # every Grafana task below is skipped when grafana.ini is absent
+
+- name: Reset Grafana root_url for direct access
+  community.general.ini_file:
+    path: /etc/grafana/grafana.ini
+    section: server
+    option: root_url
+    value: '%(protocol)s://%(domain)s:%(http_port)s/'
+    mode: '0640'
+    owner: root
+    group: grafana
+  notify: Restart Grafana
+  when: wireguard_grafana_ini_stat.stat.exists
+
+- name: Disable Grafana serve_from_sub_path
+  community.general.ini_file:
+    path: /etc/grafana/grafana.ini
+    section: server
+    option: serve_from_sub_path
+    value: 'false'
+    mode: '0640'
+    owner: root
+    group: grafana
+  notify: Restart Grafana
+  when: wireguard_grafana_ini_stat.stat.exists
+
+- name: Ensure Grafana listens on all container interfaces
+  community.general.ini_file:
+    path: /etc/grafana/grafana.ini
+    section: server
+    option: http_addr
+    value: '0.0.0.0'
+    mode: '0640'
+    owner: root
+    group: grafana
+  notify: Restart Grafana
+  when: wireguard_grafana_ini_stat.stat.exists
+
+- name: Check if Prometheus is installed
+  ansible.builtin.stat:
+    path: /etc/prometheus/prometheus.yml
+  register: wireguard_prometheus_stat
+
+- name: Lockdown | Allow Grafana from VPN subnet
+  community.general.ufw:
+    rule: allow
+    port: "{{ grafana_port }}"
+    src: '{{ wireguard_network }}'
+    proto: tcp
+    state: enabled
+    comment: 'Grafana - VPN access only'
+  when: wireguard_grafana_ini_stat.stat.exists
+
+- name: Lockdown | Remove proxy -> Grafana UFW rule (replaced by VPN access)
+  community.general.ufw:
+    rule: allow
+    port: "{{ grafana_port }}"
+    src: "{{ hostvars[item]['ansible_host'] }}"
+    proto: tcp
+    delete: true
+  loop: "{{ groups.get('web', []) }}"
+  loop_control:
+    label: '{{ item }}'
+  when: wireguard_grafana_ini_stat.stat.exists
+
+- name: Lockdown | Allow Prometheus (9090/tcp) from VPN subnet
+  community.general.ufw:
+    rule: allow
+    port: '9090'
+    src: '{{ wireguard_network }}'
+    proto: tcp
+    state: enabled
+    comment: 'Prometheus - VPN access only'
+  when: wireguard_prometheus_stat.stat.exists
+
+- name: Lockdown | Check if Munin is installed
+  ansible.builtin.stat:
+    path: /etc/munin/munin.conf
+  register: wireguard_munin_conf_stat
+
+- name: Lockdown | Allow Munin (80/tcp) from VPN subnet
+  community.general.ufw:
+    rule: allow
+    port: '80'
+    src: '{{ wireguard_network }}'
+    proto: tcp
+    state: enabled
+    comment: 'Munin - VPN access only'
+  when: wireguard_munin_conf_stat.stat.exists
+
+- name: Lockdown | Remove proxy -> Munin UFW rule (replaced by VPN access)
+  community.general.ufw:
+    rule: allow
+    port: '80'
+    src: "{{ hostvars[item]['ansible_host'] }}"
+    proto: tcp
+    delete: true
+  loop: "{{ groups.get('web', []) }}"
+  loop_control:
+    label: '{{ item }}'
+  when: wireguard_munin_conf_stat.stat.exists
diff --git a/deploy/roles/wireguard/tasks/lockdown_postgres.yml b/deploy/roles/wireguard/tasks/lockdown_postgres.yml
new file mode 100644
index 0000000..46f9d32
--- /dev/null
+++ b/deploy/roles/wireguard/tasks/lockdown_postgres.yml
@@ -0,0 +1,78 @@
+---
+# Per-peer pg_hba.conf rules managed via a single blockinfile so that peer
+# removal is idempotent: lineinfile cannot remove a previously-added line
+# when the entry disappears from inventory.
+
+# Discover the active cluster directory off disk as root rather than via
+# `become_user: postgres` + postgresql_info. On idmapped LXD mounts the
+# become-to-unprivileged-user step cannot chown the temp file it transfers
+# ("Unprivileged become user would be unable to read the file"), which aborts
+# the lockdown run before lockdown_instances ever applies. Reading the cluster
+# directory needs no unprivileged become and works on every connection type.
+- name: Lockdown | Discover PostgreSQL cluster directories
+  ansible.builtin.find:
+    paths: /etc/postgresql
+    file_type: directory
+    depth: 1
+    patterns: '[0-9]*'
+    use_regex: false
+  register: wireguard_pg_clusters
+
+- name: Lockdown | Resolve pg_hba.conf path (highest installed major)
+  ansible.builtin.set_fact:
+    wireguard_pg_hba_path: >-
+      /etc/postgresql/{{
+        wireguard_pg_clusters.files
+        | map(attribute='path')
+        | map('basename')
+        | map('int')
+        | max
+      }}/main/pg_hba.conf
+  when: wireguard_pg_clusters.files | length > 0  # fact stays undefined when no version directory was found
+
+- name: Lockdown | Verify pg_hba.conf exists
+  ansible.builtin.stat:
+    path: "{{ wireguard_pg_hba_path | default('') }}"
+  register: wireguard_pg_hba_stat
+  when: wireguard_pg_hba_path is defined
+
+# database/user are derived from item.1.instance (the DHIS2 instance's LXD
+# container name, which equals its db name, role and owner, see
+# roles/create-instance/tasks/postgresql-db.yml). An explicit database/user
+# on the entry overrides the derived value (e.g. { database: all, user: all }).
+- name: Lockdown | Build per-peer pg_hba lines
+  ansible.builtin.set_fact:  # one line per (peer, pg_access entry) pair
+    _wireguard_pg_hba_lines: >-
+      {{ _wireguard_pg_hba_lines | default([]) + [
+           'hostssl ' ~ (item.1.database | default(item.1.instance))
+           ~ ' ' ~ (item.1.user | default(item.1.instance))
+           ~ ' ' ~ (item.0.peer_ip | default(item.0.allowed_ips.split(',')[0] | trim))
+           ~ ' scram-sha-256'
+         ] }}
+  loop: "{{ wireguard_peers | subelements('pg_access', skip_missing=True) }}"
+  loop_control:
+    label: >-
+      {{ item.0.name }} ->
+      {{ item.1.database | default(item.1.instance) }}/{{ item.1.user | default(item.1.instance) }}
+
+- name: Lockdown | Manage per-peer pg_hba block
+  ansible.builtin.blockinfile:
+    path: "{{ wireguard_pg_hba_path }}"
+    marker: "# {mark} ANSIBLE MANAGED — wireguard per-peer pg_access"
+    block: "{{ _wireguard_pg_hba_lines | default([]) | join('\n') }}"
+    state: "{{ 'present' if (_wireguard_pg_hba_lines | default([]) | length > 0) else 'absent' }}"
+    insertbefore: EOF
+    create: false
+  notify: Reload PostgreSQL
+  when:
+    - wireguard_pg_hba_path is defined
+    - wireguard_pg_hba_stat.stat.exists | default(false)
+
+- name: Lockdown | Allow PostgreSQL (5432/tcp) from VPN subnet
+  community.general.ufw:
+    rule: allow
+    port: '5432'
+    src: '{{ wireguard_network }}'
+    proto: tcp
+    state: enabled
+    comment: 'PostgreSQL - VPN access only'
diff --git a/deploy/roles/wireguard/tasks/lockdown_proxy.yml b/deploy/roles/wireguard/tasks/lockdown_proxy.yml
new file mode 100644
index 0000000..ba80187
--- /dev/null
+++ b/deploy/roles/wireguard/tasks/lockdown_proxy.yml
@@ -0,0 +1,82 @@
+---
+- name: Find monitoring nginx upstream configs
+  ansible.builtin.find:
+    paths: /etc/nginx/conf.d/upstream
+    patterns:
+      - 'munin*.conf'
+      - 'grafana*.conf'
+  register: wireguard_nginx_monitoring_configs
+  when: proxy | default('nginx') == 'nginx'
+
+- name: Empty monitoring configs on nginx proxy (keep files so include directives still resolve)
+  ansible.builtin.copy:
+    content: "# Monitoring disabled by WireGuard lockdown\n"
+    dest: '{{ item.path }}'
+    mode: '0644'
+  loop: '{{ wireguard_nginx_monitoring_configs.files | default([]) }}'
+  loop_control:
+    label: '{{ item.path | basename }}'
+  notify: Reload Nginx
+  when:
+    - proxy | default('nginx') == 'nginx'
+    - wireguard_nginx_monitoring_configs.files | default([]) | length > 0
+
+# `role_path` is the path to the currently-executing role (wireguard).
+# Using it (rather than `playbook_dir`) keeps the cross-role template
+# reference correct whether this file is loaded from dhis2.yml or from
+# playbooks/wireguard-lockdown.yml.
+- name: Re-render nginx instance configs without Glowroot proxy blocks
+  ansible.builtin.template:
+    src: '{{ role_path }}/../create-instance/templates/nginx/instance.j2'
+    dest: '/etc/nginx/conf.d/upstream/{{ item | to_fixed_string }}.conf'
+    owner: root
+    group: root
+    mode: '0640'
+  loop: "{{ groups.get('instances', []) }}"  # the loop is templated before `when`; tolerate inventories with no [instances] group
+  vars:
+    # Forces instance.j2 to drop the /glowroot location block. The role default
+    # is false (so a normal dhis2.yml proxy render keeps glowroot); the lockdown
+    # phase explicitly opts in here.
+    wireguard_lockdown_monitoring: true
+  notify: Reload Nginx
+  when:
+    - proxy | default('nginx') == 'nginx'
+    - hostvars[item]['instance_state'] is undefined
+
+- name: Find monitoring apache2 site configs
+  ansible.builtin.find:
+    paths: /etc/apache2/sites-enabled
+    patterns:
+      - 'munin*.conf'
+      - 'grafana*.conf'
+  register: wireguard_apache_monitoring_configs
+  when: proxy | default('nginx') == 'apache2'
+
+- name: Remove monitoring configs from apache2 proxy
+  ansible.builtin.file:
+    path: '{{ item.path }}'
+    state: absent
+  loop: '{{ wireguard_apache_monitoring_configs.files | default([]) }}'
+  loop_control:
+    label: '{{ item.path | basename }}'
+  notify: Reload Apache2
+  when:
+    - proxy | default('nginx') == 'apache2'
+    - wireguard_apache_monitoring_configs.files | default([]) | length > 0
+
+- name: Re-render apache2 instance configs without Glowroot proxy blocks
+  ansible.builtin.template:
+    src: '{{ role_path }}/../create-instance/templates/apache2/instance.j2'
+    dest: '/etc/apache2/upstream/{{ item | to_fixed_string }}.conf'
+    owner: root
+    group: root
+    mode: '0640'
+  loop: "{{ groups.get('instances', []) }}"  # the loop is templated before `when`; tolerate inventories with no [instances] group
+  vars:
+    # Forces instance.j2 to drop the /glowroot Location block. See the nginx
+    # task above for why this is task-scoped rather than a play default.
+    wireguard_lockdown_monitoring: true
+  notify: Reload Apache2
+  when:
+    - proxy | default('nginx') == 'apache2'
+    - hostvars[item]['instance_state'] is undefined
diff --git a/deploy/roles/wireguard/tasks/lxd_container.yml b/deploy/roles/wireguard/tasks/lxd_container.yml
new file mode 100644
index 0000000..8919f04
--- /dev/null
+++ b/deploy/roles/wireguard/tasks/lxd_container.yml
@@ -0,0 +1,66 @@
+---
+# user.type="wireguard" is required so custom_lxd dynamic inventory can
+# discover the container.
+
+- name: WireGuard | Create wireguard hub container
+  vars:
+    ansible_connection: local
+  community.general.lxd_container:
+    config:
+      boot.autostart.priority: "5"
+      user.type: "wireguard"
+    name: "{{ wireguard_hub_inventory_hostname }}"
+    state: started
+    profiles: [default]
+    ignore_volatile_options: false
+    wait_for_container: true
+    wait_for_ipv4_addresses: true
+    timeout: 60
+    source:
+      type: image
+      mode: pull
+      server: "{{ lxd_source_server | default('https://cloud-images.ubuntu.com/releases') }}"
+      protocol: "{{ lxd_source_protocol | default('simplestreams') }}"
+      alias: "{{ guest_os }}/{{ guest_os_arch | default('amd64') }}"
+    devices:
+      eth0:
+        nictype: bridged
+        parent: "{{ lxd_bridge_interface | default('lxdbr1') }}"
+        type: nic
+        ipv4.address: "{{ wireguard_hub_lxd_ip }}"
+  register: wireguard_create_status  # .changed gates the restart and the two wait tasks below
+
+- name: WireGuard | Ensure hub container has its static IP  # noqa: no-handler
+  vars:
+    ansible_connection: local
+  community.general.lxd_container:
+    name: "{{ wireguard_hub_inventory_hostname }}"
+    state: restarted
+    wait_for_ipv4_addresses: true
+  when: wireguard_create_status.changed
+  register: wireguard_restart_status
+
+- name: WireGuard | Wait for systemd inside hub container
+  ansible.builtin.command:  # NOTE(review): no ansible_connection: local override here, unlike the lxd_container tasks above - confirm the play already targets the LXD host
+    cmd: >-
+      lxc exec {{ wireguard_hub_inventory_hostname }} --
+      test -d /run/systemd/system
+  changed_when: false
+  retries: 30
+  delay: 2
+  register: wireguard_systemd_check
+  until: wireguard_systemd_check.rc == 0
+  when: wireguard_create_status.changed or (wireguard_restart_status.changed | default(false))
+
+- name: WireGuard | Wait for cloud-init to finish inside hub container
+  ansible.builtin.command:  # NOTE(review): same missing ansible_connection: local override as the systemd wait - confirm
+    cmd: >-
+      lxc exec {{ wireguard_hub_inventory_hostname }} --
+      cloud-init status --wait
+  changed_when: false
+  failed_when: false  # best-effort wait: this task can never fail the play
+  retries: 3
+  delay: 5
+  register: wireguard_cloud_init_check
+  until: wireguard_cloud_init_check.rc in [0, 2]  # NOTE(review): the meaning of rc 2 is not shown here - confirm against cloud-init docs
+  when: wireguard_create_status.changed or (wireguard_restart_status.changed | default(false))
diff --git a/deploy/roles/wireguard/tasks/main.yml b/deploy/roles/wireguard/tasks/main.yml
new file mode 100644
index 0000000..b428960
--- /dev/null
+++ b/deploy/roles/wireguard/tasks/main.yml
@@ -0,0 +1,51 @@
+---
+# The role is normally driven from playbooks/wireguard.yml and
+# playbooks/wireguard-lockdown.yml via include_role tasks_from:, so this
+# dispatcher is only used when the role is invoked the legacy way
+# (roles: [{ role: wireguard, vars: { wireguard_stage: ... } }]).
+#
+# Stages:
+#   host_portforward - provision hub LXD container + lxc network forward
+#                      (LXD deployments only).
+#   hub              - install/configure WireGuard server inside the hub.
+#   peer             - install/configure WireGuard on each app host.
+#
+# Lockdown stages (lockdown_proxy, lockdown_monitor, lockdown_postgres,
+# lockdown_instances) are NOT exposed here; run
+# playbooks/wireguard-lockdown.yml explicitly after the mesh is verified.
+
+- name: WireGuard | Validate configuration
+  ansible.builtin.include_tasks: validate.yml
+  when:
+    - wireguard_enabled | bool
+    - wireguard_stage == 'host_portforward'  # validation is tied to the first stage only
+  tags:
+    - always
+
+- name: WireGuard | Provision hub LXD container (LXD only)
+  ansible.builtin.include_tasks: lxd_container.yml
+  when:
+    - wireguard_enabled | bool
+    - wireguard_stage == 'host_portforward'
+    - ansible_connection | default('lxd') == 'lxd'  # an unset ansible_connection is treated as lxd
+
+- name: WireGuard | LXD host port-forward (LXD only)
+  ansible.builtin.include_tasks: host_portforward.yml
+  when:
+    - wireguard_enabled | bool
+    - wireguard_stage == 'host_portforward'
+    - ansible_connection | default('lxd') == 'lxd'
+
+- name: WireGuard | Hub container setup
+  ansible.builtin.include_tasks: hub.yml
+  when:
+    - wireguard_enabled | bool
+    - wireguard_stage == 'hub'
+
+- name: WireGuard | Peer setup
+  ansible.builtin.include_tasks: peer.yml
+  when:
+    - wireguard_enabled | bool
+    - wireguard_stage == 'peer'
+    - hostvars[inventory_hostname].wireguard_ip is defined  # hosts without a wireguard_ip are not peers
+    - hostvars[inventory_hostname].wireguard_ip | length > 0
diff --git a/deploy/roles/wireguard/tasks/peer.yml b/deploy/roles/wireguard/tasks/peer.yml
new file mode 100644
index 0000000..254bfe5
--- /dev/null
+++ b/deploy/roles/wireguard/tasks/peer.yml
@@ -0,0 +1,50 @@
+---
+# Runs INSIDE each app container (proxy/postgres/dhis/monitor).
+# Pulls the pre-rendered wg0 config from the hub and starts the WG service.
+
+- name: Peer | Install WireGuard packages
+  ansible.builtin.apt:
+    name:
+      - wireguard
+      - wireguard-tools
+    state: present
+    update_cache: true
+    cache_valid_time: 3600
+
+- name: Peer | Ensure /etc/wireguard exists with secure permissions
+  ansible.builtin.file:
+    path: /etc/wireguard
+    state: directory
+    owner: root
+    group: root
+    mode: '0700'
+
+# `ansible_lxd_host` is set in inventory to `{{ inventory_hostname }}`, but
+# under delegate_to that template still resolves with the ORIGINAL host's
+# inventory_hostname (peer name) rather than the delegate target. Override
+# explicitly so the LXD connection plugin execs into the hub container.
+- name: Peer | Slurp pre-rendered config from hub
+  ansible.builtin.slurp:
+    src: "{{ wireguard_client_config_dir }}/{{ inventory_hostname }}.conf"  # file is named after this peer's inventory hostname
+  delegate_to: "{{ wireguard_hub_inventory_hostname }}"
+  vars:
+    ansible_lxd_host: "{{ wireguard_hub_inventory_hostname }}"
+  register: _wireguard_peer_conf
+  no_log: true
+
+- name: Peer | Write wg0.conf
+  ansible.builtin.copy:
+    content: "{{ _wireguard_peer_conf.content | b64decode }}"  # slurp returns the file base64-encoded
+    dest: "/etc/wireguard/{{ wireguard_interface }}.conf"
+    owner: root
+    group: root
+    mode: '0600'
+  no_log: true
+  notify: Restart WireGuard
+
+- name: Peer | Enable and start WireGuard service
+  ansible.builtin.systemd:
+    name: "wg-quick@{{ wireguard_interface }}"
+    state: started
+    enabled: true
+    daemon_reload: true
diff --git a/deploy/roles/wireguard/tasks/validate.yml b/deploy/roles/wireguard/tasks/validate.yml
new file mode 100644
index 0000000..61f82ba
--- /dev/null
+++ b/deploy/roles/wireguard/tasks/validate.yml
@@ -0,0 +1,192 @@
+---
+# Pre-flight validation. Runs early to fail fast before template rendering
+# produces cryptic WireGuard errors.
+
+- name: WireGuard | Assert each peer has required fields
+  ansible.builtin.assert:
+    that:
+      - item.name is defined and item.name | length > 0
+      - item.allowed_ips is defined and item.allowed_ips | length > 0
+      - item.public_key is defined or (wireguard_auto_generate_keys | bool)  # public_key is optional only in auto-generate mode
+    fail_msg: >-
+      wireguard_peers[{{ idx }}] is missing required fields.
+      With auto-generate: name and allowed_ips required.
+      Without auto-generate: name, public_key, and allowed_ips required.
+    quiet: true
+  loop: "{{ wireguard_peers }}"
+  loop_control:
+    index_var: idx
+    label: "{{ item.name | default('UNNAMED-peer-' + idx | string) }}"
+
+- name: WireGuard | Assert peer names are filesystem-safe
+  ansible.builtin.assert:
+    that:
+      - item.name is match('^[a-zA-Z0-9._-]+$')
+    fail_msg: >-
+      wireguard_peers[{{ idx }}].name = '{{ item.name }}' contains
+      invalid characters. Allowed: letters, digits, dot, underscore,
+      hyphen. Required to prevent path traversal in client key/config
+      file paths.
+    quiet: true
+  loop: "{{ wireguard_peers }}"
+  loop_control:
+    index_var: idx
+    label: "{{ item.name | default('UNNAMED-peer-' + idx | string) }}"
+
+- name: WireGuard | Assert no duplicate peer names
+  ansible.builtin.assert:
+    that:
+      - wireguard_peers | map(attribute='name') | list | unique | length
+        == wireguard_peers | length
+    fail_msg: >-
+      Duplicate name detected in wireguard_peers. Names are used as
+      file paths for keys and configs - duplicates cause silent
+      overwrite. Values: {{ wireguard_peers | map(attribute='name') | list }}
+    quiet: true
+  when: wireguard_peers | length > 1  # duplicates are impossible with fewer than two peers
+
+- name: WireGuard | Assert no duplicate allowed_ips across peers
+  ansible.builtin.assert:  # compares the raw allowed_ips strings, not parsed networks
+    that:
+      - wireguard_peers | map(attribute='allowed_ips') | list | unique | length == wireguard_peers | length
+    fail_msg: >-
+      Duplicate allowed_ips detected in wireguard_peers.
+      Each peer must have a unique allowed_ips value to avoid silent routing conflicts.
+      Values: {{ wireguard_peers | map(attribute='allowed_ips') | list }}
+    quiet: true
+  when: wireguard_peers | length > 1
+
+# A pg_access entry sources its database/user EITHER from `instance` (the
+# DHIS2 instance whose container name equals its db name and role - preferred,
+# avoids hardcoding) OR from explicit `database`/`user` fields (for wildcards
+# like { database: all, user: all }). An explicit field overrides the derived
+# value when both are present.
+- name: WireGuard | Assert pg_access entries reference an instance or set database/user
+  ansible.builtin.assert:
+    that:
+      - (item.1.instance is defined) or (item.1.database is defined and item.1.user is defined)
+    fail_msg: >-
+      wireguard_peers[{{ item.0.name }}].pg_access entry must set either
+      'instance' (a host in [instances]) or both 'database' and 'user'.
+    quiet: true
+  loop: "{{ wireguard_peers | subelements('pg_access', skip_missing=True) }}"  # item.0 = peer, item.1 = one pg_access entry
+  loop_control:
+    label: "{{ item.0.name }}"
+
+- name: WireGuard | Assert pg_access instance references exist in [instances]
+  ansible.builtin.assert:
+    that:
+      - item.1.instance in groups.get('instances', [])
+    fail_msg: >-
+      wireguard_peers[{{ item.0.name }}].pg_access references
+      instance='{{ item.1.instance }}' which is not a host in the [instances]
+      inventory group. Available: {{ groups.get('instances', []) }}.
+    quiet: true
+  loop: "{{ wireguard_peers | subelements('pg_access', skip_missing=True) }}"
+  loop_control:
+    label: "{{ item.0.name }} -> {{ item.1.instance | default('(explicit)') }}"
+  when: item.1.instance is defined  # entries that only set explicit database/user are not checked here
+
+- name: WireGuard | Assert pg_access database/user names are PostgreSQL-safe identifiers
+  ansible.builtin.assert:  # NOTE(review): fail_msg cites "regex matching", but lockdown_postgres.yml writes these rules with blockinfile - wording may be stale, confirm
+    that:
+      - (item.1.database | default(item.1.instance)) is match('^[a-zA-Z0-9_]+$')
+      - (item.1.user | default(item.1.instance)) is match('^[a-zA-Z0-9_]+$')
+    fail_msg: >-
+      wireguard_peers[{{ item.0.name }}].pg_access entry resolves to
+      database='{{ item.1.database | default(item.1.instance) }}'
+      user='{{ item.1.user | default(item.1.instance) }}' which contains
+      invalid characters. Allowed: letters, digits, underscore. Required
+      because these values are interpolated into pg_hba.conf rule regex
+      matching - metacharacters would corrupt idempotency.
+    quiet: true
+  loop: "{{ wireguard_peers | subelements('pg_access', skip_missing=True) }}"
+  loop_control:
+    label: >-
+      {{ item.0.name }} ->
+      {{ item.1.database | default(item.1.instance) }}/{{ item.1.user | default(item.1.instance) }}
+
+- name: WireGuard | Assert peer_ip (or first allowed_ips CIDR) is /32 for peers with pg_access
+  ansible.builtin.assert:
+    that:
+      - >-
+        ((item.peer_ip | default(item.allowed_ips.split(',')[0] | trim))
+        | regex_search('/32$')) is not none
+    fail_msg: >-
+      wireguard_peers[{{ item.name }}] uses pg_access but its peer_ip
+      (or first allowed_ips CIDR) does not end in /32. Per-peer pg_hba/UFW
+      rules treat the CIDR as a single host - non-/32 entries would silently
+      open access wider than intended. Set peer_ip explicitly or ensure the
+      first allowed_ips CIDR is /32.
+      Got: peer_ip={{ item.peer_ip | default('(unset)') }}
+      allowed_ips={{ item.allowed_ips }}
+    quiet: true
+  loop: "{{ wireguard_peers }}"
+  loop_control:
+    label: "{{ item.name }}"
+  when: item.pg_access is defined and item.pg_access | length > 0
+
+- name: WireGuard | Assert hub container is in inventory
+  ansible.builtin.assert:
+    that:
+      - wireguard_hub_inventory_hostname in groups['all']
+    fail_msg: >-
+      wireguard_hub_inventory_hostname='{{ wireguard_hub_inventory_hostname }}'
+      is not in inventory. Add a [wireguard] group with the hub host
+      (e.g. 'wireguard ansible_host=172.19.2.200 wireguard_ip=10.0.0.1').
+    quiet: true
+  run_once: true
+
+- name: WireGuard | Assert every app host (web/databases/instances/monitoring) has wireguard_ip
+  ansible.builtin.assert:
+    that:
+      - hostvars[item].wireguard_ip is defined
+      - hostvars[item].wireguard_ip | length > 0
+    fail_msg: >-
+      Host '{{ item }}' is in an app group (web/databases/instances/monitoring)
+      but has no wireguard_ip set. Add 'wireguard_ip=<10.0.0.x>' to the
+      inventory entry, or set wireguard_enabled=false to skip WireGuard.
+    quiet: true
+  loop: >-
+    {{ (groups.get('web', []) + groups.get('databases', [])
+    + groups.get('instances', []) + groups.get('monitoring', []))
+    | unique | list }}
+  loop_control:
+    label: "{{ item }}"
+  run_once: true
+
+# Two regex passes (split CIDR list, then strip mask) for portability across
+# Ansible/Jinja versions. For human peers with comma-separated allowed_ips,
+# this picks the peer's own VPN IP, not a downstream subnet.
+- name: WireGuard | Build canonical peer IP list for uniqueness check
+  ansible.builtin.set_fact:
+    _wireguard_human_peer_ips: >-
+      {{ wireguard_peers
+      | map(attribute='allowed_ips')
+      | map('regex_replace', '^\\s*([^,\\s]+).*$', '\\1')
+      | map('regex_replace', '/[0-9]+$', '')
+      | list }}
+  run_once: true
+
+- name: WireGuard | Collect all wireguard IPs (containers + human peers) for uniqueness check
+  ansible.builtin.set_fact:  # only 127.0.0.1 is excluded, so the hub's own wireguard_ip is part of the check
+    _wireguard_all_ips: >-
+      {{ (groups['all']
+      | difference(['127.0.0.1'])
+      | map('extract', hostvars, 'wireguard_ip')
+      | select('defined')
+      | list)
+      + _wireguard_human_peer_ips }}
+  run_once: true
+
+- name: WireGuard | Assert no duplicate wireguard IPs across containers and human peers
+  ansible.builtin.assert:
+    that:
+      - _wireguard_all_ips | unique | length == _wireguard_all_ips | length
+    fail_msg: >-
+      Duplicate WireGuard IP detected across inventory wireguard_ip and
+      wireguard_peers.allowed_ips. Each address in {{ wireguard_network }}
+      must be assigned exactly once.
+      Values: {{ _wireguard_all_ips }}
+    quiet: true
+  run_once: true
diff --git a/deploy/roles/wireguard/templates/client.conf.j2 b/deploy/roles/wireguard/templates/client.conf.j2
new file mode 100644
index 0000000..7ebb112
--- /dev/null
+++ b/deploy/roles/wireguard/templates/client.conf.j2
@@ -0,0 +1,22 @@
+# WireGuard peer config: {{ item.name }} ({{ item.endpoint_kind }})
+# Generated by Ansible. Do not commit to version control.
+ +[Interface] +Address = {{ item.wireguard_ip }}/32 +PrivateKey = {{ item._private_key }} +{% if item.endpoint_kind == 'external' %} +# DNS = 1.1.1.1, 8.8.8.8 +{% endif %} + +[Peer] +PublicKey = {{ wireguard_server_public_key_data.content | b64decode | trim }} +{% if item._preshared_key is defined and item._preshared_key %} +PresharedKey = {{ item._preshared_key }} +{% endif %} +{% if item.endpoint_kind == 'internal' %} +Endpoint = {{ wireguard_hub_lxd_ip }}:{{ wireguard_port }} +{% else %} +Endpoint = {{ wireguard_endpoint_public_resolved }}:{{ wireguard_port }} +{% endif %} +AllowedIPs = {{ wireguard_network }} +PersistentKeepalive = 25 diff --git a/deploy/roles/wireguard/templates/wg0.conf.j2 b/deploy/roles/wireguard/templates/wg0.conf.j2 new file mode 100644 index 0000000..cc69ea9 --- /dev/null +++ b/deploy/roles/wireguard/templates/wg0.conf.j2 @@ -0,0 +1,21 @@ +{{ ansible_managed | comment }} +# WireGuard hub config - runs inside the {{ wireguard_hub_inventory_hostname }} container. +# Spoke-to-spoke routing happens via kernel forwarding on this hub +# (net.ipv4.ip_forward=1 + iptables FORWARD wg0-to-wg0 ACCEPT). + +[Interface] +Address = {{ wireguard_server_ip }}/24 +ListenPort = {{ wireguard_port }} +PrivateKey = {{ wireguard_server_private_key_data.content | b64decode | trim }} +SaveConfig = false + +{% for peer in wireguard_peers_resolved %} +[Peer] +# {{ peer.name }} ({{ peer.endpoint_kind }}) +PublicKey = {{ peer.public_key }} +AllowedIPs = {{ peer.allowed_ips }} +{% if peer._preshared_key is defined and peer._preshared_key %} +PresharedKey = {{ peer._preshared_key }} +{% endif %} + +{% endfor %} diff --git a/docs/WireGuard-VPN.md b/docs/WireGuard-VPN.md new file mode 100644 index 0000000..faa2abd --- /dev/null +++ b/docs/WireGuard-VPN.md @@ -0,0 +1,488 @@ +# WireGuard VPN for DHIS2 Server Tools + +WireGuard provides a secure VPN tunnel for administering DHIS2 infrastructure. 
The hub runs in its own LXD container; every app container (proxy, postgres, dhis, monitor) joins as a WG peer with its own `wg0` interface. Home/admin peers (sysadmin laptops) connect to the hub via the LXD host's public IP over UDP `51820`. Public DHIS2 web access is unaffected. + +WireGuard is set up by `dhis2.yml` in a single deploy, in two stages run back-to-back: + +1. **Mesh bring-up** - `playbooks/wireguard.yml`. Creates the hub container (LXD only), installs WireGuard everywhere, and connects every peer. +2. **Service lockdown** - `playbooks/wireguard-lockdown.yml`. Restricts Grafana, Prometheus, Munin, Glowroot and PostgreSQL to the VPN subnet only. + +Both are imported by `dhis2.yml` and gated on `wireguard_enabled`, so a single `sudo ./deploy.sh` (or `ansible-playbook dhis2.yml`) takes you all the way to a hardened deployment. SSH on port 22 and the DHIS2 web app on 80/443 are deliberately left public. + +If you need the mesh without the firewall (e.g. during cut-over, while not all admins are on the VPN yet), skip lockdown with `--skip-tags wireguard-lockdown` - see [Skipping or reverting the lockdown](#skipping-or-reverting-the-lockdown). + +## Architecture + +``` + Internet + │ + ┌──────┴──────┐ + │ LXD Host │ 104.105.9.136 + │ lxc fwd │ UDP 51820 → 172.19.2.200:51820 + └──────┬──────┘ + │ + ┌────────────┼────────────┐ + │ lxdbr1 (172.19.2.0/24) + │ │ + ┌──────┴───────┐ │ + │ wireguard │ │ LXD bridge (in-band, encrypted by WG) + │ 172.19.2.200 │◄───┼─── wg over UDP between hub and peers + │ wg 10.0.0.1 │ │ + └──────────────┘ │ + │ + ┌──────┐ ┌──────────┐ ┌──────┐ ┌──────────┐ + │proxy │ │ postgres │ │ dhis │ │ monitor │ + │.2/.2 │ │ .20/.3 │ │.11/.4│ │ .30/.5 │ + └──────┘ └──────────┘ └──────┘ └──────────┘ + ▲ + │ wg + ┌────────────┐ │ + │ Home/admin │ wg 10.0.0.6 ──────────┘ (over public internet, + └────────────┘ host fwd's UDP 51820) +``` + +(Numbers under each box: `<LXD IP last octet>/<WG IP last octet>` - e.g. postgres is `172.19.2.20` on the bridge and `10.0.0.3` on the VPN.) + +**Topology**: hub-and-spoke.
Each spoke (app container, home machine) has a single `[Peer]` pointing at the hub with `AllowedIPs = 10.0.0.0/24` and `PersistentKeepalive = 25`. Spoke-to-spoke traffic relays through the hub (which has `net.ipv4.ip_forward=1` and `iptables -A FORWARD -i wg0 -o wg0 -j ACCEPT`). + +**Endpoint resolution**: +- App container peers: `Endpoint = 172.19.2.200:51820` - resolved internally over `lxdbr1`. +- Home peers: `Endpoint = <public-ip-or-hostname>:51820` - resolved over the internet, NAT'd by the LXD host through `lxc network forward`. + +Two separate vars control this: + +| Var | Used for | Default | +|---|---|---| +| `wireguard_endpoint_listen` | `lxc network forward` listen address on the host | auto-detect (`ansible_default_ipv4.address`) | +| `wireguard_endpoint_public` | `Endpoint =` line written into home-peer `.conf` files | falls back to `wireguard_endpoint_listen` | + +On a host with a single primary public IP, leaving both empty works. On cloud VMs with 1:1 NAT (AWS EIP, GCP external IP, Azure public IP), the host's primary interface holds a *private* IP - auto-detect picks the private IP. The forward must still bind on that private IP (it is the only IP the host owns), but home peers must dial the public IP. In that case set: + +```ini +# in inventory/hosts [all:vars] +wireguard_endpoint_public=203.0.113.42 # public IP or DNS name +# wireguard_endpoint_listen left empty - auto-detect picks the private primary +``` + +**App-to-app traffic** (e.g. dhis to postgres) continues to use the LXD bridge (`172.19.2.x`); only admin/external traffic is routed through WG. UFW lockdown rules continue to allow `src=10.0.0.0/24`; packets arrive on each container's `wg0` with the peer's WG IP as source. + +## Prerequisites + +- Ubuntu 22.04+ (kernel 5.6 or newer includes the WireGuard kernel module) +- UFW firewall enabled on the host +- A working dhis2-server-tools LXD deployment (or ready to deploy) +- WireGuard client installed on your admin workstation + +## Quick Start + +### 1.
Configure the inventory + +Copy the template, lock down its permissions (it ends up holding peer IP allocations and, optionally, manually-supplied keys), and set the master switch: + +```bash +cp deploy/inventory/hosts.template deploy/inventory/hosts && chmod 600 deploy/inventory/hosts +``` + +Then edit `deploy/inventory/hosts` and set: + +```ini +[all:vars] +wireguard_enabled=true +``` + +The default `hosts.template` already lists the hub and per-host `wireguard_ip`: + +```ini +[web] +proxy ansible_host=172.19.2.2 wireguard_ip=10.0.0.2 + +[databases] +postgres ansible_host=172.19.2.20 wireguard_ip=10.0.0.3 + +[instances] +dhis ansible_host=172.19.2.11 ... wireguard_ip=10.0.0.4 + +[monitoring] +monitor ansible_host=172.19.2.30 wireguard_ip=10.0.0.5 + +# Group is wireguard_hub (not "wireguard") to avoid Ansible's +# "host and group share name" warning. +[wireguard_hub] +wireguard ansible_host=172.19.2.200 wireguard_ip=10.0.0.1 +``` + +### 2. Define human peers + +Edit `deploy/inventory/group_vars/all/vars.yml`. Assign IPs from `10.0.0.6` upward (`.2`-`.5` are reserved for app containers in the default inventory): + +```yaml +wireguard_peers: + - name: sysadmin + allowed_ips: "10.0.0.6/32" + pg_access: + - { instance: dhis } + + - name: admin-bob + allowed_ips: "10.0.0.7/32" +``` + +Each peer needs only a **name** and **IP address**. Keypairs are generated hub-side automatically. + +### 3. Deploy + +```bash +cd deploy/ + +# Full deployment: DHIS2 setup + WireGuard mesh + service lockdown. +# dhis2.yml imports playbooks/wireguard.yml and then +# playbooks/wireguard-lockdown.yml at the end. +sudo ./deploy.sh + +# Or bring up the VPN only (mesh + lockdown) on an already-deployed cluster: +sudo ansible-playbook playbooks/wireguard.yml \ + && sudo ansible-playbook playbooks/wireguard-lockdown.yml + +# Deploy the mesh without the lockdown (rare; e.g.
mid-cutover): +sudo ansible-playbook dhis2.yml --skip-tags wireguard-lockdown +``` + +A `wireguard_enabled=true` run of `dhis2.yml` will: + +- Provision the `wireguard` LXD container at `172.19.2.200` (LXD setups only - skipped automatically on SSH/distributed deployments). +- Add an `lxc network forward` rule so UDP `51820` from the host's public IP lands inside the wireguard container (LXD only). +- Install WireGuard packages inside the hub and inside every app container. +- Generate hub + peer keypairs (preserved across runs). +- Render `wg0.conf` for the hub and per-peer `.conf` files for every container and every human peer. +- Pull each app container's config from the hub via Ansible `slurp` and start `wg-quick@wg0`. +- Restrict Grafana, Prometheus, Munin, Glowroot and PostgreSQL to the VPN subnet, and strip the `/glowroot` block from the public proxy. + +SSH on port 22 and the DHIS2 web app on 80/443 are deliberately left public. To peel off individual hardening steps (for debugging, mid-cutover, or operator preference), see [Skipping or reverting the lockdown](#skipping-or-reverting-the-lockdown). + +### 4. Retrieve and import a human peer config + +```bash +# View the config (it's rendered inside the hub container) +sudo lxc exec wireguard -- cat /etc/wireguard/clients/sysadmin.conf + +# Or copy it out to the host and scp +sudo lxc file pull wireguard/etc/wireguard/clients/sysadmin.conf . +scp sysadmin.conf my-laptop:~/ +``` + +The config is complete - no editing needed. Import directly into your WireGuard client. + +**Connect:** + +```bash +# Linux +sudo wg-quick up /path/to/sysadmin.conf + +# macOS / Windows +# Import the .conf file into the WireGuard app + +# Mobile (generate QR code on the hub). +# qrencode is not pulled in by wireguard-tools - install it inside the hub +# container first. 
The redirection must run inside the container, hence +# `bash -c '...'`; a top-level `<` would be parsed by the LXD host shell +# and fail because the .conf file lives inside the container. +sudo lxc exec wireguard -- apt-get install -y qrencode +sudo lxc exec wireguard -- bash -c \ + 'qrencode -t ansiutf8 < /etc/wireguard/clients/sysadmin.conf' +``` + +### 5. Verify deployment + +A full `dhis2.yml` run brings the mesh up *and* applies lockdown. Verify both: + +**Mesh health (run regardless of lockdown):** + +```bash +# On the LXD host (only meaningful for LXD deployments): +sudo lxc exec wireguard -- wg show # all peers + recent handshakes +sudo lxc exec proxy -- wg show +sudo lxc network forward show lxdbr1 <listen-ip> # confirms UDP 51820 forward + +# On the home machine (connected over WG): +ping 10.0.0.1 # hub +ping 10.0.0.5 # monitor via mesh relay +``` + +**Lockdown effects (skip this block if you ran with `--skip-tags wireguard-lockdown`):** + +```bash +# From the LXD host (not on the VPN) - should fail / time out. +curl -m 3 http://172.19.2.30:3000/ # Grafana - was reachable, now blocked +curl -m 3 http://172.19.2.11:4000/ # Glowroot - was reachable, now blocked + +# From a connected WG peer (e.g. 10.0.0.6): +curl -m 3 http://10.0.0.5:3000/ # Grafana via VPN +curl -m 3 http://10.0.0.4:4000/ # Glowroot via VPN +psql -h 10.0.0.3 -U dhis -d dhis # only if pg_access is set for this peer + +# DHIS2 itself stays public - sanity check it hasn't moved: +curl -I https://your.dhis2.fqdn/ # expect 200 / 302 +``` + +If the checks run from the home machine fail while `wg show` on the hub and app containers looks healthy, the most likely cause is a misconfigured `wireguard_endpoint_public` (cloud 1:1 NAT) or UDP `51820` blocked at the cloud security group - see [Troubleshooting](#troubleshooting). To recover monitoring access while you debug, run `sudo ansible-playbook dhis2.yml --skip-tags wireguard-lockdown` to peel lockdown off without tearing down the mesh.
+ +## Service lockdown + +The lockdown stage runs automatically as part of `dhis2.yml` whenever `wireguard_enabled=true`. It is idempotent - re-running with no inventory changes does nothing - and can also be invoked standalone to re-apply after manual UFW edits: + +```bash +cd deploy/ + +# Dry-run the lockdown stage on its own (mesh assumed already up). +sudo ansible-playbook playbooks/wireguard-lockdown.yml --check --diff + +# Re-apply standalone (rarely needed; dhis2.yml already does this). +sudo ansible-playbook playbooks/wireguard-lockdown.yml +``` + +### What gets locked down + +| Service | Container | Port | Before lockdown | After lockdown | +|---|---|---|---|---| +| Grafana | monitor | 3000 | Accessible via proxy `/grafana` | VPN-only (`10.0.0.5:3000`) | +| Prometheus | monitor | 9090 | Accessible via proxy | VPN-only | +| Munin | monitor | 80 | Accessible via proxy `/munin` | VPN-only | +| Glowroot | dhis instances | 4000 | Accessible from LXD network | VPN-only | +| munin-node | dhis instances | 4949 | Accessible from monitor container | Monitor container only | +| PostgreSQL | postgres | 5432 | LXD network only | VPN-only, per-peer rules | + +**Not affected**: SSH on port 22 and the DHIS2 web app on ports 80/443 stay public. The lockdown only touches operational/admin services. + +### Per-component control + +Each lockdown step has its own tag so you can opt into or out of a subset. 
Tags work whether the playbook runs via `dhis2.yml` or standalone: + +| Tag | Effect | +|---|---| +| `lockdown-proxy` | Empties monitoring upstream configs on nginx/apache; re-renders DHIS2 vhosts without `/glowroot` blocks | +| `lockdown-monitor` | UFW rules: allow Grafana/Prometheus/Munin from VPN subnet only; remove proxy-to-Grafana and proxy-to-Munin rules | +| `lockdown-postgres` | Per-peer `pg_hba.conf` rules from `wireguard_peers[*].pg_access`; UFW rule allowing 5432 from VPN subnet | +| `lockdown-instances` | UFW rule allowing Glowroot 4000 from VPN subnet; munin-node 4949 restricted to monitor container | +| `wireguard-lockdown` | Umbrella tag matching all four of the above (used by `--skip-tags wireguard-lockdown`) | + +```bash +# Deploy with the mesh up but PostgreSQL still LXD-only (e.g. while a +# remote DBA hasn't been onboarded to the VPN yet): +sudo ansible-playbook dhis2.yml --skip-tags lockdown-postgres + +# Re-run only the proxy lockdown after editing inventory: +sudo ansible-playbook playbooks/wireguard-lockdown.yml --tags lockdown-proxy +``` + +### PostgreSQL VPN access + +Database access is granted **per peer** via the optional `pg_access` field. A peer without `pg_access` has **no** PostgreSQL access - the role does not add a blanket grant. + +Each `pg_access` entry is either `{ instance: <name> }` - which derives both the database and the role from an `[instances]` host's LXD container name (in this project a DHIS2 instance's database, role and owner all equal its container name) - or an explicit `{ database, user }` pair for wildcards. The role writes one `hostssl <database> <user> <peer_ip> scram-sha-256` line to `pg_hba.conf` per entry. A password is still required.
+ +```yaml +wireguard_peers: + - name: sysadmin + allowed_ips: "10.0.0.6/32" + pg_access: + - { instance: dhis } # least-privilege; tracks the 'dhis' instance container + + # - name: superuser + # allowed_ips: "10.0.0.7/32" + # pg_access: + # - { database: all, user: all } # superuser-equivalent +``` + +A referenced `instance` must be a host in the `[instances]` group. Resolved `database`/`user` names must match `^[a-zA-Z0-9_]+$`. The PostgreSQL keyword `all` is allowed (via the explicit `{ database, user }` form); arbitrary identifiers with regex metacharacters are rejected at validation time. + +If a peer's `allowed_ips` routes additional networks (comma-separated CIDRs), set `peer_ip` explicitly to the single `/32` used for pg_hba/UFW rules. + +App-level pg_hba entries (added by the `create-instance` role) are unaffected - they continue to work over the LXD bridge. + +The role manages all `pg_access`-derived rules inside a single `blockinfile` block delimited by `# BEGIN/END ANSIBLE MANAGED — wireguard per-peer pg_access`. Removing a peer (or its `pg_access` entry) and re-running `playbooks/wireguard-lockdown.yml` removes the corresponding `hostssl` line. + +### Skipping or reverting the lockdown + +Because lockdown is now part of `dhis2.yml`, "skipping" and "reverting" are the same operation: tell `dhis2.yml` not to run the lockdown tag(s) you want to undo. The mesh is unaffected - only the firewall and proxy hardening flips back. + +```bash +# Skip the whole lockdown for this run (mesh stays up, services revert +# to public). Idempotent: re-runs without the flag will re-lock them. +sudo ansible-playbook dhis2.yml --skip-tags wireguard-lockdown + +# Revert one component only - e.g. 
unlock PostgreSQL while a remote DBA +# joins the VPN, then drop the flag once they're on: +sudo ansible-playbook dhis2.yml --skip-tags lockdown-postgres +``` + +Important: skipping a lockdown tag on its own does **not** restore the original UFW rules / nginx vhost content - it just stops the lockdown tasks from running on that play. To re-create the pre-lockdown state, also re-run the role that originally produced those rules: + +```bash +# Restore proxy-to-monitoring UFW rules and the /glowroot proxy block: +sudo ansible-playbook dhis2.yml --tags monitoring,proxy-install \ + --skip-tags wireguard-lockdown +``` + +To turn WireGuard off completely (mesh + lockdown), set `wireguard_enabled=false` in inventory and re-run `dhis2.yml`. The mesh plays no-op and the lockdown plays no-op - but again, services that were already locked down won't auto-revert; run the `dhis2.yml --tags monitoring,proxy-install --skip-tags wireguard-lockdown` recipe above to restore them. + +## Configuration reference + +All variables are set in `deploy/roles/wireguard/defaults/main.yml` and can be overridden in the inventory. + +| Variable | Default | Description | +|---|---|---| +| `wireguard_enabled` | `false` | Master switch | +| `wireguard_network` | `10.0.0.0/24` | VPN subnet | +| `wireguard_server_ip` | `10.0.0.1` | Hub address on the VPN | +| `wireguard_port` | `51820` | UDP listen port | +| `wireguard_interface` | `wg0` | WireGuard interface name | +| `wireguard_hub_inventory_hostname` | `wireguard` | Inventory name of the hub container | +| `wireguard_hub_lxd_ip` | `172.19.2.200` | Static LXD IP for the hub container | +| `wireguard_endpoint_listen` | `""` | Host-side listen IP for `lxc network forward`. Must be on the host. Auto-detect via `ansible_default_ipv4.address` when empty | +| `wireguard_endpoint_public` | `""` | Public IP/hostname advertised to home peers as `Endpoint =`. Falls back to `wireguard_endpoint_listen` when empty. 
**Set explicitly on cloud VMs with 1:1 NAT.** | +| `wireguard_auto_generate_keys` | `true` | Generate peer keypairs hub-side | +| `wireguard_auto_generate_psk` | `false` | Auto-generate pre-shared keys | +| `wireguard_client_config_dir` | `/etc/wireguard/clients` | Directory on the hub for peer configs | +| `wireguard_client_key_dir` | `/etc/wireguard/clients/keys` | Directory on the hub for peer keys | +| `wireguard_prune_orphans` | `false` | Remove files for peers no longer in inventory | +| `wireguard_lockdown_monitoring` | `false` | Gates the `/glowroot` proxy block in `roles/create-instance/templates/{nginx,apache2}/instance.j2`. Default `false` keeps `/glowroot` reachable through the public proxy. `playbooks/wireguard-lockdown.yml` (`lockdown_proxy.yml`) sets it to `true` via task vars when re-rendering, which strips the block. Re-run `dhis2.yml --tags proxy-install` to revert. Not normally set in inventory | +| `wireguard_peers` | `[]` | List of human/admin peers | + +### Peer definition (human peers only) + +App containers are auto-derived from inventory `wireguard_ip` and must NOT be listed in `wireguard_peers`. + +| Field | Required | Description | +|---|---|---| +| `name` | Yes | Identifier - filesystem-safe (letters, digits, dot, underscore, hyphen) | +| `allowed_ips` | Yes | Peer's VPN IP (e.g. `10.0.0.6/32`). May be comma-separated to route additional networks | +| `public_key` | No* | Peer's WG public key. *Required only when `wireguard_auto_generate_keys: false` | +| `preshared_key` | No | Optional PSK for post-quantum hedge | +| `peer_ip` | No | Single `/32` CIDR for pg_hba/UFW rules. Defaults to first CIDR in `allowed_ips` | +| `pg_access` | No | List of `{ instance: <name> }` (derives db/user from the instance container) or `{ database, user }` - adds per-peer pg_hba rules | + +### Key generation modes + +**Auto-generate (default)**: `wireguard_auto_generate_keys: true` + +Only `name` and `allowed_ips` are required per human peer.
The hub container generates every keypair - including its own and one per app container - and produces complete, ready-to-import `.conf` files. + +**Manual keys**: `wireguard_auto_generate_keys: false` + +Each peer must supply a `public_key`. App containers fall back to manual key supply via host_vars (advanced, rare). + +### Pre-shared key (PSK) auto-generation + +Set `wireguard_auto_generate_psk: true` to generate a PSK for each peer that doesn't supply an explicit `preshared_key`. + +> **Warning**: enabling on an existing deployment generates fresh PSKs on the next run for every peer that lacks an explicit `preshared_key`. All affected clients must re-import their `.conf`. + +## Split tunneling + +Default is split-tunnel: only `10.0.0.0/24` routes through WG. App-to-app traffic continues via the LXD bridge. To route all client traffic through the VPN, edit a human peer's `.conf` and change: + +```ini +# Split tunnel (default) +AllowedIPs = 10.0.0.0/24 + +# Full tunnel +AllowedIPs = 0.0.0.0/0 +``` + +## Adding and removing peers + +### Adding a new peer + +Add the peer to `wireguard_peers` (and a `pg_access` entry if they need database access), then re-run the full deploy: + +```bash +sudo ansible-playbook dhis2.yml +``` + +The mesh stage adds the peer and applies the change with `wg syncconf` (no existing tunnels dropped); the lockdown stage immediately after picks up the new `pg_access` entries and writes them to `pg_hba.conf`. Retrieve the new peer's config from `/etc/wireguard/clients/<name>.conf` inside the hub container. + +If you want to do just the WireGuard portion without re-running the rest of `dhis2.yml`: + +```bash +sudo ansible-playbook playbooks/wireguard.yml \ + && sudo ansible-playbook playbooks/wireguard-lockdown.yml --tags lockdown-postgres +``` + +### Removing a peer + +Remove the peer entry and re-run. Set `wireguard_prune_orphans: true` to also clean up orphaned key/config files.
+ +### Rotating peer keys + +```bash +# On the LXD host, reach into the hub container: +sudo lxc exec wireguard -- rm /etc/wireguard/clients/keys/sysadmin.{key,pub,psk} +sudo ansible-playbook dhis2.yml +``` + +The affected peer must re-import their updated `.conf`. + +## File layout on the hub container + +``` +/etc/wireguard/ +├── wg0.conf +├── server_private.key +├── server_public.key +└── clients/ + ├── proxy.conf # auto-derived app container + ├── postgres.conf + ├── dhis.conf + ├── monitor.conf + ├── sysadmin.conf # human peer + └── keys/ + ├── proxy.{key,pub} + ├── sysadmin.{key,pub,psk} + └── ... +``` + +## Migration from 10.8.0.0/24 (host-bridge architecture) + +Earlier versions of this role ran WireGuard on the LXD host with a `wg0`-to-`lxdbr1` bridge and used the `10.8.0.0/24` subnet. To migrate: + +```bash +# 1. On the LXD host: tear down the old WG instance. +sudo wg-quick down wg0 +sudo systemctl disable wg-quick@wg0 +sudo apt purge wireguard wireguard-tools -y +sudo rm -rf /etc/wireguard + +# 2. Remove old UFW/iptables rules on the host. +sudo ufw status numbered # find any rules referencing 10.8.0.0/24 or wg0 +sudo ufw delete <rule-number> # delete each (numbers shift after every delete) +# Also remove the old WIREGUARD VPN FORWARDING block from /etc/ufw/before.rules. + +# 3. Verify the new inventory has wireguard_ip per app host (see Quick Start). +# 4. Re-deploy. +cd deploy/ +sudo ./deploy.sh +``` + +Old `10.8.0.0/24` client `.conf` files will not work - re-import the freshly generated `10.0.0.0/24` configs. + +## Disabling WireGuard + +Set `wireguard_enabled=false` in the inventory. Subsequent `dhis2.yml` runs no-op both the mesh and the lockdown - every play in `playbooks/wireguard.yml` and `playbooks/wireguard-lockdown.yml` gates on `wireguard_enabled`. + +This **stops future WireGuard changes** but does **not** tear down an existing hub container or revert UFW / `pg_hba.conf` / proxy edits already applied by previous lockdown runs. To fully remove: + +```bash +# 1.
Stop and remove the hub container (LXD setups only). +sudo lxc stop wireguard && sudo lxc delete wireguard +sudo lxc network forward port remove lxdbr1 <listen-ip> udp 51820 + +# 2. Stop wg-quick on each app container. +for c in proxy postgres dhis monitor; do + sudo lxc exec "$c" -- systemctl disable --now wg-quick@wg0 + sudo lxc exec "$c" -- rm -rf /etc/wireguard +done + +# 3. Restore public access to services that the lockdown playbook +# locked down. Re-running the upstream DHIS2 roles re-creates the +# original UFW and proxy configs idempotently: +sudo ansible-playbook dhis2.yml --tags monitoring,proxy-install +```