diff --git a/playbooks/dhclient_azure_cache.yml b/playbooks/dhclient_azure_cache.yml
new file mode 100644
index 000000000..3d534efe0
--- /dev/null
+++ b/playbooks/dhclient_azure_cache.yml
@@ -0,0 +1,6 @@
+---
+- name: Configure persistent DHCP caching for cloud VMs
+  hosts: all:!localhost
+  gather_facts: true
+  roles:
+    - role: dhclient_cache
diff --git a/playbooks/dhclient_cache.yml b/playbooks/dhclient_cache.yml
new file mode 100644
index 000000000..3d534efe0
--- /dev/null
+++ b/playbooks/dhclient_cache.yml
@@ -0,0 +1,6 @@
+---
+- name: Configure persistent DHCP caching for cloud VMs
+  hosts: all:!localhost
+  gather_facts: true
+  roles:
+    - role: dhclient_cache
diff --git a/playbooks/roles/dhclient_cache/defaults/main.yml b/playbooks/roles/dhclient_cache/defaults/main.yml
new file mode 100644
index 000000000..3ec542a51
--- /dev/null
+++ b/playbooks/roles/dhclient_cache/defaults/main.yml
@@ -0,0 +1,6 @@
+---
+# Default variables for dhclient_cache role
+dhclient_cache_enabled: true
+dhclient_cache_timeout: 3600
+dhclient_cache_retry_interval: 10
+dhclient_cache_max_retries: 30
diff --git a/playbooks/roles/dhclient_cache/tasks/isc-dhclient.yml b/playbooks/roles/dhclient_cache/tasks/isc-dhclient.yml
new file mode 100644
index 000000000..adda2f4aa
--- /dev/null
+++ b/playbooks/roles/dhclient_cache/tasks/isc-dhclient.yml
@@ -0,0 +1,78 @@
+---
+# ISC dhclient persistent DHCP caching implementation
+# Used by: Debian, Ubuntu, older RHEL/CentOS/Fedora
+
+- name: Detect primary network interface
+  ansible.builtin.set_fact:
+    primary_interface: "{{ ansible_default_ipv4.interface | default('eth0') }}"
+
+- name: Create dhclient enter hook for persistent lease caching
+  become: true
+  become_flags: "su - -c"
+  become_method: sudo
+  ansible.builtin.template:
+    src: isc-dhclient-enter-hook.j2
+    dest: /etc/dhcp/dhclient-enter-hooks.d/kdevops-persistent-cache
+    mode: "0755"
+    owner: root
+    group: root
+
+- name: Create dhclient exit hook for persistent lease caching
+  become: true
+  become_flags: "su - -c"
+  become_method: sudo
+  ansible.builtin.template:
+    src: isc-dhclient-exit-hook.j2
+    dest: /etc/dhcp/dhclient-exit-hooks.d/kdevops-persistent-cache
+    mode: "0755"
+    owner: root
+    group: root
+
+- name: Update dhclient configuration for aggressive retry
+  become: true
+  become_flags: "su - -c"
+  become_method: sudo
+  ansible.builtin.lineinfile:
+    path: /etc/dhcp/dhclient.conf
+    regexp: "^{{ item.key }}\\s+"
+    line: "{{ item.key }} {{ item.value }};"
+    create: false
+  loop:
+    - { key: "timeout", value: "{{ dhclient_cache_timeout }}" }
+    - { key: "retry", value: "{{ dhclient_cache_retry_interval }}" }
+
+- name: Ensure lease cache directory exists
+  become: true
+  become_flags: "su - -c"
+  become_method: sudo
+  ansible.builtin.file:
+    path: /var/lib/dhcp/cache
+    state: directory
+    mode: "0755"
+    owner: root
+    group: root
+
+- name: Create initial cached lease from current network facts
+  become: true
+  become_flags: "su - -c"
+  become_method: sudo
+  # The exit hook restores by sourcing this file as shell, so seed it in the
+  # same KEY="value" format the enter hook writes (not the raw lease file).
+  ansible.builtin.copy:
+    dest: /var/lib/dhcp/cache/dhclient.{{ primary_interface }}.cached-lease
+    content: |
+      # Cached DHCP lease for {{ primary_interface }}
+      # Seeded by Ansible from gathered network facts
+      interface="{{ primary_interface }}"
+      new_ip_address="{{ ansible_default_ipv4.address }}"
+      new_subnet_mask="{{ ansible_default_ipv4.netmask }}"
+      new_routers="{{ ansible_default_ipv4.gateway | default('') }}"
+      new_domain_name_servers="{{ ansible_dns.nameservers | default([]) | join(' ') }}"
+      new_broadcast_address="{{ ansible_default_ipv4.broadcast | default('') }}"
+      cache_timestamp="{{ ansible_date_time.epoch }}"
+    # Never overwrite a cache the enter hook has already refreshed.
+    force: false
+    mode: "0644"
+    owner: root
+    group: root
+  when: ansible_default_ipv4.address is defined
diff --git a/playbooks/roles/dhclient_cache/tasks/main.yml b/playbooks/roles/dhclient_cache/tasks/main.yml
new file mode 100644
index 000000000..a53242633
--- /dev/null
+++ b/playbooks/roles/dhclient_cache/tasks/main.yml
@@ -0,0 +1,45 @@
+---
+- name: Configure persistent DHCP caching for cloud VMs
+  block:
+    - name: Detect DHCP client mechanism
+      ansible.builtin.set_fact:
+        dhcp_mechanism: >-
+          {%- if ansible_facts['os_family']|lower == 'suse' -%}
+          wicked
+          {%- elif ansible_facts['os_family']|lower in ['redhat', 'rocky', 'almalinux'] -%}
+          {%- if ansible_facts['distribution_major_version']|int >= 8 -%}
+          networkmanager
+          {%- else -%}
+          isc-dhclient
+          {%- endif -%}
+          {%- elif ansible_facts['os_family']|lower == 'debian' -%}
+          isc-dhclient
+          {%- else -%}
+          unknown
+          {%- endif -%}
+
+    - name: Display detected DHCP mechanism
+      ansible.builtin.debug:
+        msg: "Detected DHCP mechanism: {{ dhcp_mechanism }} on {{ ansible_facts['distribution'] }} {{ ansible_facts['distribution_version'] }}"
+
+    - name: Configure ISC dhclient persistent caching
+      ansible.builtin.include_tasks: isc-dhclient.yml
+      when: dhcp_mechanism == 'isc-dhclient'
+
+    - name: Configure NetworkManager persistent caching
+      ansible.builtin.include_tasks: networkmanager.yml
+      when: dhcp_mechanism == 'networkmanager'
+
+    - name: Configure wicked persistent caching
+      ansible.builtin.include_tasks: wicked.yml
+      when: dhcp_mechanism == 'wicked'
+
+    - name: Warn about unsupported DHCP mechanism
+      ansible.builtin.debug:
+        msg: "WARNING: DHCP mechanism '{{ dhcp_mechanism }}' is not supported for persistent caching"
+      when: dhcp_mechanism == 'unknown'
+
+  when:
+    - dhclient_cache_enabled|bool
+    - kdevops_enable_terraform is defined
+    - kdevops_enable_terraform|bool
diff --git a/playbooks/roles/dhclient_cache/tasks/networkmanager.yml b/playbooks/roles/dhclient_cache/tasks/networkmanager.yml
new file mode 100644
index 000000000..eab04cf6a
--- /dev/null
+++ b/playbooks/roles/dhclient_cache/tasks/networkmanager.yml
@@ -0,0 +1,51 @@
+---
+# NetworkManager persistent DHCP caching implementation
+# Used by: RHEL 8+, Fedora 30+, CentOS 8+
+
+- name: Detect primary network interface
+  ansible.builtin.set_fact:
+    primary_interface: "{{ ansible_default_ipv4.interface | default('eth0') }}"
+
+- name: Create NetworkManager dispatcher script for DHCP cache save
+  become: true
+  become_flags: "su - -c"
+  become_method: sudo
+  ansible.builtin.template:
+    src: networkmanager-dispatcher.j2
+    dest: /etc/NetworkManager/dispatcher.d/10-kdevops-dhcp-cache
+    mode: "0755"
+    owner: root
+    group: root
+
+- name: Ensure lease cache directory exists
+  become: true
+  become_flags: "su - -c"
+  become_method: sudo
+  ansible.builtin.file:
+    path: /var/lib/NetworkManager/cache
+    state: directory
+    mode: "0755"
+    owner: root
+    group: root
+
+- name: Get current IP configuration via NetworkManager
+  become: true
+  become_flags: "su - -c"
+  become_method: sudo
+  # Seed the cache in the same KEY="value" format that the dispatcher
+  # script writes, because the dispatcher restores by sourcing this file.
+  ansible.builtin.shell: |
+    cache_file=/var/lib/NetworkManager/cache/{{ primary_interface }}.cached-config
+    ip4_address=$(nmcli -t -f IP4.ADDRESS device show {{ primary_interface }} | cut -d: -f2)
+    ip4_gateway=$(nmcli -t -f IP4.GATEWAY device show {{ primary_interface }} | cut -d: -f2)
+    ip4_dns=$(nmcli -t -f IP4.DNS device show {{ primary_interface }} | cut -d: -f2 | tr '\n' ' ')
+    {
+      echo "IP4_ADDRESS=\"${ip4_address}\""
+      echo "IP4_GATEWAY=\"${ip4_gateway}\""
+      echo "IP4_DNS=\"${ip4_dns}\""
+      echo "cache_timestamp=\"$(date +%s)\""
+    } > "${cache_file}"
+    chmod 644 "${cache_file}"
+  args:
+    creates: /var/lib/NetworkManager/cache/{{ primary_interface }}.cached-config
+  changed_when: true
diff --git a/playbooks/roles/dhclient_cache/tasks/wicked.yml b/playbooks/roles/dhclient_cache/tasks/wicked.yml
new file mode 100644
index 000000000..25fd27afb
--- /dev/null
+++ b/playbooks/roles/dhclient_cache/tasks/wicked.yml
@@ -0,0 +1,52 @@
+---
+# Wicked persistent DHCP caching implementation
+# Used by: SUSE Linux Enterprise Server, openSUSE
+
+- name: Detect primary network interface
+  ansible.builtin.set_fact:
+    primary_interface: "{{ ansible_default_ipv4.interface | default('eth0') }}"
+
+- name: Create wicked extension for DHCP cache save
+  become: true
+  become_flags: "su - -c"
+  become_method: sudo
+  ansible.builtin.template:
+    src: wicked-extension.j2
+    dest: /etc/wicked/extensions/kdevops-dhcp-cache
+    mode: "0755"
+    owner: root
+    group: root
+
+- name: Ensure lease cache directory exists
+  become: true
+  become_flags: "su - -c"
+  become_method: sudo
+  ansible.builtin.file:
+    path: /var/lib/wicked/cache
+    state: directory
+    mode: "0755"
+    owner: root
+    group: root
+
+- name: Get current IP configuration via wicked
+  become: true
+  become_flags: "su - -c"
+  become_method: sudo
+  # Seed the cache in the same KEY="value" format that the wicked extension
+  # script writes, because the extension restores by sourcing this file.
+  ansible.builtin.shell: |
+    cache_file=/var/lib/wicked/cache/{{ primary_interface }}.cached-config
+    wicked_info=$(wicked show {{ primary_interface }})
+    ip_addr=$(echo "${wicked_info}" | grep -oP 'addr.*inet \K[0-9./]+')
+    gateway=$(echo "${wicked_info}" | grep -oP 'route.*default.*via \K[0-9.]+')
+    dns=$(echo "${wicked_info}" | grep -oP 'DNS[^0-9]*\K[0-9.]+' | tr '\n' ' ')
+    {
+      echo "IP_ADDR=\"${ip_addr}\""
+      echo "GATEWAY=\"${gateway}\""
+      echo "DNS=\"${dns}\""
+      echo "cache_timestamp=\"$(date +%s)\""
+    } > "${cache_file}"
+    chmod 644 "${cache_file}"
+  args:
+    creates: /var/lib/wicked/cache/{{ primary_interface }}.cached-config
+  changed_when: true
diff --git a/playbooks/roles/dhclient_cache/templates/isc-dhclient-enter-hook.j2 b/playbooks/roles/dhclient_cache/templates/isc-dhclient-enter-hook.j2
new file mode 100644
index 000000000..d716ee8a1
--- /dev/null
+++ b/playbooks/roles/dhclient_cache/templates/isc-dhclient-enter-hook.j2
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Persistent DHCP lease caching - ISC dhclient enter hook
+# This hook saves successful DHCP leases to a persistent cache
+# Managed by kdevops - dhclient_cache role
+
+CACHE_DIR="/var/lib/dhcp/cache"
+CACHE_FILE="${CACHE_DIR}/dhclient.${interface}.cached-lease"
+CACHE_TIMEOUT={{ dhclient_cache_timeout }}
+
+# Cache on every event that delivers a lease: BOUND, RENEW, REBIND, REBOOT
+case "${reason}" in
+    BOUND|RENEW|REBIND|REBOOT)
+        # Ensure cache directory exists
+        mkdir -p "${CACHE_DIR}"
+
+        # Save current lease information to cache file
+        if [ -n "${new_ip_address}" ]; then
+            {
+                echo "# Cached DHCP lease for ${interface}"
+                echo "# Cached at: $(date)"
+                echo "# Cache timeout: ${CACHE_TIMEOUT} seconds"
+                echo "interface=\"${interface}\""
+                echo "new_ip_address=\"${new_ip_address}\""
+                echo "new_subnet_mask=\"${new_subnet_mask}\""
+                echo "new_routers=\"${new_routers}\""
+                echo "new_domain_name_servers=\"${new_domain_name_servers}\""
+                echo "new_broadcast_address=\"${new_broadcast_address}\""
+                echo "new_dhcp_lease_time=\"${new_dhcp_lease_time}\""
+                echo "cache_timestamp=\"$(date +%s)\""
+            } > "${CACHE_FILE}"
+
+            chmod 644 "${CACHE_FILE}"
+            logger -t kdevops-dhcp-cache "Saved DHCP lease to cache for ${interface}: ${new_ip_address}"
+        fi
+        ;;
+esac
diff --git a/playbooks/roles/dhclient_cache/templates/isc-dhclient-exit-hook.j2 b/playbooks/roles/dhclient_cache/templates/isc-dhclient-exit-hook.j2
new file mode 100644
index 000000000..b7388e697
--- /dev/null
+++ b/playbooks/roles/dhclient_cache/templates/isc-dhclient-exit-hook.j2
@@ -0,0 +1,56 @@
+#!/bin/bash
+# Persistent DHCP lease caching - ISC dhclient exit hook
+# This hook restores cached DHCP configuration if DHCP fails after reboot
+# Managed by kdevops - dhclient_cache role
+
+CACHE_DIR="/var/lib/dhcp/cache"
+CACHE_FILE="${CACHE_DIR}/dhclient.${interface}.cached-lease"
+CACHE_TIMEOUT={{ dhclient_cache_timeout }}
+
+# Only attempt cache restore on TIMEOUT or FAIL events
+case "${reason}" in
+    TIMEOUT|FAIL)
+        if [ -f "${CACHE_FILE}" ]; then
+            # Source the cached lease
+            source "${CACHE_FILE}"
+
+            # Check cache validity; a missing timestamp counts as expired
+            current_timestamp=$(date +%s)
+            cache_age=$((current_timestamp - ${cache_timestamp:-0}))
+
+            if [ ${cache_age} -lt ${CACHE_TIMEOUT} ]; then
+                logger -t kdevops-dhcp-cache "DHCP ${reason} for ${interface}, restoring from cache (age: ${cache_age}s)"
+
+                # Apply cached IP configuration
+                if [ -n "${new_ip_address}" ]; then
+                    ip addr flush dev "${interface}" 2>/dev/null || true
+                    ip addr add "${new_ip_address}/${new_subnet_mask}" dev "${interface}"
+
+                    if [ -n "${new_routers}" ]; then
+                        ip route add default via "${new_routers}" dev "${interface}" 2>/dev/null || true
+                    fi
+
+                    if [ -n "${new_domain_name_servers}" ]; then
+                        # Update resolv.conf with cached DNS servers
+                        {
+                            echo "# Generated by dhclient-azure-cache from cached lease"
+                            echo "# Cached DNS servers restored after DHCP ${reason}"
+                            for nameserver in ${new_domain_name_servers}; do
+                                echo "nameserver ${nameserver}"
+                            done
+                        } > /etc/resolv.conf
+                    fi
+
+                    logger -t kdevops-dhcp-cache "Successfully restored cached IP configuration: ${new_ip_address}"
+
+                    # Set exit code to indicate we handled the failure
+                    exit 0
+                fi
+            else
+                logger -t kdevops-dhcp-cache "Cached lease for ${interface} expired (age: ${cache_age}s > ${CACHE_TIMEOUT}s)"
+            fi
+        else
+            logger -t kdevops-dhcp-cache "No cached lease found for ${interface} after DHCP ${reason}"
+        fi
+        ;;
+esac
diff --git a/playbooks/roles/dhclient_cache/templates/networkmanager-dispatcher.j2 b/playbooks/roles/dhclient_cache/templates/networkmanager-dispatcher.j2
new file mode 100644
index 000000000..ecf46bfe5
--- /dev/null
+++ b/playbooks/roles/dhclient_cache/templates/networkmanager-dispatcher.j2
@@ -0,0 +1,79 @@
+#!/bin/bash
+# Persistent DHCP caching - NetworkManager dispatcher script
+# This script saves successful DHCP configuration and restores on failure
+# Managed by kdevops - dhclient_cache role
+
+INTERFACE="$1"
+ACTION="$2"
+
+# connectivity-change events are not tied to a device, so NetworkManager
+# passes no usable interface name; fall back to the primary interface.
+if [ -z "${INTERFACE}" ] || [ "${INTERFACE}" = "none" ]; then
+    INTERFACE="{{ primary_interface }}"
+fi
+
+CACHE_DIR="/var/lib/NetworkManager/cache"
+CACHE_FILE="${CACHE_DIR}/${INTERFACE}.cached-config"
+CACHE_TIMEOUT={{ dhclient_cache_timeout }}
+
+# Ensure cache directory exists
+mkdir -p "${CACHE_DIR}"
+
+case "${ACTION}" in
+    up|dhcp4-change)
+        # Save current network configuration to cache
+        IP4_ADDRESS=$(nmcli -t -f IP4.ADDRESS device show "${INTERFACE}" 2>/dev/null | cut -d: -f2)
+        IP4_GATEWAY=$(nmcli -t -f IP4.GATEWAY device show "${INTERFACE}" 2>/dev/null | cut -d: -f2)
+        IP4_DNS=$(nmcli -t -f IP4.DNS device show "${INTERFACE}" 2>/dev/null | cut -d: -f2 | tr '\n' ' ')
+
+        if [ -n "${IP4_ADDRESS}" ]; then
+            {
+                echo "# Cached NetworkManager DHCP configuration for ${INTERFACE}"
+                echo "# Cached at: $(date)"
+                echo "# Cache timeout: ${CACHE_TIMEOUT} seconds"
+                echo "IP4_ADDRESS=\"${IP4_ADDRESS}\""
+                echo "IP4_GATEWAY=\"${IP4_GATEWAY}\""
+                echo "IP4_DNS=\"${IP4_DNS}\""
+                echo "CACHED_CONNECTION_UUID=\"${CONNECTION_UUID}\""
+                echo "cache_timestamp=\"$(date +%s)\""
+            } > "${CACHE_FILE}"
+            chmod 644 "${CACHE_FILE}"
+            logger -t kdevops-dhcp-cache "Saved NetworkManager DHCP config to cache for ${INTERFACE}: ${IP4_ADDRESS}"
+        fi
+        ;;
+
+    connectivity-change)
+        # Check if we have connectivity, if not try to restore from cache
+        CONNECTIVITY=$(nmcli -t -f CONNECTIVITY general status)
+        if [ "${CONNECTIVITY}" = "none" ] || [ "${CONNECTIVITY}" = "limited" ]; then
+            if [ -f "${CACHE_FILE}" ]; then
+                source "${CACHE_FILE}"
+
+                # Check cache validity; a missing timestamp counts as expired
+                current_timestamp=$(date +%s)
+                cache_age=$((current_timestamp - ${cache_timestamp:-0}))
+
+                if [ ${cache_age} -lt ${CACHE_TIMEOUT} ]; then
+                    logger -t kdevops-dhcp-cache "NetworkManager connectivity ${CONNECTIVITY} for ${INTERFACE}, restoring from cache (age: ${cache_age}s)"
+
+                    # Apply cached configuration using nmcli
+                    if [ -n "${IP4_ADDRESS}" ]; then
+                        # nmcli selects profiles by name/UUID, not device name,
+                        # so prefer the profile UUID recorded at save time.
+                        conn="${CACHED_CONNECTION_UUID:-${INTERFACE}}"
+                        nmcli connection modify "${conn}" ipv4.method manual \
+                            ipv4.addresses "${IP4_ADDRESS}" \
+                            ipv4.gateway "${IP4_GATEWAY}" \
+                            ipv4.dns "${IP4_DNS}"
+                        nmcli connection up "${conn}"
+                        logger -t kdevops-dhcp-cache "Successfully restored cached NetworkManager configuration: ${IP4_ADDRESS}"
+                    fi
+                else
+                    logger -t kdevops-dhcp-cache "Cached config for ${INTERFACE} expired (age: ${cache_age}s > ${CACHE_TIMEOUT}s)"
+                fi
+            else
+                logger -t kdevops-dhcp-cache "No cached config found for ${INTERFACE} after connectivity change"
+            fi
+        fi
+        ;;
+esac
diff --git a/playbooks/roles/dhclient_cache/templates/wicked-extension.j2 b/playbooks/roles/dhclient_cache/templates/wicked-extension.j2
new file mode 100644
index 000000000..2cc5a1af3
--- /dev/null
+++ b/playbooks/roles/dhclient_cache/templates/wicked-extension.j2
@@ -0,0 +1,74 @@
+#!/bin/bash
+# Persistent DHCP caching - wicked extension script
+# This script saves successful DHCP configuration and restores on failure
+# Managed by kdevops - dhclient_cache role
+
+INTERFACE="$1"
+ACTION="$2"
+CACHE_DIR="/var/lib/wicked/cache"
+CACHE_FILE="${CACHE_DIR}/${INTERFACE}.cached-config"
+CACHE_TIMEOUT={{ dhclient_cache_timeout }}
+
+# Ensure cache directory exists
+mkdir -p "${CACHE_DIR}"
+
+case "${ACTION}" in
+    lease-acquired|lease-renewed)
+        # Save current network configuration to cache
+        WICKED_INFO=$(wicked show "${INTERFACE}" 2>/dev/null)
+        IP_ADDR=$(echo "${WICKED_INFO}" | grep -oP 'addr.*inet \K[0-9./]+')
+        GATEWAY=$(echo "${WICKED_INFO}" | grep -oP 'route.*default.*via \K[0-9.]+')
+        DNS=$(echo "${WICKED_INFO}" | grep -oP 'DNS[^0-9]*\K[0-9.]+' | tr '\n' ' ')
+
+        if [ -n "${IP_ADDR}" ]; then
+            {
+                echo "# Cached wicked DHCP configuration for ${INTERFACE}"
+                echo "# Cached at: $(date)"
+                echo "# Cache timeout: ${CACHE_TIMEOUT} seconds"
+                echo "IP_ADDR=\"${IP_ADDR}\""
+                echo "GATEWAY=\"${GATEWAY}\""
+                echo "DNS=\"${DNS}\""
+                echo "cache_timestamp=\"$(date +%s)\""
+            } > "${CACHE_FILE}"
+            chmod 644 "${CACHE_FILE}"
+            logger -t kdevops-dhcp-cache "Saved wicked DHCP config to cache for ${INTERFACE}: ${IP_ADDR}"
+        fi
+        ;;
+
+    link-down|lease-lost)
+        # Try to restore from cache if DHCP fails
+        if [ -f "${CACHE_FILE}" ]; then
+            source "${CACHE_FILE}"
+
+            # Check cache validity; a missing timestamp counts as expired
+            current_timestamp=$(date +%s)
+            cache_age=$((current_timestamp - ${cache_timestamp:-0}))
+
+            if [ ${cache_age} -lt ${CACHE_TIMEOUT} ]; then
+                logger -t kdevops-dhcp-cache "Wicked ${ACTION} for ${INTERFACE}, restoring from cache (age: ${cache_age}s)"
+
+                # Apply cached configuration directly with iproute2
+                if [ -n "${IP_ADDR}" ]; then
+                    ip addr add "${IP_ADDR}" dev "${INTERFACE}" 2>/dev/null
+                    [ -n "${GATEWAY}" ] && ip route add default via "${GATEWAY}" dev "${INTERFACE}" 2>/dev/null
+
+                    # Update resolv.conf
+                    if [ -n "${DNS}" ]; then
+                        {
+                            echo "# Generated by kdevops dhcp cache from wicked"
+                            for nameserver in ${DNS}; do
+                                echo "nameserver ${nameserver}"
+                            done
+                        } > /etc/resolv.conf
+                    fi
+
+                    logger -t kdevops-dhcp-cache "Successfully restored cached wicked configuration: ${IP_ADDR}"
+                fi
+            else
+                logger -t kdevops-dhcp-cache "Cached config for ${INTERFACE} expired (age: ${cache_age}s > ${CACHE_TIMEOUT}s)"
+            fi
+        else
+            logger -t kdevops-dhcp-cache "No cached config found for ${INTERFACE} after ${ACTION}"
+        fi
+        ;;
+esac
diff --git a/playbooks/roles/gen_tfvars/defaults/main.yml b/playbooks/roles/gen_tfvars/defaults/main.yml
index caaf306eb..5c49af3a3 100644
--- a/playbooks/roles/gen_tfvars/defaults/main.yml
+++ b/playbooks/roles/gen_tfvars/defaults/main.yml
@@ -28,6 +28,10 @@ terraform_aws_ebs_volumes_per_instance: "0"
 terraform_aws_ebs_volume_size: 0
 terraform_aws_ebs_volume_type: "gp3"
 
+# Azure defaults - prevent undefined variable errors for hidden bool configs
+# that default to 'n' (kconfig doesn't output these to yaml)
+terraform_azure_accelerated_networking_enabled: false
+
 terraform_oci_assign_public_ip: false
 terraform_oci_use_existing_vcn: false
 
diff --git a/playbooks/roles/pkg/tasks/debian.yml b/playbooks/roles/pkg/tasks/debian.yml
index 362df3b1d..252777ea8 100644
--- a/playbooks/roles/pkg/tasks/debian.yml
+++ b/playbooks/roles/pkg/tasks/debian.yml
@@ -2,6 +2,7 @@
 - name: Debian_libaio rename for buster
   ansible.builtin.set_fact:
     is_bookworm: '{{ ansible_distribution_release == "bookworm" }}'
+    is_bullseye: '{{ ansible_distribution_release == "bullseye" }}'
     is_buster: '{{ ansible_distribution_release == "buster" }}'
     is_trixie: '{{ ansible_distribution_release == "trixie" }}'
 
@@ -10,4 +11,5 @@
     pkg_libaio: "libaio1"
   when: >
     is_buster|bool or
+    is_bullseye|bool or
     is_bookworm|bool
diff --git a/scripts/terraform.Makefile b/scripts/terraform.Makefile
index c751bda0d..3abcdcb9e 100644
--- a/scripts/terraform.Makefile
+++ b/scripts/terraform.Makefile
@@ -220,6 +220,9 @@ $(KDEVOPS_PROVISIONED_SSH):
 	$(Q)ansible \
 		baseline:dev:service \
 		-m wait_for_connection
+	$(Q)ansible-playbook $(ANSIBLE_VERBOSE) \
+		-i hosts playbooks/dhclient_cache.yml \
+		--extra-vars=@./extra_vars.yaml
 	$(Q)touch $(KDEVOPS_PROVISIONED_SSH)
 	$(Q)ansible-playbook \
 		-i hosts playbooks/extra_volumes.yml \
diff --git a/terraform/Kconfig b/terraform/Kconfig
index dbb04ccf1..4c5df15a7 100644
--- a/terraform/Kconfig
+++ b/terraform/Kconfig
@@ -62,9 +62,14 @@ config TERRAFORM_USE_OPENTOFU
 endchoice
 
 config TERRAFORM_BINARY_PATH
-	string
-	default "/usr/local/bin/terraform" if TERRAFORM_USE_TERRAFORM
+	string "Path to Terraform or OpenTofu binary"
+	default "/usr/bin/terraform" if TERRAFORM_USE_TERRAFORM
 	default "/usr/local/bin/tofu" if TERRAFORM_USE_OPENTOFU
 	output yaml
+	help
+	  Specify the path to the Terraform or OpenTofu binary. The default
+	  is set based on your Infrastructure as Code tool choice above.
+	  Override this if your binary is installed in a different location,
+	  such as /usr/local/bin/terraform or /usr/bin/tofu.
 
 endif # TERRAFORM