From 70a6f54c966d536481a64f8efd5e51c0f3ce6ee9 Mon Sep 17 00:00:00 2001
From: Joao Morais
Date: Thu, 7 May 2026 09:01:03 -0300
Subject: [PATCH] add option to separate router and haproxy containers

Add an opt-in configuration that informs the router that HAProxy runs
on a sidecar container, so its process should be managed via its admin
socket.

Also add a script that manages signals sent from kubelet to HAProxy.
The script should be the entry point of the container in order to
capture terminating signals, converting to SIGUSR1, and then into
SIGTERM when the timeout expires. The former asks HAProxy to finish as
soon as all the current connections close, and the latter asks HAProxy
to close (FIN) connections and finish just after receiving FIN+ACK from
both sides of all connections.

The script should be moved to a new HAProxy image, separated from the
router one.

https://redhat.atlassian.net/browse/NE-2664
---
 images/router/haproxy/start-haproxy | 67 ++++++++++++++++++++++++++++++++++
 pkg/router/template/router.go       | 50 ++++++++++++++++++++++++-
 2 files changed, 115 insertions(+), 2 deletions(-)
 create mode 100755 images/router/haproxy/start-haproxy

diff --git a/images/router/haproxy/start-haproxy b/images/router/haproxy/start-haproxy
new file mode 100755
index 000000000..e7771f2d1
--- /dev/null
+++ b/images/router/haproxy/start-haproxy
@@ -0,0 +1,67 @@
+#!/bin/bash
+set -euo pipefail
+
+##
+## This script expects HAProxy running in foreground, do not initialize it in daemon mode!
+
+timeoutStr=${ROUTER_GRACEFUL_SHUTDOWN_DELAY:-45s}
+if ! [[ "$timeoutStr" =~ ^[0-9]+s$ ]]; then
+  echo "Invalid timeout: $timeoutStr"
+  exit 1
+fi
+
+timeout="${timeoutStr%s}"
+signaled=0
+
+stopHAProxy() {
+  signaled=1
+
+  # HAProxy handles SIGUSR1 by finishing its process as soon as all the current connections,
+  # active or not, are closed by either the client or the backend server.
+  # `kill` fails if HAProxy is already gone, and `set -e` would then abort the script from
+  # inside this handler with a failure status, so the failure is handled explicitly.
+  echo "Sending SIGUSR1 to HAProxy process $haproxyPID"
+  if ! kill -s USR1 "$haproxyPID"; then
+    echo "HAProxy process $haproxyPID is already gone"
+    return 0
+  fi
+
+  # Poll the process, returning as soon as it is not alive anymore.
+  for i in $(seq 1 "$timeout"); do
+    sleep 1
+    if ! kill -0 "$haproxyPID" 2>/dev/null; then
+      echo "All connections are closed"
+      return
+    fi
+  done
+
+  # HAProxy handles SIGTERM by closing all the TCP connections and waiting for the full TCP handshake
+  # (FIN / ACK / FIN-ACK) from both sides, and only after that it finishes. `kill` runs asynchronously,
+  # and `wait` keeps this script alive until haproxy finishes.
+  echo "SIGUSR1 timed out, sending SIGTERM to HAProxy process $haproxyPID"
+  kill -s TERM "$haproxyPID" || true
+}
+
+echo "Starting HAProxy. SIGUSR1 timeout is ${timeout}s"
+/usr/sbin/haproxy "$@" &
+haproxyPID=$!
+
+trap stopHAProxy SIGTERM SIGUSR1 SIGINT
+# Single quotes defer the expansion to signal time; `|| true` keeps `set -e` from aborting the
+# script if HAProxy exits right before the signal is forwarded.
+trap 'kill -s HUP "$haproxyPID" || true' SIGHUP
+
+exit_code=0
+while kill -0 "$haproxyPID" 2>/dev/null; do
+  # Start `wait` again in case it returned due to a non terminate signal, e.g. SIGHUP.
+  # Reset the status on every round, otherwise the >128 status of an interrupted `wait`
+  # would stick even if HAProxy later finishes with status 0.
+  exit_code=0
+  wait "$haproxyPID" || exit_code=$?
+done
+
+# Received signal (usually SIGTERM) takes precedence during `wait`, overriding with a clean exit
+[ "$signaled" = 1 ] && exit 0
+
+echo "haproxy exited with status code $exit_code"
+exit $exit_code
diff --git a/pkg/router/template/router.go b/pkg/router/template/router.go
index 1f1f4ca1d..569894141 100644
--- a/pkg/router/template/router.go
+++ b/pkg/router/template/router.go
@@ -2,6 +2,7 @@ package templaterouter
 
 import (
 	"bytes"
+	"context"
 	"crypto/md5"
 	"encoding/pem"
 	"fmt"
@@ -15,10 +16,12 @@ import (
 	"text/template"
 	"time"
 
+	"github.com/bcicen/go-haproxy"
 	"github.com/fsnotify/fsnotify"
 	"github.com/prometheus/client_golang/prometheus"
 
 	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/apimachinery/pkg/util/wait"
 
 	routev1 "github.com/openshift/api/route/v1"
 
@@ -666,8 +669,22 @@ func (r *templateRouter) writeCertificates(cfg *ServiceAliasConfig) error {
 	return nil
 }
 
-// reloadRouter executes the router's reload script.
+// reloadRouter reloads haproxy.
 func (r *templateRouter) reloadRouter(shutdown bool) error {
+	adminSocket := os.Getenv("ROUTER_HAPROXY_ADMIN_UNIX_SOCKET")
+	if adminSocket != "" {
+		if shutdown {
+			// We are in HAProxy's master/worker mode, currently implemented as a sidecar,
+			// so there is no local process to handle and the sidecar one already received SIGTERM.
+			return nil
+		}
+		return r.reloadRouterExternal(adminSocket)
+	}
+	return r.reloadRouterEmbedded(shutdown)
+}
+
+// reloadRouterEmbedded executes the router's reload script.
+func (r *templateRouter) reloadRouterEmbedded(shutdown bool) error {
 	if r.reloadFn != nil {
 		return r.reloadFn(shutdown)
 	}
@@ -679,7 +696,36 @@ func (r *templateRouter) reloadRouter(shutdown bool) error {
 	if err != nil {
 		return fmt.Errorf("error reloading router: %v\n%s", err, string(out))
 	}
-	log.V(0).Info("router reloaded", "output", string(out))
+	log.V(0).Info("router reloaded", "mode", "embedded", "output", string(out))
+	return nil
+}
+
+// reloadRouterExternal sends a reload command to the external haproxy.
+func (r *templateRouter) reloadRouterExternal(adminSocket string) error {
+	// TODO missing application's context
+	err := wait.PollUntilContextCancel(context.Background(), 2*time.Second, true, func(context.Context) (bool, error) {
+		if _, statErr := os.Lstat(adminSocket); statErr != nil {
+			log.Info("waiting for haproxy socket", "message", statErr.Error())
+			return false, nil
+		}
+		return true, nil
+	})
+	if err != nil {
+		return fmt.Errorf("error waiting for haproxy socket: %w", err)
+	}
+	client := haproxy.HAProxyClient{Addr: "unix://" + adminSocket, Timeout: 10 /*seconds*/}
+	outputBuffer, err := client.RunCommand("reload")
+	if err != nil {
+		return fmt.Errorf("error connecting haproxy: %w", err)
+	}
+	output := outputBuffer.String()
+
+	// `reload` command is synchronous since haproxy 2.7, so it is safe to continue as soon as it returns.
+	// It should return Success=1 in the first line in case everything went well, anything else is considered a failure.
+	if !strings.HasPrefix(output, "Success=1") {
+		return fmt.Errorf("error reloading router: %s", output)
+	}
+	log.Info("router reloaded", "mode", "sidecar")
 	return nil
 }
 