-
Notifications
You must be signed in to change notification settings - Fork 136
NE-2664: add option to separate router and haproxy containers #772
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,57 @@ | ||
| #!/bin/bash | ||
| set -euo pipefail | ||
|
|
||
| ## | ||
| ## This script expects HAProxy running in foreground, do not initialize it in daemon mode! | ||
|
|
||
| # Graceful shutdown delay: a whole number of seconds followed by `s`, defaulting to `45s`. | ||
| timeoutStr="${ROUTER_GRACEFUL_SHUTDOWN_DELAY:-45s}" | ||
| [[ "$timeoutStr" =~ ^[0-9]+s$ ]] || { | ||
| echo "Invalid timeout: $timeoutStr" | ||
| exit 1 | ||
| } | ||
|
|
||
| # `signaled` records whether the stop handler ran; `timeout` is the delay without the `s` suffix. | ||
| signaled=0 | ||
| timeout=${timeoutStr%s} | ||
|
|
||
| # stopHAProxy is the handler for the termination signals: it asks HAProxy to drain with SIGUSR1, | ||
| # polls it once per second for up to $timeout seconds, and falls back to SIGTERM if it is still alive. | ||
| # Every `kill` is guarded because `set -e` is active: an unguarded failure (HAProxy already gone) | ||
| # would abort the script with a failure status instead of letting it report a clean shutdown. | ||
| stopHAProxy() { | ||
| signaled=1 | ||
|
|
||
| # HAProxy handles SIGUSR1 by finishing its process as soon as all the current connections, | ||
| # active or not, are closed by either the client or the backend server. | ||
| echo "Sending SIGUSR1 to HAProxy process $haproxyPID" | ||
| if ! kill -s USR1 "$haproxyPID"; then | ||
| echo "Could not signal HAProxy process $haproxyPID, assuming it already finished" | ||
| return 0 | ||
| fi | ||
|
|
||
| # Poll the process, returning as soon as it is not alive anymore. | ||
| for _ in $(seq 1 "$timeout"); do | ||
| sleep 1 | ||
| if ! kill -0 "$haproxyPID" 2>/dev/null; then | ||
| echo "All connections are closed" | ||
| return 0 | ||
| fi | ||
| done | ||
|
|
||
| # HAProxy handles SIGTERM by closing all the TCP connections and waiting for the full TCP handshake | ||
| # (FIN / ACK / FIN-ACK) from both sides, and only after that it finishes. `kill` runs asynchronously, | ||
| # and `wait` maintains this script alive until haproxy finishes. | ||
| echo "SIGUSR1 timed out, sending SIGTERM to HAProxy process $haproxyPID" | ||
| # HAProxy may have finished right after the last poll, so a failing `kill` here is not an error. | ||
| kill -s TERM "$haproxyPID" || true | ||
| } | ||
|
|
||
| echo "Starting HAProxy. SIGUSR1 timeout is ${timeout}s" | ||
| # Run HAProxy in the background and remember its PID for the traps and the wait loop below. | ||
| /usr/sbin/haproxy "$@" & | ||
| haproxyPID=$! | ||
|
|
||
| trap stopHAProxy SIGTERM SIGUSR1 SIGINT | ||
| # Forward SIGHUP to HAProxy; `|| true` keeps `set -e` from aborting the script if HAProxy is already gone. | ||
| trap 'kill -s HUP "$haproxyPID" || true' SIGHUP | ||
|
|
||
| exit_code=0 | ||
| while kill -0 "$haproxyPID" 2>/dev/null; do | ||
| # Start `wait` again in case it returned due to a non terminate signal, e.g. SIGHUP. | ||
| # Reset the status on every attempt: a `wait` interrupted by a trapped signal returns a status | ||
| # above 128, and that value must not be reported if HAProxy later finishes with status 0. | ||
| exit_code=0 | ||
| wait "$haproxyPID" || exit_code=$? | ||
| done | ||
|
|
||
| # Received signal (usually SIGTERM) takes precedence during `wait`, overriding with a clean exit | ||
| [ "$signaled" = 1 ] && exit 0 | ||
|
|
||
| echo "haproxy exited with status code $exit_code" | ||
| exit "$exit_code" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,7 @@ package templaterouter | |
|
|
||
| import ( | ||
| "bytes" | ||
| "context" | ||
| "crypto/md5" | ||
| "encoding/pem" | ||
| "fmt" | ||
|
|
@@ -15,10 +16,12 @@ import ( | |
| "text/template" | ||
| "time" | ||
|
|
||
| "github.com/bcicen/go-haproxy" | ||
| "github.com/fsnotify/fsnotify" | ||
| "github.com/prometheus/client_golang/prometheus" | ||
|
|
||
| "k8s.io/apimachinery/pkg/util/sets" | ||
| "k8s.io/apimachinery/pkg/util/wait" | ||
|
|
||
| routev1 "github.com/openshift/api/route/v1" | ||
|
|
||
|
|
@@ -666,8 +669,22 @@ func (r *templateRouter) writeCertificates(cfg *ServiceAliasConfig) error { | |
| return nil | ||
| } | ||
|
|
||
| // reloadRouter reloads haproxy, choosing the mechanism from the environment. | ||
| // With ROUTER_HAPROXY_ADMIN_UNIX_SOCKET unset it delegates to reloadRouterEmbedded; | ||
| // with it set it delegates to reloadRouterExternal, except on shutdown, which is a no-op. | ||
| func (r *templateRouter) reloadRouter(shutdown bool) error { | ||
| socketPath := os.Getenv("ROUTER_HAPROXY_ADMIN_UNIX_SOCKET") | ||
| switch { | ||
| case socketPath == "": | ||
| return r.reloadRouterEmbedded(shutdown) | ||
| case shutdown: | ||
| // We are in HAProxy's master/worker mode, currently implemented as a sidecar, | ||
| // so there is no local process to handle and the sidecar one already received SIGTERM. | ||
| return nil | ||
| default: | ||
| return r.reloadRouterExternal(socketPath) | ||
| } | ||
| } | ||
|
|
||
| // reloadRouterEmbedded executes the router's reload script. | ||
| func (r *templateRouter) reloadRouterEmbedded(shutdown bool) error { | ||
| if r.reloadFn != nil { | ||
| return r.reloadFn(shutdown) | ||
| } | ||
|
|
@@ -679,7 +696,34 @@ func (r *templateRouter) reloadRouter(shutdown bool) error { | |
| if err != nil { | ||
| return fmt.Errorf("error reloading router: %v\n%s", err, string(out)) | ||
| } | ||
| log.V(0).Info("router reloaded", "output", string(out)) | ||
| log.V(0).Info("router reloaded", "mode", "embedded", "output", string(out)) | ||
| return nil | ||
| } | ||
|
|
||
| // reloadRouterExternal sends a reload command to the external haproxy. | ||
| func (r *templateRouter) reloadRouterExternal(adminSocket string) error { | ||
| // TODO missing application's context | ||
| _ = wait.PollUntilContextCancel(context.Background(), 2*time.Second, true, func(ctx context.Context) (done bool, err error) { | ||
| _, errstat := os.Lstat(adminSocket) | ||
| if errstat != nil { | ||
| log.Info("waiting for haproxy socket", "message", errstat.Error()) | ||
| return false, nil | ||
| } | ||
| return true, nil | ||
| }) | ||
|
Comment on lines
+706
to
+713
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Unbounded socket poll blocks the reload goroutine permanently if HAProxy's admin socket never appears.
Additionally, the error result is discarded (`_ =`). 🛡️ Proposed fix:
- // TODO missing application's context
- _ = wait.PollUntilContextCancel(context.Background(), 2*time.Second, true, func(ctx context.Context) (done bool, err error) {
+ pollCtx, cancel := context.WithTimeout(ctx, 30*time.Second) // propagate caller's context; add a sensible upper bound
+ defer cancel()
+ if err := wait.PollUntilContextCancel(pollCtx, 2*time.Second, true, func(ctx context.Context) (done bool, err error) {
_, errstat := os.Lstat(adminSocket)
if errstat != nil {
log.Info("waiting for haproxy socket", "message", errstat.Error())
return false, nil
}
return true, nil
- })
+ }); err != nil {
+ return fmt.Errorf("timed out waiting for haproxy admin socket %q: %w", adminSocket, err)
+ }The caller ( 🤖 Prompt for AI Agents |
||
| client := haproxy.HAProxyClient{Addr: "unix://" + adminSocket, Timeout: 10 /*seconds*/} | ||
| outputBuffer, err := client.RunCommand("reload") | ||
| if err != nil { | ||
| return fmt.Errorf("error connecting haproxy: %w", err) | ||
| } | ||
| output := outputBuffer.String() | ||
|
|
||
| // `reload` command is synchronous since haproxy 2.7, so it is safe to continue as soon as it returns. | ||
| // It should return Success=1 in the first line in case everything went well, anything else is considered a failure. | ||
| if !strings.HasPrefix(output, "Success=1") { | ||
| return fmt.Errorf("error reloading router: %s", output) | ||
| } | ||
| log.Info("router reloaded", "mode", "sidecar") | ||
| return nil | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`kill` commands are unguarded under `set -e` — risk of premature script termination. `set -euo pipefail` is active for the whole script. Both `kill -s USR1 $haproxyPID` (line 22) and `kill -s TERM $haproxyPID` (line 37) will cause the script to abort if HAProxy has already exited. The race on line 37 is realistically reachable: HAProxy can drain and die naturally during the `kill -0` poll loop, then the loop falls through by timeout — at that point HAProxy is already gone and `kill -s TERM` fails. A similar, narrower race exists at line 22. 🐛 Proposed fix
Also applies to: 37-37
🤖 Prompt for AI Agents