Skip to content

Commit bf5749f

Browse files
tmshort and claude committed
Add immediate fallback dialer to eliminate Happy Eyeballs delay
Implements ImmediateFallbackDialContext that removes the 300ms delay from Go's Happy Eyeballs algorithm by trying addresses sequentially in the order returned by DNS, without racing or artificial delays. This respects DNS server address ordering (which already optimizes for the local network environment) while eliminating the delay that causes IPv6 "network is unreachable" failures in dual-stack environments where IPv6 has internal-only routing. All network clients (HTTP, Kubernetes REST, image pulls) now use the immediate fallback dialer. Signed-off-by: Todd Short <tshort@redhat.com> Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent 7a60e71 commit bf5749f

4 files changed

Lines changed: 197 additions & 5 deletions

File tree

cmd/catalogd/main.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,13 @@ func init() {
149149
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
150150
utilruntime.Must(ocv1.AddToScheme(scheme))
151151
ctrl.SetLogger(klog.NewKlogr())
152+
153+
// Configure global HTTP transport to use custom dialer for all HTTP clients
154+
// including the containers/image library used for pulling from registries.
155+
// The custom dialer tries addresses in DNS order without Happy Eyeballs' 300ms delay.
156+
if err := httputil.ConfigureDefaultTransport(); err != nil {
157+
setupLog.Error(err, "Failed to configure custom dialer")
158+
}
152159
}
153160

154161
func main() {
@@ -274,7 +281,10 @@ func run(ctx context.Context) error {
274281
}
275282

276283
// Create manager
277-
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
284+
restConfig := ctrl.GetConfigOrDie()
285+
// Configure REST client to use custom dialer without Happy Eyeballs delay
286+
restConfig.Dial = httputil.ImmediateFallbackDialContext
287+
mgr, err := ctrl.NewManager(restConfig, ctrl.Options{
278288
Scheme: scheme,
279289
Metrics: metricsServerOptions,
280290
PprofBindAddress: cfg.pprofAddr,

cmd/operator-controller/main.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,13 @@ func init() {
198198
tlsprofiles.AddFlags(flags)
199199

200200
ctrl.SetLogger(klog.NewKlogr())
201+
202+
// Configure global HTTP transport to use custom dialer for all HTTP clients
203+
// including the containers/image library used for pulling from registries.
204+
// The custom dialer tries addresses in DNS order without Happy Eyeballs' 300ms delay.
205+
if err := httputil.ConfigureDefaultTransport(); err != nil {
206+
setupLog.Error(err, "Failed to configure custom dialer")
207+
}
201208
}
202209
func validateMetricsFlags() error {
203210
if (cfg.certFile != "" && cfg.keyFile == "") || (cfg.certFile == "" && cfg.keyFile != "") {
@@ -325,6 +332,8 @@ func run() error {
325332
}
326333

327334
restConfig := ctrl.GetConfigOrDie()
335+
// Configure REST client to use custom dialer without Happy Eyeballs delay
336+
restConfig.Dial = httputil.ImmediateFallbackDialContext
328337
mgr, err := ctrl.NewManager(restConfig, ctrl.Options{
329338
Scheme: scheme.Scheme,
330339
Metrics: metricsServerOptions,

internal/shared/util/http/httputil.go

Lines changed: 94 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,98 @@
11
package http
22

33
import (
4+
"context"
45
"crypto/tls"
6+
"fmt"
7+
"net"
58
"net/http"
69
"time"
10+
11+
"k8s.io/klog/v2"
712
)
813

14+
// ImmediateFallbackDialContext creates a DialContext function that tries connection
15+
// attempts sequentially in the order returned by DNS, without the 300ms Happy Eyeballs
16+
// delay. This respects DNS server ordering while eliminating the racing delay.
17+
func ImmediateFallbackDialContext(ctx context.Context, network, address string) (net.Conn, error) {
18+
// Split the address into host and port
19+
host, port, err := net.SplitHostPort(address)
20+
if err != nil {
21+
return nil, err
22+
}
23+
24+
klog.V(4).InfoS("Resolving DNS for connection", "host", host, "port", port, "network", network)
25+
26+
// Resolve all IP addresses for the host
27+
ips, err := net.DefaultResolver.LookupIP(ctx, "ip", host)
28+
if err != nil {
29+
klog.V(2).ErrorS(err, "DNS resolution failed", "host", host)
30+
return nil, err
31+
}
32+
33+
if len(ips) == 0 {
34+
err := fmt.Errorf("no IP addresses found for host %s", host)
35+
klog.V(2).ErrorS(err, "DNS resolution returned no addresses", "host", host)
36+
return nil, err
37+
}
38+
39+
klog.V(4).InfoS("DNS resolution complete", "host", host, "addressCount", len(ips))
40+
41+
dialer := &net.Dialer{
42+
Timeout: 30 * time.Second,
43+
KeepAlive: 30 * time.Second,
44+
}
45+
46+
// Try each address sequentially in the order DNS returned them
47+
var lastErr error
48+
for i, ip := range ips {
49+
// Determine address type and dial network
50+
var addrType, dialNetwork string
51+
if ip.To4() != nil {
52+
addrType = "IPv4"
53+
dialNetwork = network
54+
if network == "tcp" {
55+
dialNetwork = "tcp4"
56+
}
57+
} else {
58+
addrType = "IPv6"
59+
dialNetwork = network
60+
if network == "tcp" {
61+
dialNetwork = "tcp6"
62+
}
63+
}
64+
65+
target := net.JoinHostPort(ip.String(), port)
66+
klog.V(2).InfoS("Attempting connection", "host", host, "type", addrType,
67+
"address", ip.String(), "port", port, "attempt", i+1, "of", len(ips))
68+
69+
conn, err := dialer.DialContext(ctx, dialNetwork, target)
70+
if err == nil {
71+
klog.InfoS("Successfully connected", "host", host, "type", addrType,
72+
"address", ip.String(), "port", port)
73+
return conn, nil
74+
}
75+
klog.V(2).ErrorS(err, "Connection failed", "host", host, "type", addrType,
76+
"address", ip.String(), "port", port, "attempt", i+1, "of", len(ips))
77+
lastErr = err
78+
}
79+
80+
klog.ErrorS(lastErr, "All connection attempts failed", "host", host, "totalAttempts", len(ips))
81+
return nil, lastErr
82+
}
83+
84+
// ConfigureDefaultTransport configures http.DefaultTransport to use ImmediateFallbackDialContext.
85+
// This affects all HTTP clients that use the default transport, including the containers/image
86+
// library used for pulling from registries. Returns an error if DefaultTransport is not *http.Transport.
87+
func ConfigureDefaultTransport() error {
88+
transport, ok := http.DefaultTransport.(*http.Transport)
89+
if !ok {
90+
return fmt.Errorf("http.DefaultTransport is not *http.Transport, cannot configure custom dialer")
91+
}
92+
transport.DialContext = ImmediateFallbackDialContext
93+
return nil
94+
}
95+
996
func BuildHTTPClient(cpw *CertPoolWatcher) (*http.Client, error) {
1097
httpClient := &http.Client{Timeout: 10 * time.Second}
1198

@@ -14,13 +101,16 @@ func BuildHTTPClient(cpw *CertPoolWatcher) (*http.Client, error) {
14101
return nil, err
15102
}
16103

17-
tlsConfig := &tls.Config{
104+
// Clone the default transport to inherit custom dialer and other defaults
105+
transport, ok := http.DefaultTransport.(*http.Transport)
106+
if !ok {
107+
return nil, fmt.Errorf("http.DefaultTransport is not *http.Transport, cannot build HTTP client")
108+
}
109+
tlsTransport := transport.Clone()
110+
tlsTransport.TLSClientConfig = &tls.Config{
18111
RootCAs: pool,
19112
MinVersion: tls.VersionTLS12,
20113
}
21-
tlsTransport := &http.Transport{
22-
TLSClientConfig: tlsConfig,
23-
}
24114
httpClient.Transport = tlsTransport
25115

26116
return httpClient, nil
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
package http
2+
3+
import (
4+
"context"
5+
"net"
6+
"testing"
7+
)
8+
9+
func TestImmediateFallbackDialContext(t *testing.T) {
10+
tests := []struct {
11+
name string
12+
address string
13+
wantFail bool
14+
minExpectedAddrs int // minimum addresses we expect to find
15+
}{
16+
{
17+
name: "dual-stack hostname tries addresses in DNS order",
18+
address: "localhost:80",
19+
wantFail: true, // nothing listening on port 80
20+
minExpectedAddrs: 1, // should have at least one address
21+
},
22+
{
23+
name: "IPv4-only hostname",
24+
address: "127.0.0.1:80",
25+
wantFail: true,
26+
minExpectedAddrs: 1,
27+
},
28+
{
29+
name: "IPv6-only hostname",
30+
address: "[::1]:80",
31+
wantFail: true,
32+
minExpectedAddrs: 1,
33+
},
34+
}
35+
36+
for _, tt := range tests {
37+
t.Run(tt.name, func(t *testing.T) {
38+
ctx := context.Background()
39+
40+
// Parse the address to extract host for DNS lookup
41+
host, _, err := net.SplitHostPort(tt.address)
42+
if err != nil {
43+
t.Fatalf("Failed to split host:port: %v", err)
44+
}
45+
46+
// Look up IPs to verify DNS resolution works
47+
ips, err := net.DefaultResolver.LookupIP(ctx, "ip", host)
48+
if err != nil {
49+
t.Skipf("DNS resolution failed for %s: %v (this is OK for test environments)", host, err)
50+
}
51+
52+
if len(ips) < tt.minExpectedAddrs {
53+
t.Skip("Not enough IP addresses found for hostname")
54+
}
55+
56+
t.Logf("DNS returned %d address(es) - will try each in order:", len(ips))
57+
58+
// Log all addresses for debugging
59+
for i, ip := range ips {
60+
ipType := "IPv6"
61+
if ip.To4() != nil {
62+
ipType = "IPv4"
63+
}
64+
t.Logf(" [%d] %s (%s)", i, ip.String(), ipType)
65+
}
66+
67+
// Actually call the dialer function
68+
_, err = ImmediateFallbackDialContext(ctx, "tcp", tt.address)
69+
70+
if tt.wantFail {
71+
if err == nil {
72+
t.Errorf("Expected connection to fail, but it succeeded")
73+
} else {
74+
t.Logf("Connection failed as expected: %v", err)
75+
}
76+
} else {
77+
if err != nil {
78+
t.Errorf("Expected connection to succeed, but got error: %v", err)
79+
}
80+
}
81+
})
82+
}
83+
}

0 commit comments

Comments
 (0)