From 4f3fb26e5ad33d968ba6aba6135546f946bd11cb Mon Sep 17 00:00:00 2001 From: hugo Date: Thu, 18 Jun 2026 01:15:48 +0000 Subject: [PATCH] =?UTF-8?q?registry:=20real=20control-plane=20resolver=20?= =?UTF-8?q?=E2=80=94=20k8s=20Service=20lookup=20by=20resource-id=20label?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cached/chain resolver strategies had their cache machinery built but their LookupFunc was a STUB (conventionLookup) that fell back to a naming-template guess. That guess can't work for Saturn inference: the model's Service name is pd-{identity5}-{name}-{id}, which embeds the owning group and endpoint name — neither of which phoebe receives. So static/convention were walking-skeleton modes; the real multi-model design (cached/chain) was never wired. Implement the real LookupFunc (internal/registry/k8s.go): resolve a deployment id to its model Service by the saturncloud.io/resource-id label, read off the in-cluster Kubernetes API. That label's value IS X-Saturn-Resource-Id (Saturn stamps it on every inference Service via basic_resource_labels), so it's an exact join key — no name reconstruction, no new Atlas API, no new headers. It's self-correcting: the Service NAME template can change Atlas-side without breaking phoebe. - Select `resource-id=,service-type=internal` — the service-type clause is load-bearing: a deployment's ssh Service shares the resource-id label, so without it the match is ambiguous. (Tested.) - Port: prefer the port named "8000" (Route.port_name == str(container_port), and the vLLM serve port is 8000 == Deployment.proxy_port); single-port fallback; error rather than silently pick a wrong port on a multi-port Service. (Tested.) - 0 matches → ErrNotFound (CachedResolver negative-caches, short TTL, so a new model is reachable fast). API error / ambiguous / no-served-port → transient error (NOT cached, retried). (Tested.) - Wire k8sLookup into buildResolver for cached/chain (replacing conventionLookup). chain keeps convention as a fallback ONLY for k8s-API-unreachable. - New config registry.k8sNamespace (Saturn: "main-namespace"); required for cached/chain. Fixed the misleading placeholder convention comment in settings.example.yaml so nobody flips strategy:convention and gets silent 404s. - Adds client-go (in-cluster client). Unit-tested with the fake clientset — no cluster needed; 8 named tests covering the invariants + negatives + attacks. ROUTING CONTRACT (one-way door, for Hugo's review): phoebe now depends on two Atlas-owned k8s conventions for inference routing — (1) inference Services carry saturncloud.io/resource-id + service-type=internal; (2) the served port is 8000. Both hold today; this promotes them to a documented contract so an Atlas refactor can't silently break routing. No Atlas change required to ship this. Deploy note: the interceptor pod needs an RBAC Role granting get/list on services in main-namespace (a saturn-k8s chart change, separate PR). Co-Authored-By: Claude Opus 4.8 --- cmd/interceptor/main.go | 46 +++++++---- config/settings.example.yaml | 24 ++++-- go.mod | 40 ++++++++- go.sum | 140 +++++++++++++++++++++++++++++-- internal/config/config.go | 5 ++ internal/registry/k8s.go | 144 ++++++++++++++++++++++++++++++++ internal/registry/k8s_test.go | 150 ++++++++++++++++++++++++++++++++++ 7 files changed, 522 insertions(+), 27 deletions(-) create mode 100644 internal/registry/k8s.go create mode 100644 internal/registry/k8s_test.go diff --git a/cmd/interceptor/main.go b/cmd/interceptor/main.go index 0ac1fb4..0bc8669 100644 --- a/cmd/interceptor/main.go +++ b/cmd/interceptor/main.go @@ -3,7 +3,7 @@ package main import ( "context" "flag" - "net/url" + "fmt" "os" "time" @@ -80,23 +80,23 @@ func buildResolver(s *config.Settings, log *logging.Logger) (registry.Resolver, }) case "cached": - conv, err := registry.NewConventionResolver(registry.ConventionConfig{Template: rs.ConventionTemplate}) + lookup, err := k8sLookup(rs) if err != nil { return nil, err } - log.Info.Printf("resolver: cached (lookup degrades to convention until control-plane wired)") - return registry.NewCachedResolver(conventionLookup(conv), registry.CacheConfig{ + log.Info.Printf("resolver: cached (k8s label lookup in namespace %q)", rs.K8sNamespace) + return registry.NewCachedResolver(lookup, registry.CacheConfig{ Size: rs.CacheSize, PositiveTTL: rs.PositiveTTL, NegativeTTL: rs.NegativeTTL, }) case "chain": - conv, err := registry.NewConventionResolver(registry.ConventionConfig{Template: rs.ConventionTemplate}) + lookup, err := k8sLookup(rs) if err != nil { return nil, err } - cached, err := registry.NewCachedResolver(conventionLookup(conv), registry.CacheConfig{ + cached, err := registry.NewCachedResolver(lookup, registry.CacheConfig{ Size: rs.CacheSize, PositiveTTL: rs.PositiveTTL, NegativeTTL: rs.NegativeTTL, @@ -104,9 +104,16 @@ func buildResolver(s *config.Settings, log *logging.Logger) (registry.Resolver, if err != nil { return nil, err } - // Cached control-plane lookup first, naming-convention fallback if it - // errors — graceful degradation when the control plane is unreachable. - log.Info.Printf("resolver: chain (cached → convention)") + conv, err := registry.NewConventionResolver(registry.ConventionConfig{Template: rs.ConventionTemplate}) + if err != nil { + return nil, err + } + // Cached k8s label lookup first; convention fallback only if the k8s API + // is unreachable (transient). The convention template is a best-effort + // guess at the Service name and is NOT guaranteed correct for Saturn's + // pd-{identity5}-{name}-{id} scheme — prefer plain "cached" unless your + // convention template matches your cluster. + log.Info.Printf("resolver: chain (cached k8s → convention fallback)") return registry.ChainResolver{cached, conv}, nil default: @@ -114,12 +121,23 @@ func buildResolver(s *config.Settings, log *logging.Logger) (registry.Resolver, } } -// conventionLookup adapts a ConventionResolver to a registry.LookupFunc so it -// can stand in for the (not-yet-wired) control-plane lookup. -func conventionLookup(conv *registry.ConventionResolver) registry.LookupFunc { - return func(_ context.Context, resourceID string) (*url.URL, error) { - return conv.Resolve(resourceID) +// k8sLookup builds the real control-plane LookupFunc: resolve a deployment id to +// its model Service via the saturncloud.io/resource-id label, using the +// in-cluster Kubernetes API. Requires an RBAC Role granting get/list on services +// in rs.K8sNamespace. +func k8sLookup(rs config.RegistrySettings) (registry.LookupFunc, error) { + if rs.K8sNamespace == "" { + return nil, fmt.Errorf("registry.k8sNamespace is required for the %q strategy "+ + "(the namespace inference Services live in, e.g. \"main-namespace\")", rs.Strategy) + } + client, err := registry.InClusterClient() + if err != nil { + return nil, fmt.Errorf("registry: %w", err) } + return registry.NewK8sLookup(registry.K8sLookupConfig{ + Namespace: rs.K8sNamespace, + Client: client, + }) } // buildEmitter constructs the durable metering emitter. When ValkeyAddr is set diff --git a/config/settings.example.yaml b/config/settings.example.yaml index da4fc29..255d156 100644 --- a/config/settings.example.yaml +++ b/config/settings.example.yaml @@ -19,14 +19,24 @@ billPartialOnAbort: true # Model → upstream dispatch (M4). registry: - # static — always use defaultUpstream (above). Single-model / dev. - # convention — derive the upstream from a Service-naming template; new models - # are reachable with no redeploy. - # cached — control-plane lookup with an LRU + TTL cache (lookup currently - # degrades to the convention template until the Atlas resource - # path is wired). - # chain — cached, falling back to convention if the lookup errors. + # static — always use defaultUpstream (above). Single-model / dev / first + # deploy: point defaultUpstream at the one served model's Service. + # cached — resolve each model's upstream from its in-cluster Service via the + # saturncloud.io/resource-id label (== X-Saturn-Resource-Id), LRU + + # TTL cached. THIS is the real multi-model strategy. Needs + # k8sNamespace (below) + an RBAC Role granting get/list on services. + # chain — cached k8s lookup, falling back to the convention template only if + # the k8s API is unreachable. + # convention — derive the upstream from a Service-naming TEMPLATE. NOTE: the + # template below is a PLACEHOLDER and does NOT match Saturn's real + # Service name (pd-{identity5}-{name}-{id} in main-namespace), which + # embeds pieces phoebe never receives. Do not rely on convention for + # Saturn inference unless your template matches your cluster — prefer + # "cached". strategy: "static" + # Namespace inference Services live in (Saturn: "main-namespace"). Required for + # cached/chain; ignored by static/convention. + k8sNamespace: "" conventionTemplate: "http://model-{id}.inference.svc.cluster.local:8000" cacheSize: 4096 cachePositiveTTL: "5m" diff --git a/go.mod b/go.mod index a8d02e2..ee1050a 100644 --- a/go.mod +++ b/go.mod @@ -11,22 +11,60 @@ require ( github.com/jackc/pgx/v5 v5.7.5 github.com/redis/go-redis/v9 v9.11.0 github.com/tidwall/wal v1.2.1 + k8s.io/api v0.31.3 + k8s.io/apimachinery v0.31.3 + k8s.io/client-go v0.31.3 ) require ( github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect + github.com/emicklei/go-restful/v3 v3.11.0 // indirect + github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-openapi/jsonpointer v0.19.6 // indirect + github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/swag v0.22.4 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/google/gnostic-models v0.6.8 // indirect + github.com/google/go-cmp v0.6.0 // indirect + github.com/google/gofuzz v1.2.0 // indirect + github.com/google/uuid v1.6.0 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect github.com/jackc/puddle/v2 v2.2.2 // indirect - github.com/kr/text v0.2.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pkg/errors v0.9.1 // indirect github.com/rogpeppe/go-internal v1.14.1 // indirect github.com/tidwall/gjson v1.10.2 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.0 // indirect github.com/tidwall/tinylru v1.1.0 // indirect + github.com/x448/float16 v0.8.4 // indirect github.com/yuin/gopher-lua v1.1.1 // indirect golang.org/x/crypto v0.37.0 // indirect + golang.org/x/net v0.26.0 // indirect + golang.org/x/oauth2 v0.21.0 // indirect golang.org/x/sync v0.13.0 // indirect + golang.org/x/sys v0.32.0 // indirect + golang.org/x/term v0.31.0 // indirect golang.org/x/text v0.24.0 // indirect + golang.org/x/time v0.3.0 // indirect + google.golang.org/protobuf v1.34.2 // indirect + gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/klog/v2 v2.130.1 // indirect + k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect + k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect + sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect + sigs.k8s.io/yaml v1.4.0 // indirect ) diff --git a/go.sum b/go.sum index b2ea01f..f5ceec9 100644 --- a/go.sum +++ b/go.sum @@ -10,10 +10,42 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= +github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= +github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= +github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogBU= +github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= +github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af h1:kmjWCqn2qkEml422C2Rrd27c3VGxi6a/6HNq8QmHRKM= +github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= @@ -24,22 +56,54 @@ github.com/jackc/pgx/v5 v5.7.5 h1:JHGfMnQY+IEtGM63d+NGMjoRpysB2JBwDr5fsngwmJs= github.com/jackc/pgx/v5 v5.7.5/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M= github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kisielk/sqlstruct v0.0.0-20201105191214-5f3e10d3ab46/go.mod h1:yyMNCyc/Ib3bDTKd379tNMpB/7/H5TjM2Y9QJ5THLbE= -github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= -github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= +github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= +github.com/onsi/gomega v1.19.0 h1:4ieX6qQjPP/BfC3mpsAtIGGlxTWPeA3Inl/7DtXw1tw= +github.com/onsi/gomega v1.19.0/go.mod h1:LY+I3pBVzYsTBU1AnDwOSxaYi9WoWiqgwooUqq9yPro= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/redis/go-redis/v9 v9.11.0 h1:E3S08Gl/nJNn5vkxd2i78wZxWAPNZgUNTp8WIJUAiIs= github.com/redis/go-redis/v9 v9.11.0/go.mod h1:huWgSWd8mW6+m0VPhJjSSQ+d6Nh1VICQ6Q5lHuCH/Iw= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/tidwall/gjson v1.10.2 h1:APbLGOM0rrEkd8WBw9C24nllro4ajFuJu0Sc9hRz8Bo= github.com/tidwall/gjson v1.10.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= @@ -50,19 +114,85 @@ github.com/tidwall/tinylru v1.1.0 h1:XY6IUfzVTU9rpwdhKUF6nQdChgCdGjkMfLzbWyiau6I github.com/tidwall/tinylru v1.1.0/go.mod h1:3+bX+TJ2baOLMWTnlyNWHh4QMnFyARg2TLTQ6OFbzw8= github.com/tidwall/wal v1.2.1 h1:xQvwnRF3e+xBC4NvFvl1mPGJHU0aH5zNzlUKnKGIImA= github.com/tidwall/wal v1.2.1/go.mod h1:r6lR1j27W9EPalgHiB7zLJDYu3mzW5BQP5KrzBpYY/E= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M= github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE= golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= +golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= +golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= +golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610= golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20= +golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/term v0.31.0 h1:erwDkOK1Msy6offm1mOgvspSkslFnIGsFnxOKoufg3o= +golang.org/x/term v0.31.0/go.mod h1:R4BeIy7D95HzImkxGkTW1UQTtP54tio2RyHz7PwK0aw= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0= golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU= +golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= +golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= +golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= +gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.31.3 h1:umzm5o8lFbdN/hIXbrK9oRpOproJO62CV1zqxXrLgk8= +k8s.io/api v0.31.3/go.mod h1:UJrkIp9pnMOI9K2nlL6vwpxRzzEX5sWgn8kGQe92kCE= +k8s.io/apimachinery v0.31.3 h1:6l0WhcYgasZ/wk9ktLq5vLaoXJJr5ts6lkaQzgeYPq4= +k8s.io/apimachinery v0.31.3/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= +k8s.io/client-go v0.31.3 h1:CAlZuM+PH2cm+86LOBemaJI/lQ5linJ6UFxKX/SoG+4= +k8s.io/client-go v0.31.3/go.mod h1:2CgjPUTpv3fE5dNygAr2NcM8nhHzXvxB8KL5gYc3kJs= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= +k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= +k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/internal/config/config.go b/internal/config/config.go index 40f61c6..e3d5f8a 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -78,6 +78,11 @@ type RegistrySettings struct { CachePositiveTTL string `yaml:"cachePositiveTTL"` CacheNegativeTTL string `yaml:"cacheNegativeTTL"` + // K8sNamespace is the namespace the cached/chain resolver lists inference + // Services in (Saturn: "main-namespace"). Required for cached/chain; ignored + // by static/convention. + K8sNamespace string `yaml:"k8sNamespace"` + // Parsed durations. PositiveTTL time.Duration `yaml:"-"` NegativeTTL time.Duration `yaml:"-"` diff --git a/internal/registry/k8s.go b/internal/registry/k8s.go new file mode 100644 index 0000000..0cb0752 --- /dev/null +++ b/internal/registry/k8s.go @@ -0,0 +1,144 @@ +package registry + +import ( + "context" + "fmt" + "net/url" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +// k8s.go implements the REAL control-plane LookupFunc: resolve a deployment's +// upstream by reading its in-cluster Service, selected by the +// saturncloud.io/resource-id label. +// +// WHY this and not a naming convention or an Atlas API call: +// - Saturn stamps `saturncloud.io/resource-id: ` on every +// inference Service (pdc basic_resource_labels), and phoebe receives that +// exact id as X-Saturn-Resource-Id. So the label IS the join key — no name +// reconstruction (the Service name embeds the group prefix + endpoint name, +// which phoebe never receives) and no new Atlas API/headers. +// - The Service name template can change on the Atlas side without breaking +// phoebe; the label is set by the same resource-labels path that labels +// every Saturn object, so this resolution is self-correcting. +// +// ROUTING CONTRACT (the one-way door phoebe now depends on — surfaced for Hugo): +// 1. inference Services carry `saturncloud.io/resource-id: ` and +// `saturncloud.io/service-type: internal`. +// 2. the served (vLLM) port is 8000 (== Deployment.proxy_port); the Service +// port is named "8000" (Route.port_name = str(container_port)). +// A deployment also has an ssh Service sharing the resource-id label, so the +// service-type=internal selector is REQUIRED to pick the model endpoint, not ssh. + +const ( + // labelResourceID is the Saturn label whose value equals the deployment id + // phoebe receives as X-Saturn-Resource-Id (pdc labels.py Labels.RESOURCE_ID). + labelResourceID = "saturncloud.io/resource-id" + // labelServiceType distinguishes the model Service ("internal") from the + // deployment's ssh Service ("ssh"), both of which carry the resource-id label. + labelServiceType = "saturncloud.io/service-type" + serviceTypeInternal = "internal" + // servedPortName is the Service port name for the vLLM serve port. Route + // port names are str(container_port); the served port is 8000. + servedPortName = "8000" +) + +// K8sLookupConfig configures the label-based Service resolver. +type K8sLookupConfig struct { + // Namespace is the namespace inference Services live in (Saturn: "main-namespace"). + Namespace string + // Client is the Kubernetes clientset. Injected so the lookup is unit-testable + // against a fake clientset; production builds it from the in-cluster config. + Client kubernetes.Interface +} + +// NewK8sLookup returns a LookupFunc that resolves a deployment id to its model +// Service's cluster-local URL via the saturncloud.io/resource-id label. +// +// It returns ErrNotFound when no internal Service carries the id (unknown or +// torn-down model — negative-cached by the CachedResolver with a short TTL). Any +// other error (API failure, ambiguous match, no served port) is transient and is +// NOT cached, so the next request retries. +func NewK8sLookup(cfg K8sLookupConfig) (LookupFunc, error) { + if cfg.Namespace == "" { + return nil, fmt.Errorf("registry: K8sLookup requires a non-empty Namespace") + } + if cfg.Client == nil { + return nil, fmt.Errorf("registry: K8sLookup requires a non-nil Client") + } + ns := cfg.Namespace + client := cfg.Client + return func(ctx context.Context, resourceID string) (*url.URL, error) { + // Select the model's internal Service. The service-type=internal clause is + // load-bearing: the deployment's ssh Service shares the resource-id label. + selector := fmt.Sprintf("%s=%s,%s=%s", + labelResourceID, resourceID, labelServiceType, serviceTypeInternal) + list, err := client.CoreV1().Services(ns).List(ctx, metav1.ListOptions{ + LabelSelector: selector, + Limit: 2, // 0/1 expected; >1 is an error, so 2 is enough to detect it. + }) + if err != nil { + // Transient: kube API unreachable/throttled. Not ErrNotFound, so the + // CachedResolver treats it as a retryable error (not negative-cached). + return nil, fmt.Errorf("registry: list services for %s: %w", resourceID, err) + } + switch len(list.Items) { + case 0: + return nil, ErrNotFound + case 1: + // ok + default: + return nil, fmt.Errorf("registry: ambiguous upstream for %s: %d internal services match", + resourceID, len(list.Items)) + } + + svc := list.Items[0] + port, err := servedPort(&svc) + if err != nil { + return nil, fmt.Errorf("registry: %s: %w", resourceID, err) + } + raw := fmt.Sprintf("http://%s.%s.svc.cluster.local:%d", svc.Name, ns, port) + u, err := url.Parse(raw) + if err != nil { + return nil, fmt.Errorf("registry: resolved invalid URL %q for %s: %w", raw, resourceID, err) + } + return u, nil + }, nil +} + +// servedPort picks the vLLM serve port (8000) from a model Service. It prefers +// the port NAMED "8000" (Route.port_name == str(container_port)); if the Service +// has exactly one port it uses that; otherwise it errors rather than guess (a +// deployment can expose multiple route ports, and silently picking the wrong one +// would route metered traffic to the wrong listener). +func servedPort(svc *corev1.Service) (int32, error) { + ports := svc.Spec.Ports + for _, p := range ports { + if p.Name == servedPortName { + return p.Port, nil + } + } + if len(ports) == 1 { + return ports[0].Port, nil + } + return 0, fmt.Errorf("no served port: want a port named %q among %d ports on service %s", + servedPortName, len(ports), svc.Name) +} + +// InClusterClient builds a Kubernetes clientset from the in-cluster service +// account config. phoebe's interceptor runs as a pod, so this is the production +// path; it requires an RBAC Role granting get/list on services in the namespace. +func InClusterClient() (kubernetes.Interface, error) { + cfg, err := rest.InClusterConfig() + if err != nil { + return nil, fmt.Errorf("registry: in-cluster config: %w", err) + } + cs, err := kubernetes.NewForConfig(cfg) + if err != nil { + return nil, fmt.Errorf("registry: build clientset: %w", err) + } + return cs, nil +} diff --git a/internal/registry/k8s_test.go b/internal/registry/k8s_test.go new file mode 100644 index 0000000..2753df9 --- /dev/null +++ b/internal/registry/k8s_test.go @@ -0,0 +1,150 @@ +package registry + +import ( + "context" + "errors" + "testing" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/fake" +) + +const testNS = "main-namespace" + +// svc builds a Service with the given name, labels, and ports for the fake clientset. +func svc(name string, labels map[string]string, ports ...corev1.ServicePort) *corev1.Service { + return &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: testNS, Labels: labels}, + Spec: corev1.ServiceSpec{Ports: ports}, + } +} + +func port(name string, p int32) corev1.ServicePort { + return corev1.ServicePort{Name: name, Port: p} +} + +// internalLabels is the model Service's label set: resource-id + service-type=internal. +func internalLabels(id string) map[string]string { + return map[string]string{labelResourceID: id, labelServiceType: serviceTypeInternal} +} + +func newLookup(t *testing.T, objs ...*corev1.Service) LookupFunc { + t.Helper() + runtimeObjs := make([]runtime.Object, 0, len(objs)) + for _, o := range objs { + runtimeObjs = append(runtimeObjs, o) + } + client := fake.NewSimpleClientset(runtimeObjs...) + lf, err := NewK8sLookup(K8sLookupConfig{Namespace: testNS, Client: client}) + if err != nil { + t.Fatalf("NewK8sLookup: %v", err) + } + return lf +} + +func TestK8sLookup_ResolvesInternalService(t *testing.T) { + lf := newLookup(t, + svc("pd-abcde-mymodel-r123", internalLabels("r123"), port("8000", 8000)), + ) + u, err := lf(context.Background(), "r123") + if err != nil { + t.Fatalf("lookup: %v", err) + } + want := "http://pd-abcde-mymodel-r123.main-namespace.svc.cluster.local:8000" + if u.String() != want { + t.Errorf("url = %q, want %q", u.String(), want) + } +} + +// The deployment's ssh Service shares the resource-id label; the service-type +// selector MUST exclude it. If it didn't, this would be an ambiguous match. +func TestK8sLookup_IgnoresSSHService(t *testing.T) { + lf := newLookup(t, + svc("pd-abcde-mymodel-r123", internalLabels("r123"), port("8000", 8000)), + svc("pd-abcde-mymodel-r123-ssh", + map[string]string{labelResourceID: "r123", labelServiceType: "ssh"}, + port("ssh", 22)), + ) + u, err := lf(context.Background(), "r123") + if err != nil { + t.Fatalf("lookup: %v", err) + } + if u.Port() != "8000" { + t.Errorf("resolved the ssh service (port %s), want the internal :8000", u.Port()) + } +} + +func TestK8sLookup_UnknownIsErrNotFound(t *testing.T) { + lf := newLookup(t, + svc("pd-abcde-other-r999", internalLabels("r999"), port("8000", 8000)), + ) + _, err := lf(context.Background(), "r123") + if !errors.Is(err, ErrNotFound) { + t.Errorf("err = %v, want ErrNotFound (so the cache negative-caches it)", err) + } +} + +// Two internal Services with the same id is a can't-happen state; if it ever +// occurs we must error (transient, not cached), never silently pick one. +func TestK8sLookup_AmbiguousIsError(t *testing.T) { + lf := newLookup(t, + svc("svc-a", internalLabels("r123"), port("8000", 8000)), + svc("svc-b", internalLabels("r123"), port("8000", 8000)), + ) + _, err := lf(context.Background(), "r123") + if err == nil { + t.Fatal("want an error for ambiguous match, got nil") + } + if errors.Is(err, ErrNotFound) { + t.Error("ambiguous must NOT be ErrNotFound (it must not be negative-cached)") + } +} + +// A single unnamed port is used as-is (defensive: not every Service names 8000). +func TestK8sLookup_SinglePortFallback(t *testing.T) { + lf := newLookup(t, + svc("pd-x-r123", internalLabels("r123"), port("", 8000)), + ) + u, err := lf(context.Background(), "r123") + if err != nil { + t.Fatalf("lookup: %v", err) + } + if u.Port() != "8000" { + t.Errorf("port = %s, want 8000", u.Port()) + } +} + +// Multiple ports without an 8000-named one is an error, not a silent wrong pick. +func TestK8sLookup_MultiPortNoServedIsError(t *testing.T) { + lf := newLookup(t, + svc("pd-x-r123", internalLabels("r123"), port("http", 80), port("metrics", 9090)), + ) + _, err := lf(context.Background(), "r123") + if err == nil { + t.Fatal("want an error when no served port (8000) is identifiable, got nil") + } +} + +func TestK8sLookup_PrefersNamed8000OverOthers(t *testing.T) { + lf := newLookup(t, + svc("pd-x-r123", internalLabels("r123"), port("metrics", 9090), port("8000", 8000)), + ) + u, err := lf(context.Background(), "r123") + if err != nil { + t.Fatalf("lookup: %v", err) + } + if u.Port() != "8000" { + t.Errorf("port = %s, want the named 8000 port", u.Port()) + } +} + +func TestNewK8sLookup_RequiresNamespaceAndClient(t *testing.T) { + if _, err := NewK8sLookup(K8sLookupConfig{Client: fake.NewSimpleClientset()}); err == nil { + t.Error("want error for empty Namespace") + } + if _, err := NewK8sLookup(K8sLookupConfig{Namespace: testNS}); err == nil { + t.Error("want error for nil Client") + } +}