Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 32 additions & 14 deletions cmd/interceptor/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package main
import (
"context"
"flag"
"net/url"
"fmt"
"os"
"time"

Expand Down Expand Up @@ -80,46 +80,64 @@ func buildResolver(s *config.Settings, log *logging.Logger) (registry.Resolver,
})

case "cached":
conv, err := registry.NewConventionResolver(registry.ConventionConfig{Template: rs.ConventionTemplate})
lookup, err := k8sLookup(rs)
if err != nil {
return nil, err
}
log.Info.Printf("resolver: cached (lookup degrades to convention until control-plane wired)")
return registry.NewCachedResolver(conventionLookup(conv), registry.CacheConfig{
log.Info.Printf("resolver: cached (k8s label lookup in namespace %q)", rs.K8sNamespace)
return registry.NewCachedResolver(lookup, registry.CacheConfig{
Size: rs.CacheSize,
PositiveTTL: rs.PositiveTTL,
NegativeTTL: rs.NegativeTTL,
})

case "chain":
conv, err := registry.NewConventionResolver(registry.ConventionConfig{Template: rs.ConventionTemplate})
lookup, err := k8sLookup(rs)
if err != nil {
return nil, err
}
cached, err := registry.NewCachedResolver(conventionLookup(conv), registry.CacheConfig{
cached, err := registry.NewCachedResolver(lookup, registry.CacheConfig{
Size: rs.CacheSize,
PositiveTTL: rs.PositiveTTL,
NegativeTTL: rs.NegativeTTL,
})
if err != nil {
return nil, err
}
// Cached control-plane lookup first, naming-convention fallback if it
// errors — graceful degradation when the control plane is unreachable.
log.Info.Printf("resolver: chain (cached → convention)")
conv, err := registry.NewConventionResolver(registry.ConventionConfig{Template: rs.ConventionTemplate})
if err != nil {
return nil, err
}
// Cached k8s label lookup first; convention fallback only if the k8s API
// is unreachable (transient). The convention template is a best-effort
// guess at the Service name and is NOT guaranteed correct for Saturn's
// pd-{identity5}-{name}-{id} scheme — prefer plain "cached" unless your
// convention template matches your cluster.
log.Info.Printf("resolver: chain (cached k8s → convention fallback)")
return registry.ChainResolver{cached, conv}, nil

default:
return registry.NewStatic(s.Default), nil
}
}

// conventionLookup adapts a ConventionResolver to a registry.LookupFunc so it
// can stand in for the (not-yet-wired) control-plane lookup.
func conventionLookup(conv *registry.ConventionResolver) registry.LookupFunc {
return func(_ context.Context, resourceID string) (*url.URL, error) {
return conv.Resolve(resourceID)
// k8sLookup assembles the control-plane LookupFunc used by the k8s-backed
// resolver strategies. The lookup maps a deployment id to its model Service
// through the saturncloud.io/resource-id label, talking to the in-cluster
// Kubernetes API; the pod therefore needs an RBAC Role that grants get/list on
// services in rs.K8sNamespace.
//
// It returns an error when rs.K8sNamespace is empty (checked before any client
// is created), when the in-cluster client cannot be obtained (wrapped with a
// "registry: " prefix), or when registry.NewK8sLookup itself fails.
func k8sLookup(rs config.RegistrySettings) (registry.LookupFunc, error) {
	ns := rs.K8sNamespace
	if ns == "" {
		// Fail fast with an actionable message naming the strategy that needs it.
		return nil, fmt.Errorf("registry.k8sNamespace is required for the %q strategy "+
			"(the namespace inference Services live in, e.g. \"main-namespace\")", rs.Strategy)
	}

	kube, err := registry.InClusterClient()
	if err != nil {
		return nil, fmt.Errorf("registry: %w", err)
	}

	lookupCfg := registry.K8sLookupConfig{
		Namespace: ns,
		Client:    kube,
	}
	return registry.NewK8sLookup(lookupCfg)
}

// buildEmitter constructs the durable metering emitter. When ValkeyAddr is set
Expand Down
24 changes: 17 additions & 7 deletions config/settings.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,24 @@ billPartialOnAbort: true

# Model → upstream dispatch (M4).
registry:
# static — always use defaultUpstream (above). Single-model / dev.
# convention — derive the upstream from a Service-naming template; new models
# are reachable with no redeploy.
# cached — control-plane lookup with an LRU + TTL cache (lookup currently
# degrades to the convention template until the Atlas resource
# path is wired).
# chain — cached, falling back to convention if the lookup errors.
# static — always use defaultUpstream (above). Single-model / dev / first
# deploy: point defaultUpstream at the one served model's Service.
# cached — resolve each model's upstream from its in-cluster Service via the
# saturncloud.io/resource-id label (== X-Saturn-Resource-Id), LRU +
# TTL cached. THIS is the real multi-model strategy. Needs
# k8sNamespace (below) + an RBAC Role granting get/list on services.
# chain — cached k8s lookup, falling back to the convention template only if
# the k8s API is unreachable.
# convention — derive the upstream from a Service-naming TEMPLATE. NOTE: the
# template below is a PLACEHOLDER and does NOT match Saturn's real
# Service name (pd-{identity5}-{name}-{id} in main-namespace), which
# embeds pieces phoebe never receives. Do not rely on convention for
# Saturn inference unless your template matches your cluster — prefer
# "cached".
strategy: "static"
# Namespace in which the inference Services live (Saturn: "main-namespace").
# Required for the cached and chain strategies; ignored by static and convention.
k8sNamespace: ""
conventionTemplate: "http://model-{id}.inference.svc.cluster.local:8000"
cacheSize: 4096
cachePositiveTTL: "5m"
Expand Down
40 changes: 39 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,60 @@ require (
github.com/jackc/pgx/v5 v5.7.5
github.com/redis/go-redis/v9 v9.11.0
github.com/tidwall/wal v1.2.1
k8s.io/api v0.31.3
k8s.io/apimachinery v0.31.3
k8s.io/client-go v0.31.3
)

require (
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-openapi/jsonpointer v0.19.6 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
github.com/go-openapi/swag v0.22.4 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/gnostic-models v0.6.8 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
github.com/jackc/puddle/v2 v2.2.2 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/rogpeppe/go-internal v1.14.1 // indirect
github.com/tidwall/gjson v1.10.2 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect
github.com/tidwall/tinylru v1.1.0 // indirect
github.com/x448/float16 v0.8.4 // indirect
github.com/yuin/gopher-lua v1.1.1 // indirect
golang.org/x/crypto v0.37.0 // indirect
golang.org/x/net v0.26.0 // indirect
golang.org/x/oauth2 v0.21.0 // indirect
golang.org/x/sync v0.13.0 // indirect
golang.org/x/sys v0.32.0 // indirect
golang.org/x/term v0.31.0 // indirect
golang.org/x/text v0.24.0 // indirect
golang.org/x/time v0.3.0 // indirect
google.golang.org/protobuf v1.34.2 // indirect
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
)
Loading
Loading