@@ -20,9 +20,11 @@ import (
2020 "context"
2121 "time"
2222
23+ "github.com/go-logr/logr"
2324 "sigs.k8s.io/controller-runtime/pkg/log"
2425
2526 backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
27+ "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/flowcontrol/contracts"
2628 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/flowcontrol/types"
2729 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/handlers"
2830 errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error"
@@ -40,7 +42,6 @@ type AdmissionController interface {
4042 // Args:
4143 // ctx: The request context, carrying deadlines, cancellation signals, and logger.
4244 // reqCtx: The handlers.RequestContext containing details about the incoming request.
43- // candidatePods: A list of potential backend pods that can serve the request.
4445 // priority: The priority level of the request, as determined by the InferenceObjective.
4546 //
4647 // Returns:
@@ -49,7 +50,6 @@ type AdmissionController interface {
4950 Admit (
5051 ctx context.Context ,
5152 reqCtx * handlers.RequestContext ,
52- candidatePods []backendmetrics.PodMetrics ,
5353 priority int ,
5454 ) error
5555}
@@ -65,18 +65,17 @@ type flowController interface {
6565 EnqueueAndWait (ctx context.Context , req types.FlowControlRequest ) (types.QueueOutcome , error )
6666}
6767
68- // rejectIfSheddableAndSaturated checks if a request should be immediately rejected because it's sheddable
69- // (priority < 0) and the system is saturated.
68+ // rejectIfSheddableAndSaturated rejects a request immediately when it is sheddable and the system is saturated.
7069func rejectIfSheddableAndSaturated (
7170 ctx context.Context ,
7271 sd saturationDetector ,
72+ locator contracts.PodLocator ,
7373 reqCtx * handlers.RequestContext ,
74- candidatePods []backendmetrics.PodMetrics ,
7574 priority int ,
75+ logger logr.Logger ,
7676) error {
7777 if requtil .IsSheddable (priority ) {
78- logger := log .FromContext (ctx )
79- if sd .IsSaturated (ctx , candidatePods ) {
78+ if sd .IsSaturated (ctx , locator .Locate (ctx , reqCtx .Request .Metadata )) {
8079 logger .V (logutil .TRACE ).Info ("Request rejected: system saturated and request is sheddable" ,
8180 "requestID" , reqCtx .SchedulingRequest .RequestId )
8281 return errutil.Error {
@@ -95,25 +94,37 @@ func rejectIfSheddableAndSaturated(
9594// saturated. Non-sheddable requests always bypass the saturation check.
9695type LegacyAdmissionController struct {
9796 saturationDetector saturationDetector
97+ podLocator contracts.PodLocator
9898}
9999
100100// NewLegacyAdmissionController creates a new LegacyAdmissionController.
101- func NewLegacyAdmissionController (sd saturationDetector ) * LegacyAdmissionController {
102- return & LegacyAdmissionController {saturationDetector : sd }
101+ func NewLegacyAdmissionController (
102+ sd saturationDetector ,
103+ pl contracts.PodLocator ,
104+ ) * LegacyAdmissionController {
105+ return & LegacyAdmissionController {
106+ saturationDetector : sd ,
107+ podLocator : pl ,
108+ }
103109}
104110
105111// Admit implements the AdmissionController interface for the legacy strategy.
106112// It checks for saturation only for requests with priority < 0.
107113func (lac * LegacyAdmissionController ) Admit (
108114 ctx context.Context ,
109115 reqCtx * handlers.RequestContext ,
110- candidatePods []backendmetrics.PodMetrics ,
111116 priority int ,
112117) error {
113118 logger := log .FromContext (ctx )
114119 logger .V (logutil .TRACE ).Info ("Executing LegacyAdmissionController" ,
115120 "priority" , priority , "fairnessID" , reqCtx .FairnessID )
116- if err := rejectIfSheddableAndSaturated (ctx , lac .saturationDetector , reqCtx , candidatePods , priority ); err != nil {
121+ if err := rejectIfSheddableAndSaturated (
122+ ctx ,
123+ lac .saturationDetector ,
124+ lac .podLocator ,
125+ reqCtx , priority ,
126+ logger ,
127+ ); err != nil {
117128 return err
118129 }
119130 logger .V (logutil .TRACE ).Info ("Request admitted" , "requestID" , reqCtx .SchedulingRequest .RequestId )
@@ -123,19 +134,15 @@ func (lac *LegacyAdmissionController) Admit(
123134// --- FlowControlAdmissionController ---
124135
125136// FlowControlAdmissionController delegates admission decisions to the Flow Control layer.
126- // It first checks if the request is sheddable and the system is saturated, rejecting immediately if both conditions are
127- // true. Otherwise, it uses the provided flowController to enqueue the request and await an outcome.
137+ // It uses the provided flowController to enqueue the request and await an outcome.
128138type FlowControlAdmissionController struct {
129- saturationDetector saturationDetector
130- flowController flowController
139+ flowController flowController
131140}
132141
133142// NewFlowControlAdmissionController creates a new FlowControlAdmissionController.
134- // It requires a SaturationDetector and a flowController instance.
135- func NewFlowControlAdmissionController (sd saturationDetector , fc flowController ) * FlowControlAdmissionController {
143+ func NewFlowControlAdmissionController (fc flowController ) * FlowControlAdmissionController {
136144 return & FlowControlAdmissionController {
137- saturationDetector : sd ,
138- flowController : fc ,
145+ flowController : fc ,
139146 }
140147}
141148
@@ -144,24 +151,18 @@ func NewFlowControlAdmissionController(sd saturationDetector, fc flowController)
144151func (fcac * FlowControlAdmissionController ) Admit (
145152 ctx context.Context ,
146153 reqCtx * handlers.RequestContext ,
147- candidatePods []backendmetrics.PodMetrics ,
148154 priority int ,
149155) error {
150156 logger := log .FromContext (ctx )
151157 logger .V (logutil .TRACE ).Info ("Executing FlowControlAdmissionController" ,
152158 "requestID" , reqCtx .SchedulingRequest .RequestId , "priority" , priority , "fairnessID" , reqCtx .FairnessID )
153- if err := rejectIfSheddableAndSaturated (ctx , fcac .saturationDetector , reqCtx , candidatePods , priority ); err != nil {
154- return err
155- }
156-
157- logger .V (logutil .TRACE ).Info ("Request proceeding to flow control" , "requestID" , reqCtx .SchedulingRequest .RequestId )
158159
159160 fcReq := & flowControlRequest {
160161 requestID : reqCtx .SchedulingRequest .RequestId ,
161162 fairnessID : reqCtx .FairnessID ,
162163 priority : priority ,
163164 requestByteSize : uint64 (reqCtx .RequestSize ),
164- candidatePods : candidatePods ,
165+ reqMetadata : reqCtx . Request . Metadata ,
165166 }
166167
167168 outcome , err := fcac .flowController .EnqueueAndWait (ctx , fcReq )
@@ -176,20 +177,20 @@ type flowControlRequest struct {
176177 fairnessID string
177178 priority int
178179 requestByteSize uint64
179- candidatePods []backendmetrics. PodMetrics
180+ reqMetadata map [ string ] any
180181}
181182
182183var _ types.FlowControlRequest = & flowControlRequest {}
183184
184185func (r * flowControlRequest ) ID () string { return r .requestID }
185186func (r * flowControlRequest ) InitialEffectiveTTL () time.Duration { return 0 } // Use controller default.
186187func (r * flowControlRequest ) ByteSize () uint64 { return r .requestByteSize }
187- func (r * flowControlRequest ) CandidatePodsForScheduling () []backendmetrics.PodMetrics {
188- return r .candidatePods
189- }
190188func (r * flowControlRequest ) FlowKey () types.FlowKey {
191189 return types.FlowKey {ID : r .fairnessID , Priority : r .priority }
192190}
191+ func (r * flowControlRequest ) GetMetadata () map [string ]any {
192+ return r .reqMetadata
193+ }
193194
194195// translateFlowControlOutcome maps the context-rich outcome of the Flow Control layer to the public errutil.Error
195196// contract used by the Director.
0 commit comments