@@ -33,7 +33,7 @@ import (
3333 "github.com/openshift/ci-tools/pkg/steps"
3434)
3535
36- func admit (port , healthPort int , certDir string , client buildclientv1.BuildV1Interface , loaders map [string ][]* cacheReloader , mutateResourceLimits bool , cpuCap int64 , memoryCap string , cpuPriorityScheduling int64 , reporter results.PodScalerReporter ) {
36+ func admit (port , healthPort int , certDir string , client buildclientv1.BuildV1Interface , loaders map [string ][]* cacheReloader , mutateResourceLimits bool , cpuCap int64 , memoryCap string , cpuPriorityScheduling int64 , authoritativeCPU , authoritativeMemory bool , reporter results.PodScalerReporter ) {
3737 logger := logrus .WithField ("component" , "pod-scaler admission" )
3838 logger .Infof ("Initializing admission webhook server with %d loaders." , len (loaders ))
3939 health := pjutil .NewHealthOnPort (healthPort )
@@ -44,7 +44,7 @@ func admit(port, healthPort int, certDir string, client buildclientv1.BuildV1Int
4444 Port : port ,
4545 CertDir : certDir ,
4646 })
47- server .Register ("/pods" , & webhook.Admission {Handler : & podMutator {logger : logger , client : client , decoder : decoder , resources : resources , mutateResourceLimits : mutateResourceLimits , cpuCap : cpuCap , memoryCap : memoryCap , cpuPriorityScheduling : cpuPriorityScheduling , reporter : reporter }})
47+ server .Register ("/pods" , & webhook.Admission {Handler : & podMutator {logger : logger , client : client , decoder : decoder , resources : resources , mutateResourceLimits : mutateResourceLimits , cpuCap : cpuCap , memoryCap : memoryCap , cpuPriorityScheduling : cpuPriorityScheduling , authoritativeCPU : authoritativeCPU , authoritativeMemory : authoritativeMemory , reporter : reporter }})
4848 logger .Info ("Serving admission webhooks." )
4949 if err := server .Start (interrupts .Context ()); err != nil {
5050 logrus .WithError (err ).Fatal ("Failed to serve webhooks." )
@@ -60,6 +60,8 @@ type podMutator struct {
6060 cpuCap int64
6161 memoryCap string
6262 cpuPriorityScheduling int64
63+ authoritativeCPU bool
64+ authoritativeMemory bool
6365 reporter results.PodScalerReporter
6466}
6567
@@ -97,7 +99,7 @@ func (m *podMutator) Handle(ctx context.Context, req admission.Request) admissio
9799 logger .WithError (err ).Error ("Failed to handle rehearsal Pod." )
98100 return admission .Allowed ("Failed to handle rehearsal Pod, ignoring." )
99101 }
100- mutatePodResources (pod , m .resources , m .mutateResourceLimits , m .cpuCap , m .memoryCap , m .reporter , logger )
102+ mutatePodResources (pod , m .resources , m .mutateResourceLimits , m .cpuCap , m .memoryCap , m .authoritativeCPU , m . authoritativeMemory , m . reporter , logger )
101103 m .addPriorityClass (pod )
102104
103105 marshaledPod , err := json .Marshal (pod )
@@ -196,8 +198,14 @@ func mutatePodLabels(pod *corev1.Pod, build *buildv1.Build) {
196198 }
197199}
198200
199- // useOursIfLarger updates fields in theirs when ours are larger
200- func useOursIfLarger (allOfOurs , allOfTheirs * corev1.ResourceRequirements , workloadName , workloadType string , reporter results.PodScalerReporter , logger * logrus.Entry ) {
201+ // applyRecommendationsBasedOnRecentData applies resource recommendations derived from recent
202+ // usage data (see resourceRecommendationWindow). When a workload used more than is configured,
203+ // resources are increased; when it used less, resources are decreased, but only if authoritative
204+ // mode is enabled for that resource type.
205+ //
206+ // The reduction path is verified in admission_test.go as part of TestUseOursIfLarger, with test
207+ // cases that handle ResourceQuantity comparison correctly.
208+ func applyRecommendationsBasedOnRecentData (allOfOurs , allOfTheirs * corev1.ResourceRequirements , workloadName , workloadType string , authoritativeCPU , authoritativeMemory bool , reporter results.PodScalerReporter , logger * logrus.Entry ) {
201209 for _ , item := range []* corev1.ResourceRequirements {allOfOurs , allOfTheirs } {
202210 if item .Requests == nil {
203211 item .Requests = corev1.ResourceList {}
@@ -215,6 +223,10 @@ func useOursIfLarger(allOfOurs, allOfTheirs *corev1.ResourceRequirements, worklo
215223 } {
216224 for _ , field := range []corev1.ResourceName {corev1 .ResourceCPU , corev1 .ResourceMemory } {
217225 our := (* pair .ours )[field ]
226+ // If we have no recommendation for this resource, skip it
227+ if our .IsZero () {
228+ continue
229+ }
218230 //TODO(sgoeddel): this is a temporary experiment to see what effect setting values that are 120% of what has
219231 // been determined has on the rate of OOMKilled and similar termination of workloads
220232 increased := our .AsApproximateFloat64 () * 1.2
@@ -231,13 +243,49 @@ func useOursIfLarger(allOfOurs, allOfTheirs *corev1.ResourceRequirements, worklo
231243 })
232244 cmp := our .Cmp (their )
233245 if cmp == 1 {
234- fieldLogger .Debug ("determined amount larger than configured" )
246+ fieldLogger .Debug ("determined amount larger than configured, increasing resources " )
235247 (* pair .theirs )[field ] = our
236248 if their .Value () > 0 && our .Value () > (their .Value ()* 10 ) {
237249 reporter .ReportResourceConfigurationWarning (workloadName , workloadType , their .String (), our .String (), field .String ())
238250 }
239251 } else if cmp < 0 {
240- fieldLogger .Debug ("determined amount smaller than configured" )
252+ // Check if authoritative mode is enabled for this resource type
253+ isAuthoritative := false
254+ if field == corev1 .ResourceCPU {
255+ isAuthoritative = authoritativeCPU
256+ } else if field == corev1 .ResourceMemory {
257+ isAuthoritative = authoritativeMemory
258+ }
259+
260+ if ! isAuthoritative {
261+ fieldLogger .Debug ("authoritative mode disabled for this resource, skipping reduction" )
262+ continue
263+ }
264+
265+ // Apply gradual reduction with safety limits: max 25% reduction per cycle, minimum 5% difference
266+ ourValue := our .AsApproximateFloat64 ()
267+ theirValue := their .AsApproximateFloat64 ()
268+ if theirValue == 0 {
269+ fieldLogger .Debug ("theirs is zero, applying recommendation" )
270+ (* pair .theirs )[field ] = our
271+ continue
272+ }
273+
274+ reductionPercent := 1.0 - (ourValue / theirValue )
275+ if reductionPercent < 0.05 {
276+ fieldLogger .Debug ("difference less than 5%, skipping micro-adjustment" )
277+ continue
278+ }
279+
280+ maxReductionPercent := 0.25
281+ if reductionPercent > maxReductionPercent {
282+ maxAllowed := theirValue * (1.0 - maxReductionPercent )
283+ our .Set (int64 (maxAllowed ))
284+ fieldLogger .Debugf ("applying gradual reduction (limited to 25%% per cycle)" )
285+ } else {
286+ fieldLogger .Debug ("reducing resources based on recent usage" )
287+ }
288+ (* pair .theirs )[field ] = our
241289 } else {
242290 fieldLogger .Debug ("determined amount equal to configured" )
243291 }
@@ -292,7 +340,7 @@ func preventUnschedulable(resources *corev1.ResourceRequirements, cpuCap int64,
292340 }
293341}
294342
295- func mutatePodResources (pod * corev1.Pod , server * resourceServer , mutateResourceLimits bool , cpuCap int64 , memoryCap string , reporter results.PodScalerReporter , logger * logrus.Entry ) {
343+ func mutatePodResources (pod * corev1.Pod , server * resourceServer , mutateResourceLimits bool , cpuCap int64 , memoryCap string , authoritativeCPU , authoritativeMemory bool , reporter results.PodScalerReporter , logger * logrus.Entry ) {
296344 mutateResources := func (containers []corev1.Container ) {
297345 for i := range containers {
298346 meta := podscaler .MetadataFor (pod .ObjectMeta .Labels , pod .ObjectMeta .Name , containers [i ].Name )
@@ -301,7 +349,7 @@ func mutatePodResources(pod *corev1.Pod, server *resourceServer, mutateResourceL
301349 logger .Debugf ("recommendation exists for: %s" , containers [i ].Name )
302350 workloadType := determineWorkloadType (pod .Annotations , pod .Labels )
303351 workloadName := determineWorkloadName (pod .Name , containers [i ].Name , workloadType , pod .Labels )
304- useOursIfLarger (& resources , & containers [i ].Resources , workloadName , workloadType , reporter , logger )
352+ applyRecommendationsBasedOnRecentData (& resources , & containers [i ].Resources , workloadName , workloadType , authoritativeCPU , authoritativeMemory , reporter , logger )
305353 if mutateResourceLimits {
306354 reconcileLimits (& containers [i ].Resources )
307355 }
0 commit comments