
Commit 3c5851d

Add measured pods feature with authoritative mode improvements
1 parent 4c0387d commit 3c5851d

10 files changed

Lines changed: 940 additions & 37 deletions

cmd/pod-scaler/admission.go

Lines changed: 131 additions & 23 deletions
@@ -33,34 +33,51 @@ import (
     "github.com/openshift/ci-tools/pkg/steps"
 )
 
-func admit(port, healthPort int, certDir string, client buildclientv1.BuildV1Interface, loaders map[string][]*cacheReloader, mutateResourceLimits bool, cpuCap int64, memoryCap string, cpuPriorityScheduling int64, reporter results.PodScalerReporter) {
+func admit(port, healthPort int, certDir string, client buildclientv1.BuildV1Interface, loaders map[string][]*cacheReloader, mutateResourceLimits bool, cpuCap int64, memoryCap string, cpuPriorityScheduling int64, authoritativeCPURequests, authoritativeMemoryRequests bool, enableMeasuredPods bool, bigQueryProjectID, bigQueryDatasetID, bigQueryCredentialsFile string, reporter results.PodScalerReporter) {
     logger := logrus.WithField("component", "pod-scaler admission")
     logger.Infof("Initializing admission webhook server with %d loaders.", len(loaders))
     health := pjutil.NewHealthOnPort(healthPort)
     resources := newResourceServer(loaders, health)
     decoder := admission.NewDecoder(scheme.Scheme)
 
+    var bqClient *BigQueryClient
+    if enableMeasuredPods {
+        if bigQueryProjectID == "" || bigQueryDatasetID == "" {
+            logrus.Fatal("bigquery-project-id and bigquery-dataset-id are required when enable-measured-pods is true")
+        }
+        cache := NewMeasuredPodCache(logger)
+        var err error
+        bqClient, err = NewBigQueryClient(bigQueryProjectID, bigQueryDatasetID, bigQueryCredentialsFile, cache, logger)
+        if err != nil {
+            logrus.WithError(err).Fatal("Failed to create BigQuery client for measured pods")
+        }
+        logger.Info("Measured pods feature enabled with BigQuery integration")
+    }
+
     server := webhook.NewServer(webhook.Options{
         Port:    port,
         CertDir: certDir,
     })
-    server.Register("/pods", &webhook.Admission{Handler: &podMutator{logger: logger, client: client, decoder: decoder, resources: resources, mutateResourceLimits: mutateResourceLimits, cpuCap: cpuCap, memoryCap: memoryCap, cpuPriorityScheduling: cpuPriorityScheduling, reporter: reporter}})
+    server.Register("/pods", &webhook.Admission{Handler: &podMutator{logger: logger, client: client, decoder: decoder, resources: resources, mutateResourceLimits: mutateResourceLimits, cpuCap: cpuCap, memoryCap: memoryCap, cpuPriorityScheduling: cpuPriorityScheduling, authoritativeCPURequests: authoritativeCPURequests, authoritativeMemoryRequests: authoritativeMemoryRequests, bqClient: bqClient, reporter: reporter}})
     logger.Info("Serving admission webhooks.")
     if err := server.Start(interrupts.Context()); err != nil {
         logrus.WithError(err).Fatal("Failed to serve webhooks.")
     }
 }
 
 type podMutator struct {
-    logger                *logrus.Entry
-    client                buildclientv1.BuildV1Interface
-    resources             *resourceServer
-    mutateResourceLimits  bool
-    decoder               admission.Decoder
-    cpuCap                int64
-    memoryCap             string
-    cpuPriorityScheduling int64
-    reporter              results.PodScalerReporter
+    logger                      *logrus.Entry
+    client                      buildclientv1.BuildV1Interface
+    resources                   *resourceServer
+    mutateResourceLimits        bool
+    decoder                     admission.Decoder
+    cpuCap                      int64
+    memoryCap                   string
+    cpuPriorityScheduling       int64
+    authoritativeCPURequests    bool
+    authoritativeMemoryRequests bool
+    bqClient                    *BigQueryClient
+    reporter                    results.PodScalerReporter
 }
 
 func (m *podMutator) Handle(ctx context.Context, req admission.Request) admission.Response {
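Note: the MeasuredPodCache and BigQueryClient types used by admit are introduced in other files of this commit and do not appear in this diff. For orientation only, a constructor consistent with the parameters threaded through admit might look like the sketch below; the struct fields, error wrapping, and fallback to Application Default Credentials are assumptions, not the committed code.

    // Sketch only: not the committed implementation.
    package main

    import (
        "context"
        "fmt"

        "cloud.google.com/go/bigquery"
        "github.com/sirupsen/logrus"
        "google.golang.org/api/option"
    )

    // MeasuredPodCache stands in for the cache type added elsewhere in this commit.
    type MeasuredPodCache struct{}

    func NewMeasuredPodCache(logger *logrus.Entry) *MeasuredPodCache { return &MeasuredPodCache{} }

    // BigQueryClient stands in for the client type added elsewhere in this commit.
    type BigQueryClient struct {
        client  *bigquery.Client
        dataset *bigquery.Dataset
        cache   *MeasuredPodCache
        logger  *logrus.Entry
    }

    // NewBigQueryClient wires the project, dataset, and optional credentials file into a
    // BigQuery client; with an empty credentials file, Application Default Credentials apply.
    func NewBigQueryClient(projectID, datasetID, credentialsFile string, cache *MeasuredPodCache, logger *logrus.Entry) (*BigQueryClient, error) {
        var opts []option.ClientOption
        if credentialsFile != "" {
            opts = append(opts, option.WithCredentialsFile(credentialsFile))
        }
        client, err := bigquery.NewClient(context.Background(), projectID, opts...)
        if err != nil {
            return nil, fmt.Errorf("creating BigQuery client: %w", err)
        }
        return &BigQueryClient{client: client, dataset: client.Dataset(datasetID), cache: cache, logger: logger}, nil
    }
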
@@ -97,7 +114,16 @@ func (m *podMutator) Handle(ctx context.Context, req admission.Request) admission.Response {
         logger.WithError(err).Error("Failed to handle rehearsal Pod.")
         return admission.Allowed("Failed to handle rehearsal Pod, ignoring.")
     }
-    mutatePodResources(pod, m.resources, m.mutateResourceLimits, m.cpuCap, m.memoryCap, m.reporter, logger)
+
+    // Classify pod as normal or measured (if enabled)
+    if m.bqClient != nil {
+        ClassifyPod(pod, m.bqClient, logger)
+        AddPodAntiAffinity(pod, logger)
+        // Apply measured pod resources before regular resource mutation
+        ApplyMeasuredPodResources(pod, m.bqClient, logger)
+    }
+
+    mutatePodResources(pod, m.resources, m.mutateResourceLimits, m.cpuCap, m.memoryCap, m.authoritativeCPURequests, m.authoritativeMemoryRequests, m.reporter, logger)
     m.addPriorityClass(pod)
 
     marshaledPod, err := json.Marshal(pod)
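Note: ClassifyPod, AddPodAntiAffinity, and ApplyMeasuredPodResources are added in other files of this commit. The anti-affinity step is what keeps measured pods on isolated nodes (per the option help text in main.go). A minimal sketch of that idea follows, with an assumed label key, selector, and topology key; the committed helper may differ.

    // Sketch with assumed details; not the committed AddPodAntiAffinity.
    package main

    import (
        corev1 "k8s.io/api/core/v1"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    )

    // PodScalerLabelKey is the label ClassifyPod uses to mark pods; its real value is
    // defined elsewhere in this commit, so a placeholder is used here.
    const PodScalerLabelKey = "pod-scaler.example/classification" // placeholder value

    func addMeasuredPodAntiAffinity(pod *corev1.Pod) {
        // Require that no other pod carrying the pod-scaler classification label runs on
        // the same node, so the measured pod's CPU/memory usage is attributable to it alone.
        term := corev1.PodAffinityTerm{
            LabelSelector: &metav1.LabelSelector{
                MatchExpressions: []metav1.LabelSelectorRequirement{{
                    Key:      PodScalerLabelKey,
                    Operator: metav1.LabelSelectorOpExists,
                }},
            },
            TopologyKey: "kubernetes.io/hostname",
        }
        if pod.Spec.Affinity == nil {
            pod.Spec.Affinity = &corev1.Affinity{}
        }
        if pod.Spec.Affinity.PodAntiAffinity == nil {
            pod.Spec.Affinity.PodAntiAffinity = &corev1.PodAntiAffinity{}
        }
        pod.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution = append(
            pod.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution, term)
    }
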
@@ -196,8 +222,10 @@ func mutatePodLabels(pod *corev1.Pod, build *buildv1.Build) {
     }
 }
 
-// useOursIfLarger updates fields in theirs when ours are larger
-func useOursIfLarger(allOfOurs, allOfTheirs *corev1.ResourceRequirements, workloadName, workloadType string, reporter results.PodScalerReporter, logger *logrus.Entry) {
+// applyRecommendationsBasedOnRecentData applies resource recommendations based on recent usage data
+// (see resourceRecommendationWindow). If they used more, we increase resources. If they used less
+// and authoritative mode is enabled for that resource, we decrease them.
+func applyRecommendationsBasedOnRecentData(allOfOurs, allOfTheirs *corev1.ResourceRequirements, workloadName, workloadType string, authoritativeCPU, authoritativeMemory bool, reporter results.PodScalerReporter, logger *logrus.Entry) {
     for _, item := range []*corev1.ResourceRequirements{allOfOurs, allOfTheirs} {
         if item.Requests == nil {
             item.Requests = corev1.ResourceList{}
@@ -215,12 +243,37 @@ func useOursIfLarger(allOfOurs, allOfTheirs *corev1.ResourceRequirements, workloadName, workloadType string, reporter results.PodScalerReporter, logger *logrus.Entry) {
     } {
         for _, field := range []corev1.ResourceName{corev1.ResourceCPU, corev1.ResourceMemory} {
             our := (*pair.ours)[field]
-            //TODO(sgoeddel): this is a temporary experiment to see what effect setting values that are 120% of what has
-            // been determined has on the rate of OOMKilled and similar termination of workloads
-            increased := our.AsApproximateFloat64() * 1.2
-            our.Set(int64(increased))
-
+            // If we have no recommendation for this resource, skip it
+            if our.IsZero() {
+                continue
+            }
             their := (*pair.theirs)[field]
+
+            // Check if resources were already set by measured pods logic (which already applies 1.2x buffer).
+            // If so, skip applying the buffer again to avoid double buffering (1.2 * 1.2 = 1.44x instead of 1.2x).
+            //
+            // Note: This heuristic (checking if theirs >= our) is imperfect because:
+            // - Users could manually configure higher values
+            // - Previous mutations or other logic could have set higher values
+            // - It doesn't definitively prove the resources came from measured pods
+            // However, in practice, measured pods logic runs before this function and sets resources
+            // with a 1.2x buffer, so if theirs >= our base recommendation, it's likely from measured pods.
+            // A more robust solution would be to pass explicit metadata tracking resource source,
+            // but this heuristic works for the current implementation where measured pods are processed first.
+            alreadySetByMeasuredPods := !their.IsZero() && their.Cmp(our) >= 0
+
+            if !alreadySetByMeasuredPods {
+                // Apply a 1.2x safety buffer to resource recommendations to reduce the rate of OOMKilled
+                // and similar workload terminations. This buffer accounts for:
+                // - Natural variance in resource usage patterns
+                // - Transient spikes in CPU/memory consumption
+                // - Measurement inaccuracies in historical data
+                // The 20% overhead provides a safety margin while still allowing for efficient resource utilization.
+                increased := our.AsApproximateFloat64() * 1.2
+                our.Set(int64(increased))
+            } else {
+                logger.Debugf("Skipping 1.2x buffer for %s %s as resources appear to be already set by measured pods logic", pair.resource, field)
+            }
             fieldLogger := logger.WithFields(logrus.Fields{
                 "workloadName": workloadName,
                 "workloadType": workloadType,
@@ -231,13 +284,40 @@ func useOursIfLarger(allOfOurs, allOfTheirs *corev1.ResourceRequirements, workloadName, workloadType string, reporter results.PodScalerReporter, logger *logrus.Entry) {
             })
             cmp := our.Cmp(their)
             if cmp == 1 {
-                fieldLogger.Debug("determined amount larger than configured")
+                fieldLogger.Debug("determined amount larger than configured, increasing resources")
                 (*pair.theirs)[field] = our
                 if their.Value() > 0 && our.Value() > (their.Value()*10) {
                     reporter.ReportResourceConfigurationWarning(workloadName, workloadType, their.String(), our.String(), field.String())
                 }
             } else if cmp < 0 {
-                fieldLogger.Debug("determined amount smaller than configured")
+                authoritative := (field == corev1.ResourceCPU && authoritativeCPU) || (field == corev1.ResourceMemory && authoritativeMemory)
+                if authoritative {
+                    // Apply gradual reduction with safety limits: max 25% reduction per cycle, minimum 5% difference
+                    ourValue := our.AsApproximateFloat64()
+                    theirValue := their.AsApproximateFloat64()
+                    if theirValue > 0 {
+                        reductionPercent := 1.0 - (ourValue / theirValue)
+                        maxReductionPercent := 0.25
+
+                        if reductionPercent >= 0.05 {
+                            if reductionPercent > maxReductionPercent {
+                                maxAllowed := theirValue * (1.0 - maxReductionPercent)
+                                our.Set(int64(maxAllowed))
+                                fieldLogger.Debugf("applying gradual reduction (limited to 25%% per cycle)")
+                            } else {
+                                fieldLogger.Debug("reducing resources based on recent usage")
+                            }
+                            (*pair.theirs)[field] = our
+                        } else {
+                            fieldLogger.Debug("difference less than 5%, skipping micro-adjustment")
+                        }
+                    } else {
+                        fieldLogger.Debug("theirs is zero, applying recommendation")
+                        (*pair.theirs)[field] = our
+                    }
+                } else {
+                    fieldLogger.Debug("authoritative mode disabled, keeping existing value")
+                }
             } else {
                 fieldLogger.Debug("determined amount equal to configured")
             }
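Note: restated outside the Quantity plumbing, the reduction policy above is: ignore differences under 5%, and never cut more than 25% in a single admission cycle. A small standalone restatement with an illustrative value (the clampReduction helper is not part of the commit):

    // Standalone restatement of the reduction rules in the hunk above.
    package main

    import "fmt"

    // clampReduction returns the value to set when the recommendation is lower than the
    // currently configured request: reductions under 5% are skipped, reductions above 25%
    // are capped at a 25% step.
    func clampReduction(recommended, configured float64) (newValue float64, applied bool) {
        if configured <= 0 {
            return recommended, true // nothing configured yet, take the recommendation
        }
        reduction := 1.0 - recommended/configured
        switch {
        case reduction < 0.05:
            return configured, false // micro-adjustment, keep the existing value
        case reduction > 0.25:
            return configured * 0.75, true // at most a 25% reduction per cycle
        default:
            return recommended, true
        }
    }

    func main() {
        // Configured 4000m CPU, recommended 2000m: a 50% drop is clamped to 3000m.
        v, applied := clampReduction(2000, 4000)
        fmt.Println(v, applied) // 3000 true
    }
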
@@ -292,16 +372,44 @@ func preventUnschedulable(resources *corev1.ResourceRequirements, cpuCap int64, memoryCap string, logger *logrus.Entry) {
     }
 }
 
-func mutatePodResources(pod *corev1.Pod, server *resourceServer, mutateResourceLimits bool, cpuCap int64, memoryCap string, reporter results.PodScalerReporter, logger *logrus.Entry) {
+func mutatePodResources(pod *corev1.Pod, server *resourceServer, mutateResourceLimits bool, cpuCap int64, memoryCap string, authoritativeCPU, authoritativeMemory bool, reporter results.PodScalerReporter, logger *logrus.Entry) {
+    // Check if this is a measured pod - measured pods have resources set by ApplyMeasuredPodResources
+    // and we should preserve those instead of overwriting with Prometheus recommendations
+    isMeasuredPod := pod.Labels != nil && pod.Labels[PodScalerLabelKey] == PodScalerLabelValueMeasured
+
     mutateResources := func(containers []corev1.Container) {
         for i := range containers {
+            // For measured pods, skip Prometheus-based recommendations if resources were already set
+            // by ApplyMeasuredPodResources (which uses BigQuery measured data)
+            if isMeasuredPod {
+                hasCPURequest := false
+                hasMemoryRequest := false
+                if containers[i].Resources.Requests != nil {
+                    if cpuReq, ok := containers[i].Resources.Requests[corev1.ResourceCPU]; ok && cpuReq.Sign() > 0 {
+                        hasCPURequest = true
+                    }
+                    if memReq, ok := containers[i].Resources.Requests[corev1.ResourceMemory]; ok && memReq.Sign() > 0 {
+                        hasMemoryRequest = true
+                    }
+                }
+                if hasCPURequest || hasMemoryRequest {
+                    logger.Debugf("Skipping Prometheus recommendations for measured pod container %s - resources already set from BigQuery data", containers[i].Name)
+                    // Still apply caps and limits even for measured pods
+                    preventUnschedulable(&containers[i].Resources, cpuCap, memoryCap, logger)
+                    if mutateResourceLimits {
+                        reconcileLimits(&containers[i].Resources)
+                    }
+                    continue
+                }
+            }
+
             meta := podscaler.MetadataFor(pod.ObjectMeta.Labels, pod.ObjectMeta.Name, containers[i].Name)
             resources, recommendationExists := server.recommendedRequestFor(meta)
             if recommendationExists {
                 logger.Debugf("recommendation exists for: %s", containers[i].Name)
                 workloadType := determineWorkloadType(pod.Annotations, pod.Labels)
                 workloadName := determineWorkloadName(pod.Name, containers[i].Name, workloadType, pod.Labels)
-                useOursIfLarger(&resources, &containers[i].Resources, workloadName, workloadType, reporter, logger)
+                applyRecommendationsBasedOnRecentData(&resources, &containers[i].Resources, workloadName, workloadType, authoritativeCPU, authoritativeMemory, reporter, logger)
                 if mutateResourceLimits {
                     reconcileLimits(&containers[i].Resources)
                 }
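Note: ApplyMeasuredPodResources itself is added in another file of this commit and is not shown here. Based only on the comments in this diff (resources come from BigQuery measured data and already carry the 1.2x buffer), a stand-in for the request-setting step might look like the following; the function name, parameters, and data source are assumptions.

    // Stand-in sketch; not the committed ApplyMeasuredPodResources.
    package main

    import (
        corev1 "k8s.io/api/core/v1"
        "k8s.io/apimachinery/pkg/api/resource"
    )

    // setMeasuredRequests sets container requests from measured usage, applying the 1.2x
    // buffer once here so applyRecommendationsBasedOnRecentData can detect it and skip re-buffering.
    func setMeasuredRequests(container *corev1.Container, measuredCPUMilli, measuredMemoryBytes int64) {
        if container.Resources.Requests == nil {
            container.Resources.Requests = corev1.ResourceList{}
        }
        cpu := resource.NewMilliQuantity(int64(float64(measuredCPUMilli)*1.2), resource.DecimalSI)
        mem := resource.NewQuantity(int64(float64(measuredMemoryBytes)*1.2), resource.BinarySI)
        container.Resources.Requests[corev1.ResourceCPU] = *cpu
        container.Resources.Requests[corev1.ResourceMemory] = *mem
    }
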

cmd/pod-scaler/admission_test.go

Lines changed: 3 additions & 3 deletions
@@ -554,7 +554,7 @@ func TestMutatePodResources(t *testing.T) {
     for _, testCase := range testCases {
         t.Run(testCase.name, func(t *testing.T) {
             original := testCase.pod.DeepCopy()
-            mutatePodResources(testCase.pod, testCase.server, testCase.mutateResourceLimits, 10, "20Gi", &defaultReporter, logrus.WithField("test", testCase.name))
+            mutatePodResources(testCase.pod, testCase.server, testCase.mutateResourceLimits, 10, "20Gi", false, false, &defaultReporter, logrus.WithField("test", testCase.name))
             diff := cmp.Diff(original, testCase.pod)
             // In some cases, cmp.Diff decides to use non-breaking spaces, and it's not
             // particularly deterministic about this. We don't care.
@@ -729,7 +729,7 @@ func TestUseOursIfLarger(t *testing.T) {
     }
     for _, testCase := range testCases {
         t.Run(testCase.name, func(t *testing.T) {
-            useOursIfLarger(&testCase.ours, &testCase.theirs, "test", "build", &defaultReporter, logrus.WithField("test", testCase.name))
+            applyRecommendationsBasedOnRecentData(&testCase.ours, &testCase.theirs, "test", "build", false, false, &defaultReporter, logrus.WithField("test", testCase.name))
             if diff := cmp.Diff(testCase.theirs, testCase.expected); diff != "" {
                 t.Errorf("%s: got incorrect resources after mutation: %v", testCase.name, diff)
             }
@@ -814,7 +814,7 @@ func TestUseOursIsLarger_ReporterReports(t *testing.T) {
 
     for _, tc := range testCases {
         t.Run(tc.name, func(t *testing.T) {
-            useOursIfLarger(&tc.ours, &tc.theirs, "test", "build", &tc.reporter, logrus.WithField("test", tc.name))
+            applyRecommendationsBasedOnRecentData(&tc.ours, &tc.theirs, "test", "build", false, false, &tc.reporter, logrus.WithField("test", tc.name))
 
             if diff := cmp.Diff(tc.reporter.called, tc.expected); diff != "" {
                 t.Errorf("actual and expected reporter states don't match, : %v", diff)

cmd/pod-scaler/main.go

Lines changed: 31 additions & 7 deletions
@@ -61,12 +61,21 @@ type consumerOptions struct {
     port   int
     uiPort int
 
-    dataDir               string
-    certDir               string
-    mutateResourceLimits  bool
-    cpuCap                int64
-    memoryCap             string
-    cpuPriorityScheduling int64
+    dataDir                     string
+    certDir                     string
+    mutateResourceLimits        bool
+    cpuCap                      int64
+    memoryCap                   string
+    cpuPriorityScheduling       int64
+    authoritativeCPURequests    bool
+    authoritativeMemoryRequests bool
+
+    // Measured pods options - when enabled, pods are classified as "normal" or "measured"
+    // Measured pods run on isolated nodes to get accurate CPU/memory utilization data
+    enableMeasuredPods      bool
+    bigQueryProjectID       string
+    bigQueryDatasetID       string
+    bigQueryCredentialsFile string
 }
 
 func bindOptions(fs *flag.FlagSet) *options {
@@ -89,6 +98,12 @@ func bindOptions(fs *flag.FlagSet) *options {
     fs.Int64Var(&o.cpuCap, "cpu-cap", 10, "The maximum CPU request value, ex: 10")
     fs.StringVar(&o.memoryCap, "memory-cap", "20Gi", "The maximum memory request value, ex: '20Gi'")
     fs.Int64Var(&o.cpuPriorityScheduling, "cpu-priority-scheduling", 8, "Pods with CPU requests at, or above, this value will be admitted with priority scheduling")
+    fs.BoolVar(&o.authoritativeCPURequests, "authoritative-cpu-requests", false, "Enable authoritative CPU request recommendations. When enabled, pod-scaler can reduce CPU requests based on recent usage data (past 3 weeks).")
+    fs.BoolVar(&o.authoritativeMemoryRequests, "authoritative-memory-requests", false, "Enable authoritative memory request recommendations. When enabled, pod-scaler can reduce memory requests based on recent usage data (past 3 weeks).")
+    fs.BoolVar(&o.enableMeasuredPods, "enable-measured-pods", false, "Enable measured pods feature. When enabled, pods are classified as 'normal' or 'measured' and measured pods run on isolated nodes to get accurate CPU/memory utilization data.")
+    fs.StringVar(&o.bigQueryProjectID, "bigquery-project-id", "", "Google Cloud project ID for BigQuery queries (required if enable-measured-pods is true)")
+    fs.StringVar(&o.bigQueryDatasetID, "bigquery-dataset-id", "", "BigQuery dataset ID for pod metrics (required if enable-measured-pods is true)")
+    fs.StringVar(&o.bigQueryCredentialsFile, "bigquery-credentials-file", "", "Path to Google Cloud credentials file for BigQuery access")
     o.resultsOptions.Bind(fs)
     return &o
 }
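Note: taken together, enabling both new behaviors on the admission deployment means passing arguments along these lines; the project, dataset, and credentials path are placeholders, and the deployment wiring itself is not part of this diff.

    --authoritative-cpu-requests=true \
    --authoritative-memory-requests=true \
    --enable-measured-pods=true \
    --bigquery-project-id=<gcp-project> \
    --bigquery-dataset-id=<dataset> \
    --bigquery-credentials-file=<path-to-credentials.json>
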
@@ -122,6 +137,15 @@ func (o *options) validate() error {
     if memoryCap := resource.MustParse(o.memoryCap); memoryCap.Sign() <= 0 {
         return errors.New("--memory-cap must be greater than 0")
     }
+    if o.enableMeasuredPods {
+        if o.bigQueryProjectID == "" {
+            return errors.New("--bigquery-project-id is required when --enable-measured-pods is true")
+        }
+        if o.bigQueryDatasetID == "" {
+            return errors.New("--bigquery-dataset-id is required when --enable-measured-pods is true")
+        }
+        // Note: bigQueryCredentialsFile may use default application credentials if not specified
+    }
     if err := o.resultsOptions.Validate(); err != nil {
         return err
     }
@@ -268,7 +292,7 @@ func mainAdmission(opts *options, cache Cache) {
         logrus.WithError(err).Fatal("Failed to create pod-scaler reporter.")
     }
 
-    go admit(opts.port, opts.instrumentationOptions.HealthPort, opts.certDir, client, loaders(cache), opts.mutateResourceLimits, opts.cpuCap, opts.memoryCap, opts.cpuPriorityScheduling, reporter)
+    go admit(opts.port, opts.instrumentationOptions.HealthPort, opts.certDir, client, loaders(cache), opts.mutateResourceLimits, opts.cpuCap, opts.memoryCap, opts.cpuPriorityScheduling, opts.authoritativeCPURequests, opts.authoritativeMemoryRequests, opts.enableMeasuredPods, opts.bigQueryProjectID, opts.bigQueryDatasetID, opts.bigQueryCredentialsFile, reporter)
 }
 
 func loaders(cache Cache) map[string][]*cacheReloader {
