Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 113 additions & 0 deletions report.xml

Large diffs are not rendered by default.

Binary file not shown.
89 changes: 41 additions & 48 deletions test/e2e/performanceprofile/functests/0_config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,10 @@ import (
"context"
"fmt"
"os"
"time"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/onsi/gomega/format"

"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/serializer"
Expand All @@ -25,14 +22,10 @@ import (
testutils "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils"
testclient "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/client"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/cluster"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/discovery"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/hypershift"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/label"
testlog "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/log"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/mcps"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/nodepools"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/profiles"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/profilesupdate"
)

var RunningOnSingleNode bool
Expand All @@ -45,51 +38,51 @@ var _ = Describe("[performance][config] Performance configuration", Ordered, fun
RunningOnSingleNode = isSNO
})

It("Should successfully deploy the performance profile", Label(string(label.Tier0)), func() {
// It("Should successfully deploy the performance profile", Label(string(label.Tier0)), func() {

performanceProfile, err := testProfile()
Expect(err).ToNot(HaveOccurred(), "failed to build performance profile: %v", err)
profileAlreadyExists := false
// performanceProfile, err := testProfile()
// Expect(err).ToNot(HaveOccurred(), "failed to build performance profile: %v", err)
// profileAlreadyExists := false

performanceManifest, foundOverride := os.LookupEnv("PERFORMANCE_PROFILE_MANIFEST_OVERRIDE")
if foundOverride {
performanceProfile, err = externalPerformanceProfile(performanceManifest)
Expect(err).ToNot(HaveOccurred(), "Failed overriding performance profile", performanceManifest)
testlog.Warningf("Consuming performance profile from %s", performanceManifest)
}
if discovery.Enabled() {
performanceProfile, err = profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
Expect(err).ToNot(HaveOccurred(), "Failed finding a performance profile in discovery mode using selector %v", testutils.NodeSelectorLabels)
testlog.Info("Discovery mode: consuming a deployed performance profile from the cluster")
profileAlreadyExists = true
}
// performanceManifest, foundOverride := os.LookupEnv("PERFORMANCE_PROFILE_MANIFEST_OVERRIDE")
// if foundOverride {
// performanceProfile, err = externalPerformanceProfile(performanceManifest)
// Expect(err).ToNot(HaveOccurred(), "Failed overriding performance profile", performanceManifest)
// testlog.Warningf("Consuming performance profile from %s", performanceManifest)
// }
// if discovery.Enabled() {
// performanceProfile, err = profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
// Expect(err).ToNot(HaveOccurred(), "Failed finding a performance profile in discovery mode using selector %v", testutils.NodeSelectorLabels)
// testlog.Info("Discovery mode: consuming a deployed performance profile from the cluster")
// profileAlreadyExists = true
// }

if !discovery.Enabled() {
By("Creating the PerformanceProfile")
// this might fail while the operator is still being deployed and the CRD does not exist yet
Eventually(func() error {
err := testclient.ControlPlaneClient.Create(context.TODO(), performanceProfile)
if errors.IsAlreadyExists(err) {
testlog.Warning(fmt.Sprintf("A PerformanceProfile with name %s already exists! If created externally, tests might have unexpected behaviour", performanceProfile.Name))
profileAlreadyExists = true
return nil
}
return err
}, cluster.ComputeTestTimeout(15*time.Minute, RunningOnSingleNode), 15*time.Second).ShouldNot(HaveOccurred(), "Failed creating the performance profile")
}
unpauseMCP(context.TODO(), performanceProfile)
attachProfileToNodePool(context.TODO(), performanceProfile)
// if the profile exists, it's likely to have been through the updating phase, so we only
// wait for updated.
if !profileAlreadyExists {
profilesupdate.WaitForTuningUpdating(context.TODO(), performanceProfile)
}
profilesupdate.WaitForTuningUpdated(context.TODO(), performanceProfile)
// if !discovery.Enabled() {
// By("Creating the PerformanceProfile")
// // this might fail while the operator is still being deployed and the CRD does not exist yet
// Eventually(func() error {
// err := testclient.ControlPlaneClient.Create(context.TODO(), performanceProfile)
// if errors.IsAlreadyExists(err) {
// testlog.Warning(fmt.Sprintf("A PerformanceProfile with name %s already exists! If created externally, tests might have unexpected behaviour", performanceProfile.Name))
// profileAlreadyExists = true
// return nil
// }
// return err
// }, cluster.ComputeTestTimeout(15*time.Minute, RunningOnSingleNode), 15*time.Second).ShouldNot(HaveOccurred(), "Failed creating the performance profile")
// }
// unpauseMCP(context.TODO(), performanceProfile)
// attachProfileToNodePool(context.TODO(), performanceProfile)
// // if the profile exists, it's likely to have been through the updating phase, so we only
// // wait for updated.
// if !profileAlreadyExists {
// profilesupdate.WaitForTuningUpdating(context.TODO(), performanceProfile)
// }
// profilesupdate.WaitForTuningUpdated(context.TODO(), performanceProfile)

Expect(testclient.ControlPlaneClient.Get(context.TODO(), client.ObjectKeyFromObject(performanceProfile), performanceProfile))
By("Printing the updated profile")
testlog.Info(format.Object(performanceProfile, 2))
})
// Expect(testclient.ControlPlaneClient.Get(context.TODO(), client.ObjectKeyFromObject(performanceProfile), performanceProfile))
// By("Printing the updated profile")
// testlog.Info(format.Object(performanceProfile, 2))
// })
})

func externalPerformanceProfile(performanceManifest string) (*performancev2.PerformanceProfile, error) {
Expand Down
28 changes: 28 additions & 0 deletions test/e2e/performanceprofile/functests/0_config/report.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<testsuites tests="1" disabled="0" errors="0" failures="0" time="5.8269e-05">
<testsuite name="Performance Addon Operator configuration" package="/home/shajmakh/ghrepo/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/0_config" tests="1" disabled="0" skipped="0" errors="0" failures="0" time="5.8269e-05" timestamp="2026-05-18T15:04:26">
<properties>
<property name="SuiteSucceeded" value="true"></property>
<property name="SuiteHasProgrammaticFocus" value="false"></property>
<property name="SpecialSuiteFailureReason" value=""></property>
<property name="SuiteLabels" value="[]"></property>
<property name="RandomSeed" value="1779105864"></property>
<property name="RandomizeAllSpecs" value="false"></property>
<property name="LabelFilter" value=""></property>
<property name="FocusStrings" value=""></property>
<property name="SkipStrings" value=""></property>
<property name="FocusFiles" value=""></property>
<property name="SkipFiles" value=""></property>
<property name="FailOnPending" value="false"></property>
<property name="FailOnEmpty" value="false"></property>
<property name="FailFast" value="false"></property>
<property name="FlakeAttempts" value="2"></property>
<property name="DryRun" value="false"></property>
<property name="ParallelTotal" value="1"></property>
<property name="OutputInterceptorMode" value=""></property>
</properties>
<testcase name="[ReportAfterSuite] e2e serial suite" classname="Performance Addon Operator configuration" status="passed" time="1.8916e-05">
<system-err>&gt; Enter [ReportAfterSuite] TOP-LEVEL - /home/shajmakh/ghrepo/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/0_config/test_suite_performance_config_test.go:65 @ 05/18/26 15:04:26.157&#xA;&lt; Exit [ReportAfterSuite] TOP-LEVEL - /home/shajmakh/ghrepo/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/0_config/test_suite_performance_config_test.go:65 @ 05/18/26 15:04:26.157 (0s)&#xA;</system-err>
</testcase>
</testsuite>
</testsuites>
79 changes: 70 additions & 9 deletions test/e2e/performanceprofile/functests/4_latency/latency.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,25 @@ import (
)

const (
oslatTestName = "oslat"
cyclictestTestName = "cyclictest"
hwlatdetectTestName = "hwlatdetect"
defaultTestDelay = 0
defaultTestRuntime = "300"
defaultMaxLatency = -1
defaultTestCpus = -1
//tools names
oslatTestName = "oslat"
cyclictestTestName = "cyclictest"
hwlatdetectTestName = "hwlatdetect"

//default values
defaultTestDelay = 0
defaultTestRuntime = "300"
defaultMaxLatency = -1
defaultTestCpus = -1
defaultTestMemory = "1Gi"

//dynamic memory mode values
dynamicMemory = "dynamic"
// 32Mi per requested CPU should be sufficient for the test; if more is needed, set the
// LATENCY_TEST_MEMORY environment variable to an explicit quantity
perCpuMemoryFactor = 32
memoryFactorFormat = "Mi"

minCpuAmountForOslat = 2
)

Expand All @@ -51,13 +63,14 @@ var (
latencyTestRuntime = defaultTestRuntime
maximumLatency = defaultMaxLatency
latencyTestCpus = defaultTestCpus
latencyTestMemory = defaultTestMemory
)

// LATENCY_TEST_DELAY: delays the run of the binary; useful for giving the CPU manager reconcile loop
// time to update the default CPU pool
// LATENCY_TEST_RUNTIME: the amount of time in seconds that the latency test should run
// LATENCY_TEST_CPUS: the number of CPUs that the pod running the latency test should request

// LATENCY_TEST_MEMORY: the amount of memory that the pod running the latency test should request, or "dynamic" to scale it with the CPU count
var _ = Describe("[performance] Latency Test", Ordered, func() {
var workerRTNode *corev1.Node
var profile *performancev2.PerformanceProfile
Expand Down Expand Up @@ -278,6 +291,50 @@ func getLatencyTestCpus() (int, error) {
return defaultTestCpus, nil
}

// GetLatencyTestMemory returns the memory limit for the latency test pod based on
// LATENCY_TEST_MEMORY and the CPU count.
// If LATENCY_TEST_MEMORY is unset, it returns defaultTestMemory.
// If set to "dynamic", it returns max(32Mi per CPU, defaultTestMemory); when cpus is unset or
// less than 1, it returns defaultTestMemory without scaling.
// Any other value must be a valid Kubernetes resource quantity (e.g. "512Mi") and is returned as-is.
func GetLatencyTestMemory(cpus int) (string, error) {
if val, ok := os.LookupEnv("LATENCY_TEST_MEMORY"); ok {
if val == dynamicMemory {
// Defensive check: fall back to default memory if the CPU count was not normalized
if cpus == defaultTestCpus || cpus < 1 {
return defaultTestMemory, nil
}

computedInt := perCpuMemoryFactor * cpus

computedQuantity, err := resource.ParseQuantity(fmt.Sprintf("%d%s", computedInt, memoryFactorFormat))
if err != nil {
return "", fmt.Errorf("failed to parse computed quantity %q: %w", fmt.Sprintf("%d%s", computedInt, memoryFactorFormat), err)
}
defaultQuantity, err := resource.ParseQuantity(defaultTestMemory)
if err != nil {
return "", fmt.Errorf("failed to parse default quantity %q: %w", defaultTestMemory, err)
}

maxQuantity := defaultQuantity // floor at defaultTestMemory for backward compatibility
if computedQuantity.Cmp(defaultQuantity) > 0 {
maxQuantity = computedQuantity
}
return maxQuantity.String(), nil
}

q, err := resource.ParseQuantity(val)
if err != nil {
return val, fmt.Errorf("the environment variable LATENCY_TEST_MEMORY has incorrect value %q, it must be a valid quantity: %w", val, err)
}
if q.Sign() <= 0 {
return val, fmt.Errorf("the environment variable LATENCY_TEST_MEMORY has invalid value %q, it must be greater than 0", val)
}
return val, nil
}
return defaultTestMemory, nil
}

// getMaximumLatency should look for one of the following environment variables:
// OSLAT_MAXIMUM_LATENCY: the expected maximum latency for all buckets in us
// CYCLICTEST_MAXIMUM_LATENCY: the expected maximum latency for all buckets in us
Expand Down Expand Up @@ -325,6 +382,10 @@ func getLatencyTestPod(profile *performancev2.PerformanceProfile, node *corev1.N
latencyTestCpus = cpus.Size() - 1
}

var err error
latencyTestMemory, err = GetLatencyTestMemory(latencyTestCpus)
Expect(err).ToNot(HaveOccurred())

latencyTestRunnerArgs := []string{
"-logtostderr=false",
"-alsologtostderr=true",
Expand Down Expand Up @@ -361,7 +422,7 @@ func getLatencyTestPod(profile *performancev2.PerformanceProfile, node *corev1.N
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse(strconv.Itoa(latencyTestCpus)),
corev1.ResourceMemory: resource.MustParse("1Gi"),
corev1.ResourceMemory: resource.MustParse(latencyTestMemory),
},
},
SecurityContext: &corev1.SecurityContext{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package tester

import (
"os"
"testing"

latency "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/4_latency"
)

// TestGetLatencyTestMemory verifies how latency.GetLatencyTestMemory resolves the pod memory
// limit from the LATENCY_TEST_MEMORY environment variable and the CPU count, including the
// rejection of values that are not positive resource quantities.
func TestGetLatencyTestMemory(t *testing.T) {
	const envVar = "LATENCY_TEST_MEMORY"

	testcases := []struct {
		testName       string
		envVarValue    string // empty means the variable is left unset
		cpus           int
		expectedMemory string
		expectError    bool
	}{
		{
			testName:       "no env var set should default to 1Gi - low count of CPUs",
			envVarValue:    "",
			cpus:           4,
			expectedMemory: "1Gi",
		},
		{
			testName:       "no env var set should default to 1Gi - high count of CPUs",
			envVarValue:    "",
			cpus:           50,
			expectedMemory: "1Gi",
		},
		{
			testName:       "dynamic memory should be 32Mi per CPU with high count of CPUs",
			envVarValue:    "dynamic",
			cpus:           50,
			expectedMemory: "1600Mi",
		},
		{
			testName:       "explicitly set to 100Mi despite the CPUs count",
			envVarValue:    "100Mi",
			cpus:           50,
			expectedMemory: "100Mi",
		},
		{
			testName:       "2 CPUs should default to 1Gi",
			envVarValue:    "dynamic",
			cpus:           2,
			expectedMemory: "1Gi",
		},
		{
			testName:       "dynamic memory with 32 CPUs equals the 1Gi floor",
			envVarValue:    "dynamic",
			cpus:           32,
			expectedMemory: "1Gi",
		},
		{
			testName:       "unset env var and unset cpus should default to 1Gi",
			envVarValue:    "",
			cpus:           -1,
			expectedMemory: "1Gi",
		},
		{
			testName:       "dynamic memory and unset cpus should default to 1Gi",
			envVarValue:    "dynamic",
			cpus:           -1,
			expectedMemory: "1Gi",
		},
		{
			testName:       "default memory and 0 cpus should default to 1Gi",
			envVarValue:    "",
			cpus:           0,
			expectedMemory: "1Gi",
		},
		{
			testName:    "value that is not a quantity should be rejected",
			envVarValue: "invalid",
			cpus:        4,
			expectError: true,
		},
		{
			testName:    "zero quantity should be rejected",
			envVarValue: "0",
			cpus:        4,
			expectError: true,
		},
		{
			testName:    "negative quantity should be rejected",
			envVarValue: "-1Gi",
			cpus:        4,
			expectError: true,
		},
	}
	for _, testcase := range testcases {
		t.Run(testcase.testName, func(t *testing.T) {
			// t.Setenv records the ambient value and restores it when the subtest ends,
			// so the test neither depends on nor clobbers the caller's environment.
			t.Setenv(envVar, testcase.envVarValue)
			if testcase.envVarValue == "" {
				// An empty value models "unset": remove the variable for this subtest.
				if err := os.Unsetenv(envVar); err != nil {
					t.Fatalf("failed to unset %s: %v", envVar, err)
				}
			}

			memory, err := latency.GetLatencyTestMemory(testcase.cpus)
			if testcase.expectError {
				if err == nil {
					t.Fatalf("expected an error for %s=%q, got memory %q", envVar, testcase.envVarValue, memory)
				}
				return
			}
			if err != nil {
				t.Fatalf("failed to get latency test memory: %v", err)
			}
			if memory != testcase.expectedMemory {
				t.Fatalf("expected memory %s, got %s", testcase.expectedMemory, memory)
			}
		})
	}
}
Binary file not shown.
Loading