@@ -14,6 +14,7 @@ import (
1414 "os/exec"
1515 "path/filepath"
1616 "reflect"
17+ "strconv"
1718 "strings"
1819 "time"
1920
@@ -88,6 +89,13 @@ func RegisterSteps(sc *godog.ScenarioContext) {
8889 sc .Step (`^(?i)resource apply fails with error msg containing "([^"]+)"$` , ResourceApplyFails )
8990 sc .Step (`^(?i)resource "([^"]+)" is eventually restored$` , ResourceRestored )
9091 sc .Step (`^(?i)resource "([^"]+)" matches$` , ResourceMatches )
92+ sc .Step (`^(?i)user performs rollout restart on "([^"]+)"$` , UserPerformsRolloutRestart )
93+ sc .Step (`^(?i)resource "([^"]+)" has restart annotation$` , ResourceHasRestartAnnotation )
94+ sc .Step (`^(?i)deployment "([^"]+)" is ready$` , DeploymentIsReady )
95+ sc .Step (`^(?i)deployment "([^"]+)" rollout completes successfully$` , DeploymentRolloutCompletesSuccessfully )
96+ sc .Step (`^(?i)I wait for "([^"]+)" seconds$` , WaitForSeconds )
97+ sc .Step (`^(?i)deployment "([^"]+)" rollout is still successful$` , DeploymentRolloutIsStillSuccessful )
98+ sc .Step (`^(?i)deployment "([^"]+)" has expected number of ready replicas$` , DeploymentHasExpectedReadyReplicas )
9199
92100 sc .Step (`^(?i)ServiceAccount "([^"]*)" with needed permissions is available in test namespace$` , ServiceAccountWithNeededPermissionsIsAvailableInNamespace )
93101 sc .Step (`^(?i)ServiceAccount "([^"]*)" with needed permissions is available in \${TEST_NAMESPACE}$` , ServiceAccountWithNeededPermissionsIsAvailableInNamespace )
@@ -1288,3 +1296,265 @@ func latestActiveRevisionForExtension(extName string) (*ocv1.ClusterExtensionRev
12881296
12891297 return latest , nil
12901298}
1299+
1300+ // UserPerformsRolloutRestart simulates a user running "kubectl rollout restart deployment/<name>".
1301+ // This adds a restart annotation to trigger a rolling restart of pods.
1302+ // This is used to test the generic fix - OLM should not undo ANY user-added annotations.
1303+ // In OLMv0, OLM would undo this change. In OLMv1, it should stay because kubectl owns it.
1304+ // See: https://github.com/operator-framework/operator-lifecycle-manager/issues/3392
1305+ func UserPerformsRolloutRestart (ctx context.Context , resourceName string ) error {
1306+ sc := scenarioCtx (ctx )
1307+ resourceName = substituteScenarioVars (resourceName , sc )
1308+
1309+ kind , deploymentName , ok := strings .Cut (resourceName , "/" )
1310+ if ! ok {
1311+ return fmt .Errorf ("invalid resource name format: %s (expected kind/name)" , resourceName )
1312+ }
1313+
1314+ if kind != "deployment" {
1315+ return fmt .Errorf ("only deployment resources are supported for restart annotation, got: %s" , kind )
1316+ }
1317+
1318+ // Run kubectl rollout restart to add the restart annotation.
1319+ // This is the real command users run, so we test actual user behavior.
1320+ out , err := k8sClient ("rollout" , "restart" , resourceName , "-n" , sc .namespace )
1321+ if err != nil {
1322+ return fmt .Errorf ("failed to rollout restart %s: %w" , resourceName , err )
1323+ }
1324+
1325+ logger .V (1 ).Info ("Rollout restart initiated" , "deployment" , deploymentName , "output" , out )
1326+
1327+ return nil
1328+ }
1329+
1330+ // ResourceHasRestartAnnotation checks that a deployment has a restart annotation.
1331+ // This confirms that user changes stay in place after OLM runs.
1332+ // The fix is generic and works for ANY user-added annotations on pod templates.
1333+ func ResourceHasRestartAnnotation (ctx context.Context , resourceName string ) error {
1334+ sc := scenarioCtx (ctx )
1335+ resourceName = substituteScenarioVars (resourceName , sc )
1336+
1337+ kind , deploymentName , ok := strings .Cut (resourceName , "/" )
1338+ if ! ok {
1339+ return fmt .Errorf ("invalid resource name format: %s (expected kind/name)" , resourceName )
1340+ }
1341+
1342+ if kind != "deployment" {
1343+ return fmt .Errorf ("only deployment resources are supported for restart annotation check, got: %s" , kind )
1344+ }
1345+
1346+ // Look for the restart annotation added by "kubectl rollout restart"
1347+ restartAnnotationKey := "kubectl.kubernetes.io/restartedAt"
1348+
1349+ // Keep checking until the annotation appears (it may not show up immediately)
1350+ // This is better than checking only once
1351+ var annotationValue string
1352+ waitFor (ctx , func () bool {
1353+ out , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , sc .namespace ,
1354+ "-o" , fmt .Sprintf ("jsonpath={.spec.template.metadata.annotations['%s']}" , restartAnnotationKey ))
1355+ if err != nil {
1356+ return false
1357+ }
1358+ // If the annotation exists and has a value, it stayed in place
1359+ if out == "" {
1360+ return false
1361+ }
1362+ annotationValue = out
1363+ return true
1364+ })
1365+
1366+ logger .V (1 ).Info ("Restart annotation found" , "deployment" , deploymentName , "restartedAt" , annotationValue )
1367+ return nil
1368+ }
1369+
1370+ // DeploymentIsReady checks that a deployment is ready with all pods running.
1371+ func DeploymentIsReady (ctx context.Context , deploymentName string ) error {
1372+ sc := scenarioCtx (ctx )
1373+ deploymentName = substituteScenarioVars (deploymentName , sc )
1374+
1375+ waitFor (ctx , func () bool {
1376+ // Check if deployment is ready
1377+ out , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , sc .namespace ,
1378+ "-o" , "jsonpath={.status.conditions[?(@.type=='Available')].status}" )
1379+ if err != nil {
1380+ return false
1381+ }
1382+ return out == "True"
1383+ })
1384+
1385+ logger .V (1 ).Info ("Deployment is ready" , "deployment" , deploymentName )
1386+ return nil
1387+ }
1388+
1389+ // DeploymentRolloutCompletesSuccessfully waits for the rollout to finish.
1390+ // This checks that new pods were created and are running.
1391+ func DeploymentRolloutCompletesSuccessfully (ctx context.Context , deploymentName string ) error {
1392+ sc := scenarioCtx (ctx )
1393+ deploymentName = substituteScenarioVars (deploymentName , sc )
1394+
1395+ // Use kubectl rollout status to wait until done.
1396+ // This makes sure new pods are created and running.
1397+ // Timeout is 7m to account for startup probes (test-operator has 5min startup probe)
1398+ out , err := k8sClient ("rollout" , "status" , "deployment/" + deploymentName , "-n" , sc .namespace , "--timeout=7m" )
1399+ if err != nil {
1400+ return fmt .Errorf ("deployment rollout failed: %w, output: %s" , err , out )
1401+ }
1402+
1403+ logger .V (1 ).Info ("Deployment rollout completed" , "deployment" , deploymentName , "status" , out )
1404+
1405+ // Check deployment status
1406+ available , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , sc .namespace ,
1407+ "-o" , "jsonpath={.status.conditions[?(@.type=='Available')].status}" )
1408+ if err != nil {
1409+ return fmt .Errorf ("failed to check deployment availability: %w" , err )
1410+ }
1411+ if available != "True" {
1412+ return fmt .Errorf ("deployment %s is not available" , deploymentName )
1413+ }
1414+
1415+ progressing , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , sc .namespace ,
1416+ "-o" , "jsonpath={.status.conditions[?(@.type=='Progressing')].status}" )
1417+ if err != nil {
1418+ return fmt .Errorf ("failed to check deployment progressing: %w" , err )
1419+ }
1420+ if progressing != "True" {
1421+ return fmt .Errorf ("deployment %s is not progressing correctly" , deploymentName )
1422+ }
1423+
1424+ return nil
1425+ }
1426+
1427+ // WaitForSeconds waits for the given number of seconds.
1428+ // This gives OLM time to run its checks between test steps.
1429+ //
1430+ // Note: We wait for a fixed time (not checking in a loop) because we need to make sure
1431+ // OLM actually runs (it runs every 10 seconds). We want to check that user changes stay
1432+ // AFTER OLM runs. If we just checked in a loop, we wouldn't know if OLM ran yet.
1433+ func WaitForSeconds (ctx context.Context , seconds string ) error {
1434+ sec , err := strconv .Atoi (seconds )
1435+ if err != nil {
1436+ return fmt .Errorf ("invalid seconds value %s: %w" , seconds , err )
1437+ }
1438+
1439+ if sec <= 0 {
1440+ return fmt .Errorf ("seconds value must be greater than 0, got %d" , sec )
1441+ }
1442+
1443+ logger .V (1 ).Info ("Waiting for reconciliation" , "seconds" , sec )
1444+
1445+ // Use select so the wait can be stopped if needed
1446+ dur := time .Duration (sec ) * time .Second
1447+ select {
1448+ case <- time .After (dur ):
1449+ logger .V (1 ).Info ("Wait complete" )
1450+ return nil
1451+ case <- ctx .Done ():
1452+ return fmt .Errorf ("wait canceled: %w" , ctx .Err ())
1453+ }
1454+ }
1455+
1456+ // verifyDeploymentReplicaStatus checks if a deployment has the right number of ready pods.
1457+ func verifyDeploymentReplicaStatus (deploymentName , namespace string ) (string , string , error ) {
1458+ readyReplicas , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , namespace ,
1459+ "-o" , "jsonpath={.status.readyReplicas}" )
1460+ if err != nil {
1461+ return "" , "" , fmt .Errorf ("failed to get ready replicas: %w" , err )
1462+ }
1463+
1464+ replicas , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , namespace ,
1465+ "-o" , "jsonpath={.spec.replicas}" )
1466+ if err != nil {
1467+ return "" , "" , fmt .Errorf ("failed to get desired replicas: %w" , err )
1468+ }
1469+
1470+ // Normalize empty jsonpath results (when readyReplicas is omitted for 0 replicas)
1471+ readyReplicas = strings .TrimSpace (readyReplicas )
1472+ replicas = strings .TrimSpace (replicas )
1473+ if readyReplicas == "" {
1474+ readyReplicas = "0"
1475+ }
1476+ if replicas == "" {
1477+ replicas = "0"
1478+ }
1479+
1480+ // Compare as integers to avoid false negatives
1481+ readyInt , err := strconv .Atoi (readyReplicas )
1482+ if err != nil {
1483+ return readyReplicas , replicas , fmt .Errorf ("invalid ready replicas value %q: %w" , readyReplicas , err )
1484+ }
1485+ replicasInt , err := strconv .Atoi (replicas )
1486+ if err != nil {
1487+ return readyReplicas , replicas , fmt .Errorf ("invalid desired replicas value %q: %w" , replicas , err )
1488+ }
1489+
1490+ if readyInt != replicasInt {
1491+ return readyReplicas , replicas , fmt .Errorf ("deployment %s has %d ready replicas but expected %d" ,
1492+ deploymentName , readyInt , replicasInt )
1493+ }
1494+
1495+ return readyReplicas , replicas , nil
1496+ }
1497+
1498+ // DeploymentRolloutIsStillSuccessful checks that the rollout is still working.
1499+ // This makes sure OLM didn't undo the user's "kubectl rollout restart" command.
1500+ // It checks that new pods are still running.
1501+ func DeploymentRolloutIsStillSuccessful (ctx context.Context , deploymentName string ) error {
1502+ sc := scenarioCtx (ctx )
1503+ deploymentName = substituteScenarioVars (deploymentName , sc )
1504+
1505+ // Check deployment status
1506+ available , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , sc .namespace ,
1507+ "-o" , "jsonpath={.status.conditions[?(@.type=='Available')].status}" )
1508+ if err != nil {
1509+ return fmt .Errorf ("failed to check deployment availability: %w" , err )
1510+ }
1511+ if available != "True" {
1512+ return fmt .Errorf ("deployment %s is not available - rollout was undone" , deploymentName )
1513+ }
1514+
1515+ // Check that the deployment is still working correctly
1516+ progressing , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , sc .namespace ,
1517+ "-o" , "jsonpath={.status.conditions[?(@.type=='Progressing')].status}" )
1518+ if err != nil {
1519+ return fmt .Errorf ("failed to check deployment progressing: %w" , err )
1520+ }
1521+ if progressing != "True" {
1522+ return fmt .Errorf ("deployment %s is not working - rollout may have been undone" , deploymentName )
1523+ }
1524+
1525+ // Check that the right number of pods are ready (rollout finished and wasn't stopped)
1526+ readyReplicas , replicas , err := verifyDeploymentReplicaStatus (deploymentName , sc .namespace )
1527+ if err != nil {
1528+ return fmt .Errorf ("%w - rollout may have been undone" , err )
1529+ }
1530+
1531+ logger .V (1 ).Info ("Deployment rollout is still successful" , "deployment" , deploymentName ,
1532+ "readyReplicas" , readyReplicas , "desiredReplicas" , replicas )
1533+
1534+ return nil
1535+ }
1536+
1537+ // DeploymentHasExpectedReadyReplicas checks that the deployment has the right number of ready pods.
1538+ // This makes sure the rollout finished successfully and pods are running.
1539+ func DeploymentHasExpectedReadyReplicas (ctx context.Context , deploymentName string ) error {
1540+ sc := scenarioCtx (ctx )
1541+ deploymentName = substituteScenarioVars (deploymentName , sc )
1542+
1543+ // Check that the right number of pods are ready
1544+ readyReplicas , replicas , err := verifyDeploymentReplicaStatus (deploymentName , sc .namespace )
1545+ if err != nil {
1546+ return err
1547+ }
1548+
1549+ // Also check that there are no unavailable pods
1550+ unavailableReplicas , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , sc .namespace ,
1551+ "-o" , "jsonpath={.status.unavailableReplicas}" )
1552+ if err == nil && unavailableReplicas != "" && unavailableReplicas != "0" {
1553+ return fmt .Errorf ("deployment %s has %s unavailable pods" , deploymentName , unavailableReplicas )
1554+ }
1555+
1556+ logger .V (1 ).Info ("Deployment has expected ready replicas" , "deployment" , deploymentName ,
1557+ "readyReplicas" , readyReplicas , "desiredReplicas" , replicas )
1558+
1559+ return nil
1560+ }
0 commit comments