diff --git a/ci-operator/config/openshift/kubernetes-nmstate/openshift-kubernetes-nmstate-main.yaml b/ci-operator/config/openshift/kubernetes-nmstate/openshift-kubernetes-nmstate-main.yaml
index 104aa4cbdbe93..65bc4cca42c0d 100644
--- a/ci-operator/config/openshift/kubernetes-nmstate/openshift-kubernetes-nmstate-main.yaml
+++ b/ci-operator/config/openshift/kubernetes-nmstate/openshift-kubernetes-nmstate-main.yaml
@@ -94,6 +94,12 @@ tests:
         IP_STACK=v4
         NETWORK_TYPE="OVNKubernetes"
         MIRROR_IMAGES=false
+    observers:
+      enable:
+      - observers-resource-watch
+    test:
+    - ref: kubernetes-nmstate-e2e-operator
+    - ref: kubernetes-nmstate-e2e-monitor-tests
     workflow: kubernetes-nmstate-e2e-operator
 - always_run: false
   as: e2e-handler-azure
diff --git a/ci-operator/step-registry/kubernetes-nmstate/e2e/monitor-tests/OWNERS b/ci-operator/step-registry/kubernetes-nmstate/e2e/monitor-tests/OWNERS
new file mode 100644
index 0000000000000..1e2e314796fd9
--- /dev/null
+++ b/ci-operator/step-registry/kubernetes-nmstate/e2e/monitor-tests/OWNERS
@@ -0,0 +1,11 @@
+approvers:
+- cybertron
+- phoracek
+- qinqon
+- rhrazdil
+reviewers:
+- cybertron
+- phoracek
+- qinqon
+- ramlavi
+- rhrazdil
diff --git a/ci-operator/step-registry/kubernetes-nmstate/e2e/monitor-tests/kubernetes-nmstate-e2e-monitor-tests-commands.sh b/ci-operator/step-registry/kubernetes-nmstate/e2e/monitor-tests/kubernetes-nmstate-e2e-monitor-tests-commands.sh
new file mode 100755
index 0000000000000..055b7cdb327c6
--- /dev/null
+++ b/ci-operator/step-registry/kubernetes-nmstate/e2e/monitor-tests/kubernetes-nmstate-e2e-monitor-tests-commands.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# Runs `openshift-tests run-monitor` in the background for MONITOR_DURATION,
+# sends it SIGTERM, and turns its exit status into the step result.
+
+set -euo pipefail
+
+export KUBECONFIG="${SHARED_DIR}/kubeconfig"
+
+MONITOR_DIR="${ARTIFACT_DIR}/monitor-tests"
+mkdir -p "${MONITOR_DIR}"
+
+# Collect the arguments in an array so that every option reaches
+# openshift-tests as exactly one word, whatever DISABLED_MONITOR_TESTS holds.
+MONITOR_ARGS=()
+if [[ -n "${DISABLED_MONITOR_TESTS:-}" ]]; then
+  MONITOR_ARGS+=("--disable-monitor=${DISABLED_MONITOR_TESTS}")
+fi
+MONITOR_ARGS+=(--artifact-dir "${MONITOR_DIR}")
+
+echo "Starting openshift-tests run-monitor..."
+openshift-tests run-monitor "${MONITOR_ARGS[@]}" &
+MONITOR_PID=$!
+
+echo "Monitoring cluster for ${MONITOR_DURATION} seconds..."
+sleep "${MONITOR_DURATION}"
+
+echo "Stopping monitor and evaluating results..."
+if ! kill -TERM "${MONITOR_PID}" 2>/dev/null; then
+  # Not fatal here: the exit status collected below decides the outcome.
+  echo "WARNING: monitor process had already exited before it was signalled"
+fi
+
+# `wait` returns the monitor's exit status; `||` keeps errexit from aborting.
+monitor_exit=0
+wait "${MONITOR_PID}" || monitor_exit=$?
+
+echo "Monitor exited with code: ${monitor_exit}"
+
+# Copy junit results to artifact dir for CI reporting
+find "${MONITOR_DIR}" -name "junit*.xml" -exec cp {} "${ARTIFACT_DIR}/" \; 2>/dev/null || true
+
+# Exit code 143 (128+15) means SIGTERM was not caught by the process, so no
+# results were evaluated. That case is tolerated, but it must not be reported
+# as a successful evaluation.
+# A well-behaved openshift-tests handles SIGTERM, evaluates results, and exits
+# with 0 (pass) or non-zero (failures detected).
+if [ "${monitor_exit}" -eq 143 ]; then
+  echo "WARNING: monitor was terminated by SIGTERM before evaluating results; treating as pass"
+  exit 0
+fi
+
+if [ "${monitor_exit}" -ne 0 ]; then
+  echo "ERROR: Monitor tests detected failures (exit code: ${monitor_exit})"
+  exit 1
+fi
+
+echo "Monitor tests completed successfully."
diff --git a/ci-operator/step-registry/kubernetes-nmstate/e2e/monitor-tests/kubernetes-nmstate-e2e-monitor-tests-ref.metadata.json b/ci-operator/step-registry/kubernetes-nmstate/e2e/monitor-tests/kubernetes-nmstate-e2e-monitor-tests-ref.metadata.json
new file mode 100644
index 0000000000000..05e72c284a0aa
--- /dev/null
+++ b/ci-operator/step-registry/kubernetes-nmstate/e2e/monitor-tests/kubernetes-nmstate-e2e-monitor-tests-ref.metadata.json
@@ -0,0 +1,18 @@
+{
+	"path": "kubernetes-nmstate/e2e/monitor-tests/kubernetes-nmstate-e2e-monitor-tests-ref.yaml",
+	"owners": {
+		"approvers": [
+			"cybertron",
+			"phoracek",
+			"qinqon",
+			"rhrazdil"
+		],
+		"reviewers": [
+			"cybertron",
+			"phoracek",
+			"qinqon",
+			"ramlavi",
+			"rhrazdil"
+		]
+	}
+}
\ No newline at end of file
diff --git a/ci-operator/step-registry/kubernetes-nmstate/e2e/monitor-tests/kubernetes-nmstate-e2e-monitor-tests-ref.yaml b/ci-operator/step-registry/kubernetes-nmstate/e2e/monitor-tests/kubernetes-nmstate-e2e-monitor-tests-ref.yaml
new file mode 100644
index 0000000000000..3c48a15e9e6aa
--- /dev/null
+++ b/ci-operator/step-registry/kubernetes-nmstate/e2e/monitor-tests/kubernetes-nmstate-e2e-monitor-tests-ref.yaml
@@ -0,0 +1,23 @@
+ref:
+  as: kubernetes-nmstate-e2e-monitor-tests
+  from: tests
+  commands: kubernetes-nmstate-e2e-monitor-tests-commands.sh
+  env:
+  - name: MONITOR_DURATION
+    default: "60"
+    documentation: |-
+      Duration in seconds to run the monitor tests before evaluating results.
+  - name: DISABLED_MONITOR_TESTS
+    default: ""
+    documentation: |-
+      Comma-separated list of monitor tests to disable. Example:
+      "apiserver-new-disruption-invariant,disruption-summary-serializer"
+  resources:
+    requests:
+      cpu: 100m
+      memory: 200Mi
+  timeout: 30m0s
+  documentation: |-
+    Runs openshift-tests run-monitor for a configurable duration and fails the
+    job if any monitor tests detect issues such as pods using the default service
+    account, API server disruptions, or other cluster state violations.