Add e2e tests with gofail panic after scaling in and out #298
Add e2e tests with gofail panic after scaling in and out #298 — ahrtr merged 1 commit into etcd-io:main from
Conversation
dccb9a6 to
3ce88ad
Compare
Signed-off-by: Nont <nont@duck.com>
3ce88ad to
848e8c1
Compare
|
/ok-to-test |
This seems to be a flake. I tried adding |
|
/retest |
| tc.expectedMembers, len(ml.Members)) | ||
| } | ||
|
|
||
| waitForNoLearners(t, c, podName, tc.expectedMembers, 3*time.Minute) |
There was a problem hiding this comment.
Should we set this 3*time.Minute (and the one from line 330) as a constant or environment variable? I know you just moved it from another file, but it could be an improvement.
There was a problem hiding this comment.
Yes, let me send a follow-up PR, maybe tonight my time.
|
[APPROVALNOTIFIER] This PR is APPROVED. This pull-request has been approved by: ahrtr, ivanvc, nwnt. The full list of commands accepted by this bot can be found here. The pull request process is described here. Details: Needs approval from an approver in each of these files:
Approvers can indicate their approval by writing |
Fix: #26
Sorry to keep you all waiting; this was not as simple as I thought, and each e2e run took some time to complete. In any case, I think this should now achieve what we want for adding failpoints, as proposed in the issue.
Details for verification
Fail points exist
Logs during scaling in with failpoint enabled
2026-03-02T14:28:40Z ERROR Observed a panic {"controller": "etcdcluster", "controllerGroup": "operator.etcd.io", "controllerKind": "EtcdCluster", "EtcdCluster": {"name":"etcd-scaleinfrom3to1withpanicfailpoint","namespace":"etcd-operator-system"}, "namespace": "etcd-operator-system", "name": "etcd-scaleinfrom3to1withpanicfailpoint", "reconcileID": "f8421f6c-cd29-4b40-bec1-e86bfcc6ac4d", "panic": "failpoint panic: {}", "stacktrace": "goroutine 183 [running]:\nk8s.io/apimachinery/pkg/util/runtime.logPanic({0x23c46c8, 0x8b9f7172cf0}, {0x1de9aa0, 0x8b9f6c57c50})\n\t/go/pkg/mod/k8s.io/apimachinery@v0.35.1/pkg/util/runtime/runtime.go:132 +0xbc\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile.func1()\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:202 +0x103\npanic({0x1de9aa0?, 0x8b9f6c57c50?})\n\t/usr/local/go/src/runtime/panic.go:860 +0x13a\ngo.etcd.io/gofail/runtime.actPanic(0x0?)\n\t/go/pkg/mod/go.etcd.io/gofail@v0.2.0/runtime/terms.go:324 +0xc5\ngo.etcd.io/gofail/runtime.(*term).do(...)\n\t/go/pkg/mod/go.etcd.io/gofail@v0.2.0/runtime/terms.go:293\ngo.etcd.io/gofail/runtime.(*terms).eval(0x8b9f6c70640)\n\t/go/pkg/mod/go.etcd.io/gofail@v0.2.0/runtime/terms.go:108 +0xde\ngo.etcd.io/gofail/runtime.(*Failpoint).Acquire(0x8b9f72dc600?)\n\t/go/pkg/mod/go.etcd.io/gofail@v0.2.0/runtime/failpoint.go:45 +0x6e\ngo.etcd.io/etcd-operator/internal/controller.(*EtcdClusterReconciler).reconcileClusterState(0x8b9f570b700, {0x23c46c8, 0x8b9f7172cf0}, 0x8b9f7172ed0)\n\t/workspace/internal/controller/etcdcluster_controller.go:365 +0x6be\ngo.etcd.io/etcd-operator/internal/controller.(*EtcdClusterReconciler).Reconcile(0x8b9f570b700, {0x23c46c8, 0x8b9f7172cf0}, {{{0x8b9f72f3590?, 0x16cef05?}, {0x8b9f7308ff0?, 0x0?}}})\n\t/workspace/internal/controller/etcdcluster_controller.go:96 +0x116\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile(0x8b9f7172c60, {0x23c46c8?, 0x8b9f7172cf0}, 
{{{0x8b9f72f3590, 0x0?}, {0x8b9f7308ff0?, 0x0?}}})\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:222 +0x1ab\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler(0x23f21a0, {0x23c4700, 0x8b9f548c690}, {{{0x8b9f72f3590, 0x14}, {0x8b9f7308ff0, 0x26}}}, 0x0)\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:479 +0x39b\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem(0x23f21a0, {0x23c4700, 0x8b9f548c690})\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:438 +0x1f8\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func1.1()\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:313 +0x85\ncreated by sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func1 in goroutine 110\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:309 +0x26b\n"} k8s.io/apimachinery/pkg/util/runtime.logPanic /go/pkg/mod/k8s.io/apimachinery@v0.35.1/pkg/util/runtime/runtime.go:140 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile.func1 /go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:202 runtime.gopanic /usr/local/go/src/runtime/panic.go:860 go.etcd.io/gofail/runtime.actPanic /go/pkg/mod/go.etcd.io/gofail@v0.2.0/runtime/terms.go:324 go.etcd.io/gofail/runtime.(*term).do /go/pkg/mod/go.etcd.io/gofail@v0.2.0/runtime/terms.go:293 go.etcd.io/gofail/runtime.(*terms).eval /go/pkg/mod/go.etcd.io/gofail@v0.2.0/runtime/terms.go:108 go.etcd.io/gofail/runtime.(*Failpoint).Acquire /go/pkg/mod/go.etcd.io/gofail@v0.2.0/runtime/failpoint.go:45 go.etcd.io/etcd-operator/internal/controller.(*EtcdClusterReconciler).reconcileClusterState /workspace/internal/controller/etcdcluster_controller.go:365 
go.etcd.io/etcd-operator/internal/controller.(*EtcdClusterReconciler).Reconcile /workspace/internal/controller/etcdcluster_controller.go:96 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile /go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:222 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler /go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:479 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem /go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:438 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func1.1 /go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:313 2026-03-02T14:28:40Z ERROR Reconciler error {"controller": "etcdcluster", "controllerGroup": "operator.etcd.io", "controllerKind": "EtcdCluster", "EtcdCluster": {"name":"etcd-scaleinfrom3to1withpanicfailpoint","namespace":"etcd-operator-system"}, "namespace": "etcd-operator-system", "name": "etcd-scaleinfrom3to1withpanicfailpoint", "reconcileID": "f8421f6c-cd29-4b40-bec1-e86bfcc6ac4d", "error": "panic: failpoint panic: {} [recovered]"} sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler /go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:495 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem /go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:438 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func1.1 /go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:313
Logs during scaling out with failpoint enabled
2026-03-02T14:30:30Z ERROR Observed a panic {"controller": "etcdcluster", "controllerGroup": "operator.etcd.io", "controllerKind": "EtcdCluster", "EtcdCluster": {"name":"etcd-scaleoutfrom1to3withpanicfailpoint","namespace":"etcd-operator-system"}, "namespace": "etcd-operator-system", "name": "etcd-scaleoutfrom1to3withpanicfailpoint", "reconcileID": "769c23b3-4174-442a-a192-a0b50ccb17c1", "panic": "failpoint panic: {}", "stacktrace": "goroutine 183 [running]:\nk8s.io/apimachinery/pkg/util/runtime.logPanic({0x23c46c8, 0x8b9f6c98c90}, {0x1de9aa0, 0x8b9f58fd230})\n\t/go/pkg/mod/k8s.io/apimachinery@v0.35.1/pkg/util/runtime/runtime.go:132 +0xbc\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile.func1()\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:202 +0x103\npanic({0x1de9aa0?, 0x8b9f58fd230?})\n\t/usr/local/go/src/runtime/panic.go:860 +0x13a\ngo.etcd.io/gofail/runtime.actPanic(0x0?)\n\t/go/pkg/mod/go.etcd.io/gofail@v0.2.0/runtime/terms.go:324 +0xc5\ngo.etcd.io/gofail/runtime.(*term).do(...)\n\t/go/pkg/mod/go.etcd.io/gofail@v0.2.0/runtime/terms.go:293\ngo.etcd.io/gofail/runtime.(*terms).eval(0x8b9f729d720)\n\t/go/pkg/mod/go.etcd.io/gofail@v0.2.0/runtime/terms.go:108 +0xde\ngo.etcd.io/gofail/runtime.(*Failpoint).Acquire(0x23cfe48?)\n\t/go/pkg/mod/go.etcd.io/gofail@v0.2.0/runtime/failpoint.go:45 +0x6e\ngo.etcd.io/etcd-operator/internal/controller.(*EtcdClusterReconciler).reconcileClusterState(0x8b9f570b700, {0x23c46c8, 0x8b9f6c98c90}, 0x8b9f6c98e10)\n\t/workspace/internal/controller/etcdcluster_controller.go:343 +0xa45\ngo.etcd.io/etcd-operator/internal/controller.(*EtcdClusterReconciler).Reconcile(0x8b9f570b700, {0x23c46c8, 0x8b9f6c98c90}, {{{0x8b9f72f2510?, 0x16cef05?}, {0x8b9f73472c0?, 0x0?}}})\n\t/workspace/internal/controller/etcdcluster_controller.go:96 +0x116\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile(0x8b9f6c98c00, {0x23c46c8?, 0x8b9f6c98c90}, 
{{{0x8b9f72f2510, 0x0?}, {0x8b9f73472c0?, 0x0?}}})\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:222 +0x1ab\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler(0x23f21a0, {0x23c4700, 0x8b9f548c690}, {{{0x8b9f72f2510, 0x14}, {0x8b9f73472c0, 0x27}}}, 0x0)\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:479 +0x39b\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem(0x23f21a0, {0x23c4700, 0x8b9f548c690})\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:438 +0x1f8\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func1.1()\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:313 +0x85\ncreated by sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func1 in goroutine 110\n\t/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:309 +0x26b\n"} k8s.io/apimachinery/pkg/util/runtime.logPanic /go/pkg/mod/k8s.io/apimachinery@v0.35.1/pkg/util/runtime/runtime.go:140 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile.func1 /go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:202 runtime.gopanic /usr/local/go/src/runtime/panic.go:860 go.etcd.io/gofail/runtime.actPanic /go/pkg/mod/go.etcd.io/gofail@v0.2.0/runtime/terms.go:324 go.etcd.io/gofail/runtime.(*term).do /go/pkg/mod/go.etcd.io/gofail@v0.2.0/runtime/terms.go:293 go.etcd.io/gofail/runtime.(*terms).eval /go/pkg/mod/go.etcd.io/gofail@v0.2.0/runtime/terms.go:108 go.etcd.io/gofail/runtime.(*Failpoint).Acquire /go/pkg/mod/go.etcd.io/gofail@v0.2.0/runtime/failpoint.go:45 go.etcd.io/etcd-operator/internal/controller.(*EtcdClusterReconciler).reconcileClusterState /workspace/internal/controller/etcdcluster_controller.go:343 
go.etcd.io/etcd-operator/internal/controller.(*EtcdClusterReconciler).Reconcile /workspace/internal/controller/etcdcluster_controller.go:96 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile /go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:222 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler /go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:479 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem /go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:438 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func1.1 /go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:313 2026-03-02T14:30:30Z ERROR Reconciler error {"controller": "etcdcluster", "controllerGroup": "operator.etcd.io", "controllerKind": "EtcdCluster", "EtcdCluster": {"name":"etcd-scaleoutfrom1to3withpanicfailpoint","namespace":"etcd-operator-system"}, "namespace": "etcd-operator-system", "name": "etcd-scaleoutfrom1to3withpanicfailpoint", "reconcileID": "769c23b3-4174-442a-a192-a0b50ccb17c1", "error": "panic: failpoint panic: {} [recovered]"} sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler /go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:495 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem /go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:438 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func1.1 /go/pkg/mod/sigs.k8s.io/controller-runtime@v0.23.1/pkg/internal/controller/controller.go:313
cc @ahrtr, @ivanvc and @ArkaSaha30