From 4edf4a94400bc8056b737ec1cfa19c16e5e3aa99 Mon Sep 17 00:00:00 2001 From: Michael Weibel Date: Fri, 8 May 2026 13:36:07 +0200 Subject: [PATCH 1/3] chore: improve integration test reliability - ensure deployment is ready before continuing - retry cleanup in case API is temporarily down during cleanup - use pre-pushed container image instead of cloning/building/running in every pod on every integration test --- .github/workflows/http-echo-image.yml | 2 +- cmd/http-echo/Dockerfile | 2 +- helpers/cleanup | 18 ++++++-- pkg/internal/integration/service_test.go | 56 ++++++++++++++++-------- 4 files changed, 54 insertions(+), 24 deletions(-) diff --git a/.github/workflows/http-echo-image.yml b/.github/workflows/http-echo-image.yml index b25cc6a..7547d7f 100644 --- a/.github/workflows/http-echo-image.yml +++ b/.github/workflows/http-echo-image.yml @@ -39,4 +39,4 @@ jobs: file: cmd/http-echo/Dockerfile push: true tags: | - ghcr.io/cloudscale-ch/cloudscale-cloud-controller-manager/http-echo:latest \ No newline at end of file + ghcr.io/cloudscale-ch/cloudscale-cloud-controller-manager/http-echo:latest diff --git a/cmd/http-echo/Dockerfile b/cmd/http-echo/Dockerfile index 4a6bf0c..9fe39d4 100644 --- a/cmd/http-echo/Dockerfile +++ b/cmd/http-echo/Dockerfile @@ -21,4 +21,4 @@ FROM gcr.io/distroless/static-debian11 COPY --from=build /http-echo /http-echo -ENTRYPOINT ["/http-echo"] \ No newline at end of file +ENTRYPOINT ["/http-echo"] diff --git a/helpers/cleanup b/helpers/cleanup index b8392f4..94c1afe 100755 --- a/helpers/cleanup +++ b/helpers/cleanup @@ -10,6 +10,18 @@ export CLUSTER_PREFIX="${CLUSTER_PREFIX-k8test}" source helpers/run-in-test-cluster ensure-k8test > /dev/null -k8test/playbooks/destroy-cluster.yml \ - -i k8test/cluster/inventory.yml \ - -e cluster_prefix="$CLUSTER_PREFIX" +for attempt in 1 2 3; do + echo "Destroy attempt ${attempt}..." + + if k8test/playbooks/destroy-cluster.yml \ + -i k8test/cluster/inventory.yml \ + -e cluster_prefix="$CLUSTER_PREFIX"; then + exit 0 + fi + + echo "Cleanup failed, retrying in 60s..." + sleep 60 +done + +echo "Cleanup failed after 3 attempts." +exit 1 diff --git a/pkg/internal/integration/service_test.go b/pkg/internal/integration/service_test.go index e78bb29..d547e51 100644 --- a/pkg/internal/integration/service_test.go +++ b/pkg/internal/integration/service_test.go @@ -74,6 +74,33 @@ func (s *IntegrationTestSuite) CreateDeployment( s.Require().NoError(err) } +// AwaitDeploymentReady waits for all deployment replicas to be ready. +func (s *IntegrationTestSuite) AwaitDeploymentReady(name string, timeout time.Duration) { + s.T().Log("Waiting for deployment", name, "to be ready") + + err := wait.PollUntilContextTimeout( + context.Background(), + 1*time.Second, + timeout, + true, + func(ctx context.Context) (bool, error) { + deployment, err := s.k8s.AppsV1().Deployments(s.ns).Get( + ctx, name, metav1.GetOptions{}) + if err != nil { + return false, err + } + + ready := deployment.Status.ReadyReplicas + expected := *deployment.Spec.Replicas + + s.T().Logf("Deployment %s: %d/%d replicas ready", name, ready, expected) + return ready == expected, nil + }, + ) + + s.Require().NoError(err, "deployment %s did not become ready within %v", name, timeout) +} + func (s *IntegrationTestSuite) CreateConfigMap(name string, data map[string]string) { _, err := s.k8s.CoreV1().ConfigMaps(s.ns).Create( context.Background(), @@ -1084,29 +1111,20 @@ func (s *IntegrationTestSuite) TestFloatingIPConflicts() { func (s *IntegrationTestSuite) TestServiceProxyProtocol() { - // Get the branch to run http-echo with (in the future, we might - // offer this in a separate container). - branch := os.Getenv("HTTP_ECHO_BRANCH") - if len(branch) == 0 { - branch = "main" + // Deploy our http-echo server to check for proxy connections + httpEchoImage := os.Getenv("HTTP_ECHO_IMAGE") + if httpEchoImage == "" { + httpEchoImage = "ghcr.io/cloudscale-ch/cloudscale-cloud-controller-manager/http-echo:latest" } - // Deploy our http-echo server to check for proxy connections - s.T().Log("Creating http-echo deployment", "branch", branch) - s.CreateDeployment("http-echo", "docker.io/golang", 2, v1.ProtocolTCP, 80, func(spec *appsv1.DeploymentSpec) { - spec.Template.Spec.Containers[0].Command = []string{"bash"} - spec.Template.Spec.Containers[0].Args = []string{ - "-c", - fmt.Sprintf(` - git clone https://github.com/cloudscale-ch/cloudscale-cloud-controller-manager ccm; - cd ccm; - git checkout %s || exit 1; - cd cmd/http-echo; - go run main.go -host 0.0.0.0 -port 80 - `, branch), - } + s.T().Log("Creating http-echo deployment", "image", httpEchoImage) + s.CreateDeployment("http-echo", httpEchoImage, 2, v1.ProtocolTCP, 80, func(spec *appsv1.DeploymentSpec) { + spec.Template.Spec.Containers[0].Args = []string{"-host", "0.0.0.0", "-port", "80"} }) + // Wait for all replicas to be ready + s.AwaitDeploymentReady("http-echo", 120*time.Second) + // Expose the deployment using a LoadBalancer service s.ExposeDeployment("http-echo", map[string]string{ "k8s.cloudscale.ch/loadbalancer-pool-protocol": "proxy", From bab2f87e783fd0ae852ad261a377cdb11c72a1ca Mon Sep 17 00:00:00 2001 From: Michael Weibel Date: Fri, 8 May 2026 15:01:36 +0200 Subject: [PATCH 2/3] chore: clean up services before API resources ensures that clean up works the right way. --- pkg/internal/integration/main_test.go | 85 ++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 16 deletions(-) diff --git a/pkg/internal/integration/main_test.go b/pkg/internal/integration/main_test.go index 8fecef0..e724127 100644 --- a/pkg/internal/integration/main_test.go +++ b/pkg/internal/integration/main_test.go @@ -4,6 +4,7 @@ package integration import ( "context" + "errors" "fmt" "log" "math/rand" @@ -146,44 +147,96 @@ func (s *IntegrationTestSuite) CreateRegionalFloatingIP(region string) ( return ip, nil } +func (s *IntegrationTestSuite) deleteServices(ctx context.Context) error { + svcs, err := s.k8s.CoreV1().Services(s.ns).List(ctx, metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("listing services in namespace %s failed: %w", s.ns, err) + } + + for _, svc := range svcs.Items { + if svc.Spec.Type != v1.ServiceTypeLoadBalancer { + continue + } + err := s.k8s.CoreV1().Services(s.ns).Delete( + ctx, + svc.Name, + metav1.DeleteOptions{}, + ) + if err != nil { + s.T().Logf("deleting loadbalancer service %s in namespace %s failed: %s", svc.Name, s.ns, err) + } + } + + // Wait for loadbalancer services to be deleted + err = wait.PollUntilContextCancel(ctx, 1*time.Second, true, + func(ctx context.Context) (bool, error) { + svcs, err := s.k8s.CoreV1().Services(s.ns).List(ctx, metav1.ListOptions{}) + if err != nil { + return false, err + } + for _, svc := range svcs.Items { + if svc.Spec.Type == v1.ServiceTypeLoadBalancer { + return false, nil + } + } + return true, nil + }) + if err != nil { + return fmt.Errorf("took too long to delete loadbalancer services in namespace %s: %w", s.ns, err) + } + + return nil +} + func (s *IntegrationTestSuite) TearDownTest() { - errors := 0 + errCount := 0 + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + defer cancel() + // 1. Delete services + svcCtx, svcCancel := context.WithTimeout(ctx, 3*time.Minute) + if err := s.deleteServices(svcCtx); err != nil { + s.T().Logf("failed to delete services: %s", err) + errCount++ + } + svcCancel() + + // 2. Delete API resources if s.resources != nil { for _, url := range s.resources { req, err := s.api.NewRequest( - context.Background(), http.MethodDelete, url, nil) + ctx, http.MethodDelete, url, nil) if err != nil { s.T().Logf("preparing to delete %s failed: %s", url, err) - errors++ + errCount++ } - err = s.api.Do(context.Background(), req, nil) + err = s.api.Do(ctx, req, nil) if err != nil { + var apiErr *cloudscale.ErrorResponse + if errors.As(err, &apiErr) && apiErr.StatusCode == http.StatusNotFound { + continue + } s.T().Logf("deleting %s failed: %s", url, err) - errors++ + errCount++ } } } s.resources = nil + // 3. Delete namespace err := s.k8s.CoreV1().Namespaces().Delete( - context.Background(), + ctx, s.ns, metav1.DeleteOptions{}, ) if err != nil { s.T().Logf("could not delete namespace %s: %s", s.ns, err) - errors++ + errCount++ } - // Wait up to five minutes for the namespace to be deleted - timeout := 5 * time.Minute - - ctx, cancel := context.WithTimeout(context.Background(), timeout) - defer cancel() - + // Wait for the namespace to be deleted err = wait.PollUntilContextCancel(ctx, 1*time.Second, true, func(ctx context.Context) (bool, error) { _, err := s.k8s.CoreV1().Namespaces().Get( @@ -208,11 +261,11 @@ func (s *IntegrationTestSuite) TearDownTest() { if err != nil { s.T().Logf("took too long to delete namespace %s: %s", s.ns, err) - errors++ + errCount++ } - if errors > 0 { - panic(fmt.Sprintf("failed cleanup test: %d errors", errors)) + if errCount > 0 { + panic(fmt.Sprintf("failed cleanup test: %d errors", errCount)) } s.ns = "" From 4c03acd1b19178b9b1e5fb5559e6ac90b68fa9ea Mon Sep 17 00:00:00 2001 From: Michael Weibel Date: Fri, 8 May 2026 15:38:17 +0200 Subject: [PATCH 3/3] fix: context dir for http-echo container build --- .github/workflows/http-echo-image.yml | 3 +-- cmd/http-echo/Dockerfile | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/http-echo-image.yml b/.github/workflows/http-echo-image.yml index 7547d7f..2d026b1 100644 --- a/.github/workflows/http-echo-image.yml +++ b/.github/workflows/http-echo-image.yml @@ -35,8 +35,7 @@ jobs: - name: Build and push uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25 # v5 with: - context: . - file: cmd/http-echo/Dockerfile + context: ./cmd/http-echo push: true tags: | ghcr.io/cloudscale-ch/cloudscale-cloud-controller-manager/http-echo:latest diff --git a/cmd/http-echo/Dockerfile b/cmd/http-echo/Dockerfile index 9fe39d4..b713717 100644 --- a/cmd/http-echo/Dockerfile +++ b/cmd/http-echo/Dockerfile @@ -19,6 +19,8 @@ RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \ # Production stage - distroless static FROM gcr.io/distroless/static-debian11 +LABEL org.opencontainers.image.description="Internal test helper for CCM integration tests. Not for general use." + COPY --from=build /http-echo /http-echo ENTRYPOINT ["/http-echo"]