From 91e51819ff4e645cfa7a7a0b8f271f4e6f9233e3 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 12:16:57 -0600 Subject: [PATCH 01/38] tear-down PR dev cluster and children --- .github/workflows/PR-close.yaml | 108 ++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 .github/workflows/PR-close.yaml diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml new file mode 100644 index 000000000..a748329a9 --- /dev/null +++ b/.github/workflows/PR-close.yaml @@ -0,0 +1,108 @@ +name: PR Workflow + +on: + pull_request: + types: + - closed + +defaults: + run: + working-directory: go/src/github.com/stackrox/infra + +concurrency: pr-${{ github.ref }} + +env: + CLUSTER_NAME: infra-pr-${{ github.event.pull_request.number }} + GH_TOKEN: ${{ secrets.RHACS_BOT_GITHUB_TOKEN }} + +jobs: + create-dev-cluster: + runs-on: ubuntu-latest + steps: + - uses: stackrox/actions/infra/create-cluster@v1 + with: + flavor: gke-default + name: infra-pr-${{ github.event.pull_request.number }} + args: machine-type=e2-medium,nodes=3,gcp-image-type=ubuntu_containerd + lifespan: ${{ github.actor == 'dependabot[bot]' && '1h' || '24h' }} + wait: true + token: ${{ secrets.INFRA_TOKEN }} + + deploy-and-test: + runs-on: ubuntu-latest + container: + image: quay.io/stackrox-io/apollo-ci:stackrox-test-0.4.4 + env: + KUBECONFIG: /github/home/artifacts/kubeconfig + INFRA_TOKEN: ${{ secrets.INFRA_TOKEN }} + INFRACTL: bin/infractl -k -e localhost:8443 + USE_GKE_GCLOUD_AUTH_PLUGIN: "True" + + steps: + - uses: actions/setup-go@v5 + with: + go-version: "1.23" + + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ github.event.pull_request.head.sha }} + path: go/src/github.com/stackrox/infra + + - name: Authenticate to GCloud + uses: google-github-actions/auth@v2 + with: + credentials_json: ${{ secrets.INFRA_CI_AUTOMATION_GCP_SA }} + + - name: Set up Cloud SDK + uses: "google-github-actions/setup-gcloud@v2" + with: + install_components: "gke-gcloud-auth-plugin" + + - name: Download production infractl + uses: stackrox/actions/infra/install-infractl@v1 + + - name: Download artifacts + run: | + /github/home/.local/bin/infractl artifacts "$CLUSTER_NAME" -d /github/home/artifacts >> "$GITHUB_STEP_SUMMARY" + kubectl get nodes -o wide || true + + - name: Download branch infractl + run: | + kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & + sleep 10 + + kubectl -n infra logs -l app=infra-server --tail=-1 + + make pull-infractl-from-dev-server + + kill %1 + + - name: Destroy created clusters still running + run: | + kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & + sleep 10 + + $INFRACTL version + + echo 'For 30 minutes, list and delete child clusters that are not failed.' + for I in {1..30}; do + if [[ $($INFRACTL list --all --expired --status='READY,CREATING,DESTROYING' | tee >(cat >&2) | wc -l) -gt 0 ]]; then + echo "Active clusters found. Deleting ..." + else + echo "No active clusters found." + break + fi + $INFRACTL list --all --expired --status='READY,FAILED,CREATING' | grep '^[^ ]' \ + | xargs -I{} $INFRACTL delete {} + echo "(sleep 1 minute then check again)" + sleep 60 + done + + kill %1 + + - name: Destroy PR dev cluster + run: | + /github/home/.local/bin/infractl delete "$CLUSTER_NAME" + From 199d647fab22709f0a3f9e8bca21803990ead74d Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 12:26:11 -0600 Subject: [PATCH 02/38] lint --- .github/workflows/PR-close.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index a748329a9..bd0172c8b 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -89,14 +89,14 @@ jobs: echo 'For 30 minutes, list and delete child clusters that are not failed.' for I in {1..30}; do if [[ $($INFRACTL list --all --expired --status='READY,CREATING,DESTROYING' | tee >(cat >&2) | wc -l) -gt 0 ]]; then - echo "Active clusters found. Deleting ..." + echo 'Active clusters found. Deleting ...' else - echo "No active clusters found." + echo 'No active clusters found.' break fi $INFRACTL list --all --expired --status='READY,FAILED,CREATING' | grep '^[^ ]' \ | xargs -I{} $INFRACTL delete {} - echo "(sleep 1 minute then check again)" + echo "(sleep 1 minute then check again. try $I/30)" sleep 60 done From 796e181f1e57e43eaa564f46b10f3b23810edc2f Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 13:04:24 -0600 Subject: [PATCH 03/38] comment after deployed --- .github/workflows/PR.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index b8a08cffa..b6f33f063 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -143,6 +143,12 @@ jobs: kill %1 + - name: Comment on PR + run: | + set -x + gh pr comment "${{ github.event.issue.number }}" \ + --body "Deployment to development cluster completed." + - name: Install Argo CLI run: | ARGO_VERSION=$(grep "github.com/argoproj/argo-workflows/v3" go.mod | awk '{ print $2 }') From a9f9d22c2e56d073da782f01fd0e2b96c381791e Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 13:23:38 -0600 Subject: [PATCH 04/38] ignore shellcheck quote --- .github/workflows/PR-close.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index bd0172c8b..fc0854e5f 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -81,6 +81,7 @@ jobs: - name: Destroy created clusters still running run: | + #!/usr/bin/env bash # shellcheck disable=SC2086 kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & sleep 10 From de657bda678d38d83513f2d7f5b2e2a95f43602c Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 13:24:52 -0600 Subject: [PATCH 05/38] directly on runner --- .github/workflows/PR-close.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index fc0854e5f..15e214a30 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -30,8 +30,6 @@ jobs: deploy-and-test: runs-on: ubuntu-latest - container: - image: quay.io/stackrox-io/apollo-ci:stackrox-test-0.4.4 env: KUBECONFIG: /github/home/artifacts/kubeconfig INFRA_TOKEN: ${{ secrets.INFRA_TOKEN }} From ea89896ec2d2fc929e9697d6e26006cd20a2a336 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 13:26:21 -0600 Subject: [PATCH 06/38] pr create directly on runner also --- .github/workflows/PR.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index b6f33f063..864157f80 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -44,9 +44,6 @@ jobs: - build-and-push - create-dev-cluster runs-on: ubuntu-latest - container: - image: quay.io/stackrox-io/apollo-ci:stackrox-test-0.4.4 - steps: - name: Checkout uses: actions/checkout@v4 From 1d8455e3e86402554c6a573c3e5358364995fe45 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 13:31:58 -0600 Subject: [PATCH 07/38] lint on PR also --- .github/workflows/PR.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index 864157f80..3559d8745 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -164,6 +164,8 @@ jobs: env: INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} run: | + #!/usr/bin/env bash # shellcheck disable=SC2086 + kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & sleep 5 From 12255f47d2b414aca7f080855e6860738acb8e4d Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 13:43:27 -0600 Subject: [PATCH 08/38] try line after shebang --- .github/workflows/PR-close.yaml | 4 +++- .github/workflows/PR.yaml | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index 15e214a30..b6342d737 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -79,7 +79,9 @@ jobs: - name: Destroy created clusters still running run: | - #!/usr/bin/env bash # shellcheck disable=SC2086 + #!/usr/bin/env bash + # shellcheck disable=SC2086 + kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & sleep 10 diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index 3559d8745..e147d1794 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -164,7 +164,8 @@ jobs: env: INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} run: | - #!/usr/bin/env bash # shellcheck disable=SC2086 + #!/usr/bin/env bash + # shellcheck disable=SC2086 kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & sleep 5 From d548c7cece1cd102622afa8c0e1cd4780d7e41d9 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 13:56:48 -0600 Subject: [PATCH 09/38] gh use if hub-comment fails --- scripts/add-PR-comment-for-deploy-to-dev.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/add-PR-comment-for-deploy-to-dev.sh b/scripts/add-PR-comment-for-deploy-to-dev.sh index 9a3ebd926..b31f9dde7 100755 --- a/scripts/add-PR-comment-for-deploy-to-dev.sh +++ b/scripts/add-PR-comment-for-deploy-to-dev.sh @@ -68,7 +68,8 @@ kubectl -n infra logs -l app=infra-server --tail=1 -f EOT - hub-comment -type deploy -template-file "$tmpfile" + hub-comment -type deploy -template-file "$tmpfile" \ + || gh pr comment "${url}" --edit-last --create-if-none --body-file "$tmpfile" } add_PR_comment_for_deploy_to_dev "$@" From 2ee6702aa562cb5f2a739cdfefc8900cf89179df Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 14:12:06 -0600 Subject: [PATCH 10/38] remove container --- .github/workflows/PR-close.yaml | 2 +- .github/workflows/PR.yaml | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index b6342d737..f1c4346d0 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -79,8 +79,8 @@ jobs: - name: Destroy created clusters still running run: | - #!/usr/bin/env bash # shellcheck disable=SC2086 + true kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & sleep 10 diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index e147d1794..b4c272ccf 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -63,8 +63,6 @@ jobs: - build-and-push - create-dev-cluster runs-on: ubuntu-latest - container: - image: quay.io/stackrox-io/apollo-ci:stackrox-test-0.4.4 env: KUBECONFIG: /github/home/artifacts/kubeconfig INFRA_TOKEN: ${{ secrets.INFRA_TOKEN }} @@ -164,7 +162,6 @@ jobs: env: INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} run: | - #!/usr/bin/env bash # shellcheck disable=SC2086 kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & From ba846190f9af9c22cb7ec1acaad0f9a4095cbda6 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 14:21:54 -0600 Subject: [PATCH 11/38] only specific place --- .github/workflows/PR-close.yaml | 4 +--- .github/workflows/PR.yaml | 2 -- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index f1c4346d0..d61566ec1 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -79,9 +79,6 @@ jobs: - name: Destroy created clusters still running run: | - # shellcheck disable=SC2086 - true - kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & sleep 10 @@ -95,6 +92,7 @@ jobs: echo 'No active clusters found.' break fi + # shellcheck disable=SC2086 $INFRACTL list --all --expired --status='READY,FAILED,CREATING' | grep '^[^ ]' \ | xargs -I{} $INFRACTL delete {} echo "(sleep 1 minute then check again. try $I/30)" diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index b4c272ccf..65e131e9d 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -162,8 +162,6 @@ jobs: env: INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} run: | - # shellcheck disable=SC2086 - kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & sleep 5 From 675bb1067211533b6bda475086b8d28f493b2140 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 14:24:03 -0600 Subject: [PATCH 12/38] use HOME --- .github/workflows/PR-close.yaml | 2 +- .github/workflows/PR.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index d61566ec1..0609cbe9f 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -63,7 +63,7 @@ jobs: - name: Download artifacts run: | - /github/home/.local/bin/infractl artifacts "$CLUSTER_NAME" -d /github/home/artifacts >> "$GITHUB_STEP_SUMMARY" + ~/.local/bin/infractl artifacts "$CLUSTER_NAME" -d /github/home/artifacts >> "$GITHUB_STEP_SUMMARY" kubectl get nodes -o wide || true - name: Download branch infractl diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index 65e131e9d..cbad3a06c 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -96,7 +96,7 @@ jobs: - name: Download artifacts run: | - /github/home/.local/bin/infractl artifacts "$CLUSTER_NAME" -d /github/home/artifacts >> "$GITHUB_STEP_SUMMARY" + ~/.local/bin/infractl artifacts "$CLUSTER_NAME" -d /github/home/artifacts >> "$GITHUB_STEP_SUMMARY" kubectl get nodes -o wide || true - name: Deploy infra to dev cluster From 36a1efde4eaf167cb5fe050d9eb45b0fcdca78e9 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 14:32:08 -0600 Subject: [PATCH 13/38] retry pull from dev server --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7892ecbd1..1f0a15b95 100644 --- a/Makefile +++ b/Makefile @@ -194,7 +194,7 @@ pull-infractl-from-dev-server: @mkdir -p bin @rm -f bin/infractl set -o pipefail; \ - curl --retry 3 --insecure --silent --show-error --fail --location https://localhost:8443/v1/cli/$(shell go env GOOS)/$(shell go env GOARCH)/upgrade \ + curl --retry 3 --retry-all-errors --retry-delay 5 --insecure --silent --show-error --fail --location https://localhost:8443/v1/cli/$(shell go env GOOS)/$(shell go env GOARCH)/upgrade \ | jq -r ".result.fileChunk" \ | base64 -d \ > bin/infractl From 475545dfa182ec6379363c22bb5bb82a6588935f Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 14:37:01 -0600 Subject: [PATCH 14/38] unnecessary use of templating --- scripts/add-PR-comment-for-deploy-to-dev.sh | 87 ++++++++++----------- 1 file changed, 43 insertions(+), 44 deletions(-) diff --git a/scripts/add-PR-comment-for-deploy-to-dev.sh b/scripts/add-PR-comment-for-deploy-to-dev.sh index b31f9dde7..5bb083a8b 100755 --- a/scripts/add-PR-comment-for-deploy-to-dev.sh +++ b/scripts/add-PR-comment-for-deploy-to-dev.sh @@ -22,50 +22,49 @@ add_PR_comment_for_deploy_to_dev() { local tmpfile tmpfile=$(mktemp) cat > "$tmpfile" <<- EOT -A single node development cluster ({{.Env.DEV_CLUSTER_NAME}}) was allocated in production infra for this PR. - -CI will attempt to deploy \`{{.Env.IMAGE_NAME}}\` to it. - -:electric_plug: You can **connect** to this cluster with: -\`\`\` -gcloud container clusters get-credentials {{.Env.DEV_CLUSTER_NAME}} --zone us-central1-a --project acs-team-temp-dev -\`\`\` - -:hammer_and_wrench: And pull **infractl** from the deployed dev infra-server with: -\`\`\` -nohup kubectl -n infra port-forward svc/infra-server-service 8443:8443 & -make pull-infractl-from-dev-server -\`\`\` - -:bike: You can then **use** the dev infra instance e.g.: -\`\`\` -bin/infractl -k -e localhost:8443 whoami -\`\`\` - -:warning: ***Any clusters that you start using your dev infra instance should have a lifespan shorter then the development cluster instance. Otherwise they will not be destroyed when the dev infra instance ceases to exist when the development cluster is deleted.*** :warning: - -### Further Development - -:coffee: If you make changes, you can commit and push and CI will take care of updating the development cluster. - -:rocket: If you only modify configuration (chart/infra-server/configuration) or templates (chart/infra-server/{static,templates}), you can get a faster update with: - -\`\`\` -make helm-deploy -\`\`\` - -### Logs - -Logs for the development infra depending on your @redhat.com authuser: -- [authuser=0](https://console.cloud.google.com/logs/query;query=resource.labels.cluster_name%3D%22{{.Env.DEV_CLUSTER_NAME}}%22%0Aresource.labels.container_name%3D%22infra-server%22?project=acs-team-temp-dev&authuser=0) -- [authuser=1](https://console.cloud.google.com/logs/query;query=resource.labels.cluster_name%3D%22{{.Env.DEV_CLUSTER_NAME}}%22%0Aresource.labels.container_name%3D%22infra-server%22?project=acs-team-temp-dev&authuser=1) -- [authuser=2](https://console.cloud.google.com/logs/query;query=resource.labels.cluster_name%3D%22{{.Env.DEV_CLUSTER_NAME}}%22%0Aresource.labels.container_name%3D%22infra-server%22?project=acs-team-temp-dev&authuser=2) - -Or: -\`\`\` -kubectl -n infra logs -l app=infra-server --tail=1 -f -\`\`\` - + A single node development cluster (${DEV_CLUSTER_NAME}) was allocated in production infra for this PR. + + CI will attempt to deploy \`${IMAGE_NAME}\` to it. + + :electric_plug: You can **connect** to this cluster with: + \`\`\` + gcloud container clusters get-credentials ${DEV_CLUSTER_NAME} --zone us-central1-a --project acs-team-temp-dev + \`\`\` + + :hammer_and_wrench: And pull **infractl** from the deployed dev infra-server with: + \`\`\` + nohup kubectl -n infra port-forward svc/infra-server-service 8443:8443 & + make pull-infractl-from-dev-server + \`\`\` + + :bike: You can then **use** the dev infra instance e.g.: + \`\`\` + bin/infractl -k -e localhost:8443 whoami + \`\`\` + + :warning: ***Any clusters that you start using your dev infra instance should have a lifespan shorter then the development cluster instance. Otherwise they will not be destroyed when the dev infra instance ceases to exist when the development cluster is deleted.*** :warning: + + ### Further Development + + :coffee: If you make changes, you can commit and push and CI will take care of updating the development cluster. + + :rocket: If you only modify configuration (chart/infra-server/configuration) or templates (chart/infra-server/{static,templates}), you can get a faster update with: + + \`\`\` + make helm-deploy + \`\`\` + + ### Logs + + Logs for the development infra depending on your @redhat.com authuser: + - [authuser=0](https://console.cloud.google.com/logs/query;query=resource.labels.cluster_name%3D%22${DEV_CLUSTER_NAME}%22%0Aresource.labels.container_name%3D%22infra-server%22?project=acs-team-temp-dev&authuser=0) + - [authuser=1](https://console.cloud.google.com/logs/query;query=resource.labels.cluster_name%3D%22${DEV_CLUSTER_NAME}%22%0Aresource.labels.container_name%3D%22infra-server%22?project=acs-team-temp-dev&authuser=1) + - [authuser=2](https://console.cloud.google.com/logs/query;query=resource.labels.cluster_name%3D%22${DEV_CLUSTER_NAME}%22%0Aresource.labels.container_name%3D%22infra-server%22?project=acs-team-temp-dev&authuser=2) + + Or: + \`\`\` + kubectl -n infra logs -l app=infra-server --tail=1 -f + \`\`\` EOT hub-comment -type deploy -template-file "$tmpfile" \ From f35293e6238b719cce717a8d30f168664f821a11 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 14:39:47 -0600 Subject: [PATCH 15/38] use tmpdir --- .github/workflows/PR-close.yaml | 6 +++--- .github/workflows/PR.yaml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index 0609cbe9f..3b54918af 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -31,7 +31,7 @@ jobs: deploy-and-test: runs-on: ubuntu-latest env: - KUBECONFIG: /github/home/artifacts/kubeconfig + KUBECONFIG: /tmp/kubeconfig INFRA_TOKEN: ${{ secrets.INFRA_TOKEN }} INFRACTL: bin/infractl -k -e localhost:8443 USE_GKE_GCLOUD_AUTH_PLUGIN: "True" @@ -63,7 +63,7 @@ jobs: - name: Download artifacts run: | - ~/.local/bin/infractl artifacts "$CLUSTER_NAME" -d /github/home/artifacts >> "$GITHUB_STEP_SUMMARY" + ~/.local/bin/infractl artifacts "$CLUSTER_NAME" -d /tmp/ >> "$GITHUB_STEP_SUMMARY" kubectl get nodes -o wide || true - name: Download branch infractl @@ -103,5 +103,5 @@ jobs: - name: Destroy PR dev cluster run: | - /github/home/.local/bin/infractl delete "$CLUSTER_NAME" + ~/.local/bin/infractl delete "$CLUSTER_NAME" diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index cbad3a06c..2421cb4b8 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -64,7 +64,7 @@ jobs: - create-dev-cluster runs-on: ubuntu-latest env: - KUBECONFIG: /github/home/artifacts/kubeconfig + KUBECONFIG: /tmp/kubeconfig INFRA_TOKEN: ${{ secrets.INFRA_TOKEN }} INFRACTL: bin/infractl -k -e localhost:8443 USE_GKE_GCLOUD_AUTH_PLUGIN: "True" @@ -96,7 +96,7 @@ jobs: - name: Download artifacts run: | - ~/.local/bin/infractl artifacts "$CLUSTER_NAME" -d /github/home/artifacts >> "$GITHUB_STEP_SUMMARY" + ~/.local/bin/infractl artifacts "$CLUSTER_NAME" -d /tmp/ >> "$GITHUB_STEP_SUMMARY" kubectl get nodes -o wide || true - name: Deploy infra to dev cluster From d8e5869d5870c3c9121bfe2c54b44fedff7204e8 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 15:01:53 -0600 Subject: [PATCH 16/38] log github event object --- .github/workflows/PR.yaml | 3 ++- scripts/add-PR-comment-for-deploy-to-dev.sh | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index 2421cb4b8..00ab158bf 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -142,7 +142,8 @@ jobs: run: | set -x gh pr comment "${{ github.event.issue.number }}" \ - --body "Deployment to development cluster completed." + --body "Deployment to development cluster completed." || true + cat <<<"${{ github.event }}" - name: Install Argo CLI run: | diff --git a/scripts/add-PR-comment-for-deploy-to-dev.sh b/scripts/add-PR-comment-for-deploy-to-dev.sh index 5bb083a8b..4b14bf503 100755 --- a/scripts/add-PR-comment-for-deploy-to-dev.sh +++ b/scripts/add-PR-comment-for-deploy-to-dev.sh @@ -21,7 +21,7 @@ add_PR_comment_for_deploy_to_dev() { local tmpfile tmpfile=$(mktemp) - cat > "$tmpfile" <<- EOT + cat > "$tmpfile" <<-EOT A single node development cluster (${DEV_CLUSTER_NAME}) was allocated in production infra for this PR. CI will attempt to deploy \`${IMAGE_NAME}\` to it. From bb639d318599ab2d4f84de8a2596dba454475a62 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 15:16:05 -0600 Subject: [PATCH 17/38] dump event obj as json --- .github/workflows/PR.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index 00ab158bf..5e41cb39b 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -143,7 +143,7 @@ jobs: set -x gh pr comment "${{ github.event.issue.number }}" \ --body "Deployment to development cluster completed." || true - cat <<<"${{ github.event }}" + cat <<<"${{ toJSON(github.event) }}" - name: Install Argo CLI run: | From 89955d8523b8566feeb09a7420539dc1e4821245 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 15:49:05 -0600 Subject: [PATCH 18/38] just use the html_url --- .github/workflows/PR.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index 5e41cb39b..67ba6d1ec 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -141,9 +141,9 @@ jobs: - name: Comment on PR run: | set -x - gh pr comment "${{ github.event.issue.number }}" \ + gh pr comment "${{ github.event.pull_request.html_url }}" \ --body "Deployment to development cluster completed." || true - cat <<<"${{ toJSON(github.event) }}" + echo "${{ github.event.number }}" - name: Install Argo CLI run: | From a14ef58ec024eee93b6552bd12e0f7bfd309d51f Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 15:52:01 -0600 Subject: [PATCH 19/38] restore no leading space --- scripts/add-PR-comment-for-deploy-to-dev.sh | 88 ++++++++++----------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/scripts/add-PR-comment-for-deploy-to-dev.sh b/scripts/add-PR-comment-for-deploy-to-dev.sh index 4b14bf503..da452f40c 100755 --- a/scripts/add-PR-comment-for-deploy-to-dev.sh +++ b/scripts/add-PR-comment-for-deploy-to-dev.sh @@ -21,50 +21,50 @@ add_PR_comment_for_deploy_to_dev() { local tmpfile tmpfile=$(mktemp) - cat > "$tmpfile" <<-EOT - A single node development cluster (${DEV_CLUSTER_NAME}) was allocated in production infra for this PR. - - CI will attempt to deploy \`${IMAGE_NAME}\` to it. - - :electric_plug: You can **connect** to this cluster with: - \`\`\` - gcloud container clusters get-credentials ${DEV_CLUSTER_NAME} --zone us-central1-a --project acs-team-temp-dev - \`\`\` - - :hammer_and_wrench: And pull **infractl** from the deployed dev infra-server with: - \`\`\` - nohup kubectl -n infra port-forward svc/infra-server-service 8443:8443 & - make pull-infractl-from-dev-server - \`\`\` - - :bike: You can then **use** the dev infra instance e.g.: - \`\`\` - bin/infractl -k -e localhost:8443 whoami - \`\`\` - - :warning: ***Any clusters that you start using your dev infra instance should have a lifespan shorter then the development cluster instance. Otherwise they will not be destroyed when the dev infra instance ceases to exist when the development cluster is deleted.*** :warning: - - ### Further Development - - :coffee: If you make changes, you can commit and push and CI will take care of updating the development cluster. - - :rocket: If you only modify configuration (chart/infra-server/configuration) or templates (chart/infra-server/{static,templates}), you can get a faster update with: - - \`\`\` - make helm-deploy - \`\`\` - - ### Logs - - Logs for the development infra depending on your @redhat.com authuser: - - [authuser=0](https://console.cloud.google.com/logs/query;query=resource.labels.cluster_name%3D%22${DEV_CLUSTER_NAME}%22%0Aresource.labels.container_name%3D%22infra-server%22?project=acs-team-temp-dev&authuser=0) - - [authuser=1](https://console.cloud.google.com/logs/query;query=resource.labels.cluster_name%3D%22${DEV_CLUSTER_NAME}%22%0Aresource.labels.container_name%3D%22infra-server%22?project=acs-team-temp-dev&authuser=1) - - [authuser=2](https://console.cloud.google.com/logs/query;query=resource.labels.cluster_name%3D%22${DEV_CLUSTER_NAME}%22%0Aresource.labels.container_name%3D%22infra-server%22?project=acs-team-temp-dev&authuser=2) - - Or: - \`\`\` - kubectl -n infra logs -l app=infra-server --tail=1 -f - \`\`\` + cat > "$tmpfile" < Date: Mon, 24 Mar 2025 16:04:45 -0600 Subject: [PATCH 20/38] remove bats tests to be removed in https://github.com/stackrox/infra/pull/1524 --- .github/workflows/PR.yaml | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index 67ba6d1ec..7ae8e0e56 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -159,22 +159,6 @@ jobs: run: | make argo-workflow-lint - - name: Run BATS e2e tests - env: - INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} - run: | - kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & - sleep 5 - - $INFRACTL whoami || true - $INFRACTL version || true - - make bats-e2e-tests || touch FAIL - kubectl -n infra logs -l app=infra-server --tail=-1 - kill %1 - - [[ ! -f FAIL ]] - - name: Run Go e2e tests env: INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} From c2b543a68d2b08d083d8c775a9896a1941db3511 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 18:17:44 -0600 Subject: [PATCH 21/38] set gopath --- .github/workflows/PR.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index 7ae8e0e56..a7d91cbbb 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -165,4 +165,5 @@ jobs: run: | kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & sleep 5 + export GOPATH=$(go env GOPATH) make go-e2e-tests From a032797b499ecf4c4ca9238aa407fce45e8460a4 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 21:20:31 -0600 Subject: [PATCH 22/38] lint --- .github/workflows/PR.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index a7d91cbbb..9e2a60728 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -165,5 +165,6 @@ jobs: run: | kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & sleep 5 - export GOPATH=$(go env GOPATH) + GOPATH=$(go env GOPATH) + export GOPATH make go-e2e-tests From 327c1af0f6378a0e26c45456da84aca0555cabc3 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 22:16:21 -0600 Subject: [PATCH 23/38] closed as sync workflow --- .github/workflows/PR-close.yaml | 29 ++--------------------------- .github/workflows/PR.yaml | 1 - 2 files changed, 2 insertions(+), 28 deletions(-) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index 3b54918af..335cdaedf 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -3,6 +3,7 @@ name: PR Workflow on: pull_request: types: + - synchronize - closed defaults: @@ -16,19 +17,7 @@ env: GH_TOKEN: ${{ secrets.RHACS_BOT_GITHUB_TOKEN }} jobs: - create-dev-cluster: - runs-on: ubuntu-latest - steps: - - uses: stackrox/actions/infra/create-cluster@v1 - with: - flavor: gke-default - name: infra-pr-${{ github.event.pull_request.number }} - args: machine-type=e2-medium,nodes=3,gcp-image-type=ubuntu_containerd - lifespan: ${{ github.actor == 'dependabot[bot]' && '1h' || '24h' }} - wait: true - token: ${{ secrets.INFRA_TOKEN }} - - deploy-and-test: + destroy: runs-on: ubuntu-latest env: KUBECONFIG: /tmp/kubeconfig @@ -37,10 +26,6 @@ jobs: USE_GKE_GCLOUD_AUTH_PLUGIN: "True" steps: - - uses: actions/setup-go@v5 - with: - go-version: "1.23" - - name: Checkout uses: actions/checkout@v4 with: @@ -48,16 +33,6 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} path: go/src/github.com/stackrox/infra - - name: Authenticate to GCloud - uses: google-github-actions/auth@v2 - with: - credentials_json: ${{ secrets.INFRA_CI_AUTOMATION_GCP_SA }} - - - name: Set up Cloud SDK - uses: "google-github-actions/setup-gcloud@v2" - with: - install_components: "gke-gcloud-auth-plugin" - - name: Download production infractl uses: stackrox/actions/infra/install-infractl@v1 diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index 9e2a60728..4b40d5773 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -4,7 +4,6 @@ on: pull_request: types: - opened - - synchronize defaults: run: From cbfeb50f8603b747ffbb2b47e859670cb567d5df Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 22:18:05 -0600 Subject: [PATCH 24/38] needs gcloud --- .github/workflows/PR-close.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index 335cdaedf..8fdbbbae0 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -33,6 +33,16 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} path: go/src/github.com/stackrox/infra + - name: Authenticate to GCloud + uses: google-github-actions/auth@v2 + with: + credentials_json: ${{ secrets.INFRA_CI_AUTOMATION_GCP_SA }} + + - name: Set up Cloud SDK + uses: "google-github-actions/setup-gcloud@v2" + with: + install_components: "gke-gcloud-auth-plugin" + - name: Download production infractl uses: stackrox/actions/infra/install-infractl@v1 From 909ecc019c12189b6ba897688a0209ff7304b6ad Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 23:13:45 -0600 Subject: [PATCH 25/38] try some creates --- .github/workflows/PR-close.yaml | 39 ++++++++++++++++++++++++++++++++- .github/workflows/PR.yaml | 20 +++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index 8fdbbbae0..558ef4fed 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -64,6 +64,9 @@ jobs: - name: Destroy created clusters still running run: | + set -x + set +e + sleep 3600 kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & sleep 10 @@ -71,6 +74,7 @@ jobs: echo 'For 30 minutes, list and delete child clusters that are not failed.' for I in {1..30}; do + $INFRACTL list --all --expired --quiet if [[ $($INFRACTL list --all --expired --status='READY,CREATING,DESTROYING' | tee >(cat >&2) | wc -l) -gt 0 ]]; then echo 'Active clusters found. Deleting ...' else @@ -83,10 +87,43 @@ jobs: echo "(sleep 1 minute then check again. try $I/30)" sleep 60 done + for I in {1..10}; do + $INFRACTL list --all --expired --quiet + count=$($INFRACTL list --all --expired --status='READY,CREATING,DESTROYING' | tee >(cat >&2) | wc -l) + if [[ $count -gt 0 ]]; then + echo 'Active clusters found. Deleting ...' + else + echo 'No active clusters found.' + break + fi + # shellcheck disable=SC2086 + $INFRACTL list --all --expired --status='READY,FAILED,CREATING' | grep '^[^ ]' \ + | xargs -I{} $INFRACTL delete {} + echo "(sleep 1 minute then check again. try $I/30)" + sleep 60 + done + for I in {1..10}; do + $INFRACTL list --all --expired + count=0 + for cluster in $($INFRACTL list --all --quiet --expired --status='READY,CREATING,DESTROYING'); do + echo "$(( ++ count )): ${cluster}" + done + if [[ $count -gt 0 ]]; then + echo 'Active clusters found. Deleting ...' + else + echo 'No active clusters found.' + break + fi + # shellcheck disable=SC2086 + $INFRACTL list --all --expired --status='READY,FAILED,CREATING' | grep '^[^ ]' \ + | xargs -I{} $INFRACTL delete {} + echo "(sleep 1 minute then check again. try $I/30)" + sleep 60 + done kill %1 - name: Destroy PR dev cluster run: | - ~/.local/bin/infractl delete "$CLUSTER_NAME" + echo "would now run:" ~/.local/bin/infractl delete "$CLUSTER_NAME" diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index 4b40d5773..4c19d979f 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -4,6 +4,7 @@ on: pull_request: types: - opened + - synchronize defaults: run: @@ -137,6 +138,25 @@ jobs: kill %1 + - name: create test clusters + run: | + set +e + set -x + kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & + sleep 10 + + $INFRACTL create gke-default tstlonglifer --lifespan 8h + $INFRACTL create gke-default tstshortlifer --lifespan 30s + $INFRACTL create gke-default tstexpired --lifespan 20m + $INFRACTL create gke-default tstdestroyed --lifespan 2h + $INFRACTL create rosahcp tstrosahcp --lifespan 45m + + $INFRACTL delete tstdestroyed --json + + $INFRACTL list --all --expired --quiet + + kill %1 + - name: Comment on PR run: | set -x From 53de1c13732deacd3726bd88fa2369d29324053d Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 23:25:18 -0600 Subject: [PATCH 26/38] wait for cluster --- .github/workflows/PR-close.yaml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index 558ef4fed..5a9d3a7dd 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -46,6 +46,14 @@ jobs: - name: Download production infractl uses: stackrox/actions/infra/install-infractl@v1 + - name: Wait for cluster + run: | + set +e + for I in {1..60}; do + ~/.local/bin/infractl get "$CLUSTER_NAME" | tee >( cat >&2 ) | grep READY && break + sleep 60 + done + - name: Download artifacts run: | ~/.local/bin/infractl artifacts "$CLUSTER_NAME" -d /tmp/ >> "$GITHUB_STEP_SUMMARY" @@ -66,7 +74,6 @@ jobs: run: | set -x set +e - sleep 3600 kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & sleep 10 From fdf5b2aeb87c1592214321304c304adbca9318be Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Mon, 24 Mar 2025 23:31:41 -0600 Subject: [PATCH 27/38] wait for infra installed also --- .github/workflows/PR-close.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index 5a9d3a7dd..84854ca49 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -53,6 +53,10 @@ jobs: ~/.local/bin/infractl get "$CLUSTER_NAME" | tee >( cat >&2 ) | grep READY && break sleep 60 done + for I in {1..60}; do + kubectl -n infra rollout status deploy/infra-server-service --timeout=60s && break + sleep 60 + done - name: Download artifacts run: | From 52f1a9b13c3ff6c81ef91ad958a9573c0559ff9b Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Tue, 25 Mar 2025 00:15:22 -0600 Subject: [PATCH 28/38] dev token for pr dev cluster --- .github/workflows/PR-close.yaml | 3 ++- .github/workflows/PR.yaml | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index 84854ca49..645b71a65 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -75,6 +75,8 @@ jobs: kill %1 - name: Destroy created clusters still running + env: + INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} run: | set -x set +e @@ -137,4 +139,3 @@ jobs: - name: Destroy PR dev cluster run: | echo "would now run:" ~/.local/bin/infractl delete "$CLUSTER_NAME" - diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index 4c19d979f..b27bfd884 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -139,6 +139,8 @@ jobs: kill %1 - name: create test clusters + env: + INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} run: | set +e set -x @@ -159,10 +161,8 @@ jobs: - name: Comment on PR run: | - set -x - gh pr comment "${{ github.event.pull_request.html_url }}" \ - --body "Deployment to development cluster completed." || true - echo "${{ github.event.number }}" + gh pr comment "${{ github.event.number }}" \ + --body "Deployment to development cluster completed." - name: Install Argo CLI run: | From ec144c76ead38e3dcc70f157a8aa02ef0e42cab1 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Tue, 25 Mar 2025 09:37:33 -0600 Subject: [PATCH 29/38] log cluster check --- .github/workflows/PR-close.yaml | 27 +++++++++++++++++++++++++-- .github/workflows/PR.yaml | 21 --------------------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index 645b71a65..9c7089f2a 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -49,15 +49,38 @@ jobs: - name: Wait for cluster run: | set +e + set -x + set -o pipefail for I in {1..60}; do - ~/.local/bin/infractl get "$CLUSTER_NAME" | tee >( cat >&2 ) | grep READY && break + ~/.local/bin/infractl get "$CLUSTER_NAME" | tee >( cat >&2 ) | grep READY && break || echo $? sleep 60 done for I in {1..60}; do - kubectl -n infra rollout status deploy/infra-server-service --timeout=60s && break + kubectl -n infra rollout status deploy/infra-server-service --timeout=60s && break || echo $? sleep 60 done + - name: create test clusters + env: + INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} + run: | + set +e + set -x + kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & + sleep 10 + + $INFRACTL create gke-default tstlonglifer --lifespan 8h + $INFRACTL create gke-default tstshortlifer --lifespan 30s + $INFRACTL create gke-default tstexpired --lifespan 20m + $INFRACTL create gke-default tstdestroyed --lifespan 2h + $INFRACTL create rosahcp tstrosahcp --lifespan 45m + + $INFRACTL delete tstdestroyed --json + + $INFRACTL list --all --expired --quiet + + kill %1 + - name: Download artifacts run: | ~/.local/bin/infractl artifacts "$CLUSTER_NAME" -d /tmp/ >> "$GITHUB_STEP_SUMMARY" diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index b27bfd884..61dc97cbe 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -138,27 +138,6 @@ jobs: kill %1 - - name: create test clusters - env: - INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} - run: | - set +e - set -x - kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & - sleep 10 - - $INFRACTL create gke-default tstlonglifer --lifespan 8h - $INFRACTL create gke-default tstshortlifer --lifespan 30s - $INFRACTL create gke-default tstexpired --lifespan 20m - $INFRACTL create gke-default tstdestroyed --lifespan 2h - $INFRACTL create rosahcp tstrosahcp --lifespan 45m - - $INFRACTL delete tstdestroyed --json - - $INFRACTL list --all --expired --quiet - - kill %1 - - name: Comment on PR run: | gh pr comment "${{ github.event.number }}" \ From 3718739cf2652c7e83b76332317c0689571e3f6e Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Tue, 25 Mar 2025 12:10:48 -0600 Subject: [PATCH 30/38] separate concurrency --- .github/workflows/PR-close.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index 9c7089f2a..16c0c06f3 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -10,7 +10,7 @@ defaults: run: working-directory: go/src/github.com/stackrox/infra -concurrency: pr-${{ github.ref }} +concurrency: pr-${{ github.ref }}-close env: CLUSTER_NAME: infra-pr-${{ github.event.pull_request.number }} From aab40746330d0b6bd6937f7d5fa31592750f6565 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Tue, 25 Mar 2025 12:43:08 -0600 Subject: [PATCH 31/38] get artifacts before trying --- .github/workflows/PR-close.yaml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index 16c0c06f3..baa1b4615 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -55,6 +55,8 @@ jobs: ~/.local/bin/infractl get "$CLUSTER_NAME" | tee >( cat >&2 ) | grep READY && break || echo $? sleep 60 done + ~/.local/bin/infractl artifacts "$CLUSTER_NAME" -d /tmp/ + kubectl get nodes -o wide || true for I in {1..60}; do kubectl -n infra rollout status deploy/infra-server-service --timeout=60s && break || echo $? sleep 60 @@ -81,11 +83,6 @@ jobs: kill %1 - - name: Download artifacts - run: | - ~/.local/bin/infractl artifacts "$CLUSTER_NAME" -d /tmp/ >> "$GITHUB_STEP_SUMMARY" - kubectl get nodes -o wide || true - - name: Download branch infractl run: | kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & From 9f123186daea5d99f15ab1008357b5d5e61ed03e Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Tue, 25 Mar 2025 13:12:32 -0600 Subject: [PATCH 32/38] check deployment --- .github/workflows/PR-close.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index baa1b4615..2f8930b0a 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -57,8 +57,8 @@ jobs: done ~/.local/bin/infractl artifacts "$CLUSTER_NAME" -d /tmp/ kubectl get nodes -o wide || true - for I in {1..60}; do - kubectl -n infra rollout status deploy/infra-server-service --timeout=60s && break || echo $? + for I in {1..5}; do + kubectl -n infra rollout status deploy/infra-server-deployment --timeout=300s && break || echo $? sleep 60 done @@ -70,6 +70,7 @@ jobs: set -x kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & sleep 10 + $INFRACTL list --all --quiet $INFRACTL create gke-default tstlonglifer --lifespan 8h $INFRACTL create gke-default tstshortlifer --lifespan 30s From ccc1e6e1975d3b257a38dd9035711f3b3f4de292 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Tue, 25 Mar 2025 13:21:18 -0600 Subject: [PATCH 33/38] try delete again --- .github/workflows/PR-close.yaml | 56 ++++++++------------------------- 1 file changed, 13 insertions(+), 43 deletions(-) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index 2f8930b0a..8f4154d95 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -62,6 +62,17 @@ jobs: sleep 60 done + - name: Download branch infractl + run: | + kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & + sleep 10 + + kubectl -n infra logs -l app=infra-server --tail=-1 + + make pull-infractl-from-dev-server + + kill %1 + - name: create test clusters env: INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} @@ -84,23 +95,11 @@ jobs: kill %1 - - name: Download branch infractl - run: | - kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & - sleep 10 - - kubectl -n infra logs -l app=infra-server --tail=-1 - - make pull-infractl-from-dev-server - - kill %1 - - name: Destroy created clusters still running env: INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} run: | set -x - set +e kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & sleep 10 @@ -108,35 +107,6 @@ jobs: echo 'For 30 minutes, list and delete child clusters that are not failed.' for I in {1..30}; do - $INFRACTL list --all --expired --quiet - if [[ $($INFRACTL list --all --expired --status='READY,CREATING,DESTROYING' | tee >(cat >&2) | wc -l) -gt 0 ]]; then - echo 'Active clusters found. Deleting ...' - else - echo 'No active clusters found.' - break - fi - # shellcheck disable=SC2086 - $INFRACTL list --all --expired --status='READY,FAILED,CREATING' | grep '^[^ ]' \ - | xargs -I{} $INFRACTL delete {} - echo "(sleep 1 minute then check again. try $I/30)" - sleep 60 - done - for I in {1..10}; do - $INFRACTL list --all --expired --quiet - count=$($INFRACTL list --all --expired --status='READY,CREATING,DESTROYING' | tee >(cat >&2) | wc -l) - if [[ $count -gt 0 ]]; then - echo 'Active clusters found. Deleting ...' - else - echo 'No active clusters found.' - break - fi - # shellcheck disable=SC2086 - $INFRACTL list --all --expired --status='READY,FAILED,CREATING' | grep '^[^ ]' \ - | xargs -I{} $INFRACTL delete {} - echo "(sleep 1 minute then check again. try $I/30)" - sleep 60 - done - for I in {1..10}; do $INFRACTL list --all --expired count=0 for cluster in $($INFRACTL list --all --quiet --expired --status='READY,CREATING,DESTROYING'); do @@ -149,7 +119,7 @@ jobs: break fi # shellcheck disable=SC2086 - $INFRACTL list --all --expired --status='READY,FAILED,CREATING' | grep '^[^ ]' \ + $INFRACTL list --all --expired --quiet --status='READY,FAILED,CREATING' | grep '^[^ ]' \ | xargs -I{} $INFRACTL delete {} echo "(sleep 1 minute then check again. try $I/30)" sleep 60 @@ -159,4 +129,4 @@ jobs: - name: Destroy PR dev cluster run: | - echo "would now run:" ~/.local/bin/infractl delete "$CLUSTER_NAME" + ~/.local/bin/infractl delete "$CLUSTER_NAME" From 947b1f52c7a41d962e0f7279ed08ef121a51467c Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Tue, 25 Mar 2025 13:47:44 -0600 Subject: [PATCH 34/38] check exists before deleting --- .github/workflows/PR-close.yaml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index 8f4154d95..28d9870ce 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -70,6 +70,7 @@ jobs: kubectl -n infra logs -l app=infra-server --tail=-1 make pull-infractl-from-dev-server + $INFRACTL version kill %1 @@ -103,14 +104,12 @@ jobs: kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & sleep 10 - $INFRACTL version - echo 'For 30 minutes, list and delete child clusters that are not failed.' for I in {1..30}; do $INFRACTL list --all --expired count=0 for cluster in $($INFRACTL list --all --quiet --expired --status='READY,CREATING,DESTROYING'); do - echo "$(( ++ count )): ${cluster}" + echo "$(( ++count )): ${cluster}" done if [[ $count -gt 0 ]]; then echo 'Active clusters found. Deleting ...' @@ -119,8 +118,10 @@ jobs: break fi # shellcheck disable=SC2086 - $INFRACTL list --all --expired --quiet --status='READY,FAILED,CREATING' | grep '^[^ ]' \ - | xargs -I{} $INFRACTL delete {} + for cluster in $($INFRACTL list --all --expired --quiet --status='READY,CREATING'); do + $INFRACTL get "${cluster}" \ + && $INFRACTL delete "${cluster}" + done echo "(sleep 1 minute then check again. try $I/30)" sleep 60 done @@ -129,4 +130,4 @@ jobs: - name: Destroy PR dev cluster run: | - ~/.local/bin/infractl delete "$CLUSTER_NAME" + echo would ~/.local/bin/infractl delete "$CLUSTER_NAME" From 5fe372499bf31c8014d59d8b52c5df50fe35a5cf Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Tue, 25 Mar 2025 15:32:38 -0600 Subject: [PATCH 35/38] delete dev cluster --- .github/workflows/PR-close.yaml | 35 ++++++++------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index 28d9870ce..a119f1fa8 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -48,9 +48,7 @@ jobs: - name: Wait for cluster run: | - set +e - set -x - set -o pipefail + set -xo pipefail for I in {1..60}; do ~/.local/bin/infractl get "$CLUSTER_NAME" | tee >( cat >&2 ) | grep READY && break || echo $? sleep 60 @@ -74,28 +72,6 @@ jobs: kill %1 - - name: create test clusters - env: - INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} - run: | - set +e - set -x - kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & - sleep 10 - $INFRACTL list --all --quiet - - $INFRACTL create gke-default tstlonglifer --lifespan 8h - $INFRACTL create gke-default tstshortlifer --lifespan 30s - $INFRACTL create gke-default tstexpired --lifespan 20m - $INFRACTL create gke-default tstdestroyed --lifespan 2h - $INFRACTL create rosahcp tstrosahcp --lifespan 45m - - $INFRACTL delete tstdestroyed --json - - $INFRACTL list --all --expired --quiet - - kill %1 - - name: Destroy created clusters still running env: INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} @@ -106,7 +82,7 @@ jobs: echo 'For 30 minutes, list and delete child clusters that are not failed.' for I in {1..30}; do - $INFRACTL list --all --expired + $INFRACTL list --all --quiet count=0 for cluster in $($INFRACTL list --all --quiet --expired --status='READY,CREATING,DESTROYING'); do echo "$(( ++count )): ${cluster}" @@ -130,4 +106,9 @@ jobs: - name: Destroy PR dev cluster run: | - echo would ~/.local/bin/infractl delete "$CLUSTER_NAME" + ~/.local/bin/infractl delete "$CLUSTER_NAME" + + - name: Comment on PR + run: | + gh pr comment "${{ github.event.number }}" \ + --body "Development cluster deleted." From 25a425bb24a2d9cc15fb488e24a58cbbdc384546 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Wed, 26 Mar 2025 14:55:16 -0600 Subject: [PATCH 36/38] try delete after tests --- .github/workflows/PR-close.yaml | 14 +++--- .github/workflows/PR.yaml | 77 +++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 6 deletions(-) diff --git a/.github/workflows/PR-close.yaml b/.github/workflows/PR-close.yaml index a119f1fa8..b364fdbcf 100644 --- a/.github/workflows/PR-close.yaml +++ b/.github/workflows/PR-close.yaml @@ -3,14 +3,13 @@ name: PR Workflow on: pull_request: types: - - synchronize - closed defaults: run: working-directory: go/src/github.com/stackrox/infra -concurrency: pr-${{ github.ref }}-close +concurrency: pr-${{ github.ref }} env: CLUSTER_NAME: infra-pr-${{ github.event.pull_request.number }} @@ -84,20 +83,22 @@ jobs: for I in {1..30}; do $INFRACTL list --all --quiet count=0 - for cluster in $($INFRACTL list --all --quiet --expired --status='READY,CREATING,DESTROYING'); do + # Continue if running or destroying + for cluster in $($INFRACTL list --all --quiet --status='READY,CREATING,DESTROYING'); do echo "$(( ++count )): ${cluster}" done if [[ $count -gt 0 ]]; then - echo 'Active clusters found. Deleting ...' + echo 'Active clusters found. Deleting ...' | tee -a "$GITHUB_STEP_SUMMARY" else echo 'No active clusters found.' break fi # shellcheck disable=SC2086 - for cluster in $($INFRACTL list --all --expired --quiet --status='READY,CREATING'); do + for cluster in $($INFRACTL list --all --quiet --status='READY,CREATING'); do $INFRACTL get "${cluster}" \ && $INFRACTL delete "${cluster}" - done + done \ + | tee -a "$GITHUB_STEP_SUMMARY" echo "(sleep 1 minute then check again. try $I/30)" sleep 60 done @@ -107,6 +108,7 @@ jobs: - name: Destroy PR dev cluster run: | ~/.local/bin/infractl delete "$CLUSTER_NAME" + echo "Deleted PR dev cluster ${CLUSTER_NAME}" | tee -a "$GITHUB_STEP_SUMMARY" - name: Comment on PR run: | diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index 61dc97cbe..278dcca11 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -157,6 +157,25 @@ jobs: run: | make argo-workflow-lint + - name: create example clusters to delete + env: + INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} + run: | + set +e + set -x + kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & + sleep 10 + + $INFRACTL create gke-default tstlonglifer --lifespan 8h + $INFRACTL create gke-default tstshortlifer --lifespan 30s + $INFRACTL create gke-default tstdestroyed --lifespan 2h + + $INFRACTL delete tstdestroyed --json + + $INFRACTL list --all --quiet + + kill %1 + - name: Run Go e2e tests env: INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} @@ -166,3 +185,61 @@ jobs: GOPATH=$(go env GOPATH) export GOPATH make go-e2e-tests + + - name: Wait for cluster + run: | + set -xo pipefail + for I in {1..60}; do + ~/.local/bin/infractl get "$CLUSTER_NAME" | tee >( cat >&2 ) | grep READY && break || echo $? + sleep 60 + done + ~/.local/bin/infractl artifacts "$CLUSTER_NAME" -d /tmp/ + kubectl get nodes -o wide || true + for I in {1..5}; do + kubectl -n infra rollout status deploy/infra-server-deployment --timeout=300s && break || echo $? + sleep 60 + done + + - name: Destroy created clusters still running + env: + INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} + run: | + set -x + kubectl -n infra port-forward svc/infra-server-service 8443:8443 > /dev/null 2>&1 & + sleep 10 + + echo 'For 30 minutes, list and delete child clusters that are not failed.' + for I in {1..30}; do + $INFRACTL list --all --quiet + count=0 + # Continue if running or destroying + for cluster in $($INFRACTL list --all --quiet --status='READY,CREATING,DESTROYING'); do + echo "$(( ++count )): ${cluster}" + done + if [[ $count -gt 0 ]]; then + echo 'Active clusters found. Deleting ...' | tee -a "$GITHUB_STEP_SUMMARY" + else + echo 'No active clusters found.' + break + fi + # shellcheck disable=SC2086 + for cluster in $($INFRACTL list --all --quiet --status='READY,CREATING'); do + $INFRACTL get "${cluster}" \ + && $INFRACTL delete "${cluster}" + done \ + | tee -a "$GITHUB_STEP_SUMMARY" + echo "(sleep 1 minute then check again. try $I/30)" + sleep 60 + done + + kill %1 + + - name: Destroy PR dev cluster + run: | + ~/.local/bin/infractl delete "$CLUSTER_NAME" + echo "Deleted PR dev cluster ${CLUSTER_NAME}" | tee -a "$GITHUB_STEP_SUMMARY" + + - name: Comment on PR + run: | + gh pr comment "${{ github.event.number }}" \ + --body "Development cluster deleted." From 8af088be397ccb59822f38ebe901dd9761e91ec3 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Thu, 27 Mar 2025 07:57:11 -0600 Subject: [PATCH 37/38] always run --- .github/workflows/PR.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index 278dcca11..d352b7493 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -187,6 +187,7 @@ jobs: make go-e2e-tests - name: Wait for cluster + if: always() run: | set -xo pipefail for I in {1..60}; do @@ -201,6 +202,7 @@ jobs: done - name: Destroy created clusters still running + if: always() env: INFRA_TOKEN: ${{ secrets.INFRA_TOKEN_DEV }} run: | @@ -235,11 +237,9 @@ jobs: kill %1 - name: Destroy PR dev cluster + if: always() run: | ~/.local/bin/infractl delete "$CLUSTER_NAME" echo "Deleted PR dev cluster ${CLUSTER_NAME}" | tee -a "$GITHUB_STEP_SUMMARY" - - - name: Comment on PR - run: | gh pr comment "${{ github.event.number }}" \ --body "Development cluster deleted." From 322fcf8ba5a40c351d1ced25ac93e9893a6e9600 Mon Sep 17 00:00:00 2001 From: davdhacs <105243888+davdhacs@users.noreply.github.com> Date: Thu, 27 Mar 2025 08:19:26 -0600 Subject: [PATCH 38/38] list all not quiet --- .github/workflows/PR.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/PR.yaml b/.github/workflows/PR.yaml index d352b7493..21ff23a1c 100644 --- a/.github/workflows/PR.yaml +++ b/.github/workflows/PR.yaml @@ -212,7 +212,7 @@ jobs: echo 'For 30 minutes, list and delete child clusters that are not failed.' for I in {1..30}; do - $INFRACTL list --all --quiet + $INFRACTL list --all count=0 # Continue if running or destroying for cluster in $($INFRACTL list --all --quiet --status='READY,CREATING,DESTROYING'); do