diff --git a/.github/workflows/manual_test.yml b/.github/workflows/manual_test.yml
index 7479e79..0b14068 100644
--- a/.github/workflows/manual_test.yml
+++ b/.github/workflows/manual_test.yml
@@ -5,11 +5,11 @@ on:
     inputs:
       machine_zone:
         description: GCE zone
-        default: "us-east1-c"
+        default: "us-central1-c"
         required: true
       machine_type:
         description: "GCE machine type: https://cloud.google.com/compute/docs/machine-types"
-        default: "n1-standard-4"
+        default: "t2d-standard-1"
         required: true
       disk_size:
         description: VM disk size.
@@ -19,11 +19,15 @@ on:
         required: false
       shutdown_timeout:
         description: "Shutdown grace period (in seconds)."
+        default: 60
+        required: true
+      finish_timeout:
+        description: "Shutdown grace period (in seconds) when manually stopped."
         default: 30
         required: true
       no_external_address:
         description: Disables external IP address for the worker
-        default: false
+        default: true
         required: true
       actions_preinstalled:
         description: "Whether the GitHub actions have already been installed at `/actions-runner`."
@@ -37,17 +41,20 @@ jobs:
       label: ${{ steps.create-runner.outputs.label }}
     steps:
       - id: create-runner
-        uses: related-sciences/gce-github-runner@main
+        uses: iunu/gce-github-runner@iunu
         with:
-          token: ${{ secrets.GH_SA_TOKEN }}
+          token: ${{ secrets.GH_PAT_TOKEN }}
           project_id: ${{ secrets.GCP_PROJECT_ID }}
           service_account_key: ${{ secrets.GCP_SA_KEY }}
           machine_zone: ${{ inputs.machine_zone }}
           machine_type: ${{ inputs.machine_type }}
           disk_size: ${{ inputs.disk_size }}
           runner_service_account: ${{ inputs.runner_service_account }}
+          network: 'runner-net'
+          subnet: 'runner-subnet'
+          preemptible: true
           image_project: ubuntu-os-cloud
-          image_family: ubuntu-2004-lts
+          image_family: ubuntu-2404-lts-amd64
           no_external_address: ${{ inputs.no_external_address }}
           actions_preinstalled: ${{ inputs.actions_preinstalled }}
           shutdown_timeout: ${{ inputs.shutdown_timeout }}
@@ -57,3 +64,8 @@ jobs:
     runs-on: ${{ needs.create-runner.outputs.label }}
     steps:
       - run: echo "This runs on the GCE runner VM"
+      - uses: iunu/gce-github-runner@iunu
+        with:
+          command: stop
+          finish_timeout: ${{ inputs.finish_timeout }}
+        if: always()
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index bb62ce5..289b071 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -5,6 +5,7 @@ on:
   push:
     branches:
       - main
+      - iunu
 
 jobs:
   create-runner:
@@ -13,20 +14,39 @@ jobs:
       label: ${{ steps.create-runner.outputs.label }}
     steps:
       - id: create-runner
-        uses: related-sciences/gce-github-runner@main
+        uses: iunu/gce-github-runner@iunu
         with:
-          token: ${{ secrets.GH_SA_TOKEN }}
+          token: ${{ secrets.GH_PAT_TOKEN }}
           project_id: ${{ secrets.GCP_PROJECT_ID }}
           service_account_key: ${{ secrets.GCP_SA_KEY }}
+          machine_zone: 'us-central1-c'
+          machine_type: 't2d-standard-1'
           image_project: ubuntu-os-cloud
-          image_family: ubuntu-2004-lts
-          # NOTE: if you do not want external IPs, you must configure your GCE project
-          # to allow access to the Internet e.g. via Cloud NAT, if you are fine
-          # with external IPs, no_external_address should be false (default).
+          image_family: ubuntu-2404-lts-amd64
+          network: 'runner-net'
+          subnet: 'runner-subnet'
+          preemptible: true
+          actions_preinstalled: false
           no_external_address: true
+          shutdown_timeout: 30
 
-  test:
+  test1:
     needs: create-runner
     runs-on: ${{ needs.create-runner.outputs.label }}
     steps:
-      - run: echo "This runs on the GCE runner VM"
+      - name: Job 1 Test
+        run: echo "This runs on the GCE runner VM"
+
+  test2:
+    needs: create-runner
+    runs-on: ${{ needs.create-runner.outputs.label }}
+    steps:
+      - name: Wait for longer than shutdown timeout
+        run: sleep 60  # Give time for runner to shutdown from shutdown timeout
+
+      - name: Job 2 Test
+        run: echo "This also runs on the GCE runner VM"
+      - uses: iunu/gce-github-runner@iunu
+        with:
+          command: stop
+        if: always()
diff --git a/README.md b/README.md
index e13e0ee..f484b16 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ jobs:
           project_id: ${{ secrets.GCP_PROJECT_ID }}
           service_account_key: ${{ secrets.GCP_SA_KEY }}
           image_project: ubuntu-os-cloud
-          image_family: ubuntu-2004-lts
+          image_family: ubuntu-2404-lts-amd64
 
   test:
     needs: create-runner
diff --git a/action.sh b/action.sh
index a114b96..ab0b231 100755
--- a/action.sh
+++ b/action.sh
@@ -32,6 +32,7 @@ image_family=
 network=
 scopes=
 shutdown_timeout=
+finish_timeout=
 subnet=
 preemptible=
 ephemeral=
@@ -60,6 +61,7 @@ while getopts_long :h opt \
   network optional_argument \
   scopes required_argument \
   shutdown_timeout required_argument \
+  finish_timeout required_argument \
   subnet optional_argument \
   preemptible required_argument \
   ephemeral required_argument \
@@ -120,6 +122,9 @@ do
     shutdown_timeout)
       shutdown_timeout=$OPTLARG
       ;;
+    finish_timeout)
+      finish_timeout=$OPTLARG
+      ;;
     subnet)
       subnet=${OPTLARG-$subnet}
       ;;
@@ -201,39 +206,46 @@ function start_vm {
-	# Create a systemd service in charge of shutting down the machine once the workflow has finished
+	# Create a templated systemd service (shutdown@SECONDS) that deletes the machine SECONDS after it is started; the sleep argument in shutdown.sh is double-escaped so it is expanded only when shutdown.sh itself runs
 	cat <<-EOF > /etc/systemd/system/shutdown.sh
 	#!/bin/sh
-	sleep ${shutdown_timeout}
+	sleep \\\${1}
 	gcloud compute instances delete $VM_ID --zone=$machine_zone --quiet
 	EOF
 
-	cat <<-EOF > /etc/systemd/system/shutdown.service
+	cat <<-EOF > /etc/systemd/system/shutdown\@.service
 	[Unit]
-	Description=Shutdown service
+	Description=Shutdown service in %i Seconds
 	[Service]
-	ExecStart=/etc/systemd/system/shutdown.sh
+	ExecStart=/etc/systemd/system/shutdown.sh %i
 	[Install]
 	WantedBy=multi-user.target
 	EOF
 
 	chmod +x /etc/systemd/system/shutdown.sh
 	systemctl daemon-reload
-	systemctl enable shutdown.service
 
 	cat <<-EOF > /usr/bin/gce_runner_shutdown.sh
 	#!/bin/sh
 	echo \"✅ Self deleting $VM_ID in ${machine_zone} in ${shutdown_timeout} seconds ...\"
 	# We tear down the machine by starting the systemd service that was registered by the startup script
-	systemctl start shutdown.service
+	systemctl start shutdown@${shutdown_timeout}.service
+	EOF
+
+	cat <<-EOF > /usr/bin/gce_cancel_shutdown.sh
+	#!/bin/sh
+	echo \"✅ Cancelling deletion of $VM_ID in ${machine_zone}!\"
+	# Stop the shutdown script
+	systemctl stop shutdown@${shutdown_timeout}.service
 	EOF
 
 	# See: https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/running-scripts-before-or-after-a-job
 	echo "ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/usr/bin/gce_runner_shutdown.sh" >.env
+	echo "ACTIONS_RUNNER_HOOK_JOB_STARTED=/usr/bin/gce_cancel_shutdown.sh" >>.env # append, a plain redirect would erase the JOB_COMPLETED hook written above
 	gcloud compute instances add-labels ${VM_ID} --zone=${machine_zone} --labels=gh_ready=0 && \\
 	RUNNER_ALLOW_RUNASROOT=1 ./config.sh --url https://github.com/${GITHUB_REPOSITORY} --token ${RUNNER_TOKEN} --labels ${VM_ID} --unattended ${ephemeral_flag} --disableupdate && \\
 	./svc.sh install && \\
 	./svc.sh start && \\
 	gcloud compute instances add-labels ${VM_ID} --zone=${machine_zone} --labels=gh_ready=1
-	# 3 days represents the max workflow runtime. This will shutdown the instance if everything else fails.
-	nohup sh -c \"sleep 3d && gcloud --quiet compute instances delete ${VM_ID} --zone=${machine_zone}\" > /dev/null &
+	# 3 days represents the max workflow runtime. We're using 1 day as nothing should run that long before a runner is deleted
+	nohup sh -c \"sleep 1d && gcloud --quiet compute instances delete ${VM_ID} --zone=${machine_zone}\" > /dev/null &
   "
 
   if $actions_preinstalled ; then
@@ -335,14 +347,33 @@ function start_vm {
   fi
 }
 
+function stop_vm {
+  # NOTE: this function runs on the GCE VM
+  # $1: grace period (in seconds) before the VM deletes itself
+  echo "Stopping GCE VM ..."
+  # NOTE: it would be nice to gracefully shut down the runner, but we actually don't need
+  #       to do that. VM shutdown will disconnect the runner, and GH will unregister it
+  #       in 30 days
+  # TODO: RUNNER_ALLOW_RUNASROOT=1 /actions-runner/config.sh remove --token $TOKEN
+  NAME=$(curl -S -s -X GET http://metadata.google.internal/computeMetadata/v1/instance/name -H 'Metadata-Flavor: Google')
+  # The metadata server reports the zone as projects/<project-number>/zones/<zone>; keep only the zone name
+  ZONE=$(basename "$(curl -S -s -X GET http://metadata.google.internal/computeMetadata/v1/instance/zone -H 'Metadata-Flavor: Google')")
+  echo "✅ Self deleting $NAME in $ZONE in ${1} seconds ..."
+  # We tear down the machine by starting the systemd service that was registered by the startup script
+  systemctl start shutdown@${1}.service
+}
+
 safety_on
 case "$command" in
   start)
     start_vm
     ;;
+  stop)
+    stop_vm "${finish_timeout}"
+    ;;
   *)
-    echo "Invalid command: \`${command}\`, valid values: start" >&2
+    echo "Invalid command: \`${command}\`, valid values: start|stop" >&2
     usage
     exit 1
     ;;
 esac
diff --git a/action.yml b/action.yml
index b43ca15..4453c28 100644
--- a/action.yml
+++ b/action.yml
@@ -7,6 +7,10 @@ branding:
   icon: triangle
   color: purple
 inputs:
+  command:
+    description: "`start` or `stop` of the runner"
+    default: start
+    required: true
   token:
     description: >-
       GitHub auth token, needs `repo`/`public_repo` scope: https://docs.github.com/en/rest/reference/actions#self-hosted-runners.
@@ -26,7 +30,7 @@ inputs:
     required: true
   machine_zone:
     description: GCE zone
-    default: "us-east1-c"
+    default: "us-central1-c"
     required: true
   machine_type:
     description: GCE machine type; https://cloud.google.com/compute/docs/machine-types
@@ -84,6 +88,10 @@ inputs:
     required: true
   shutdown_timeout:
     description: "Shutdown grace period (in seconds)."
+    default: 60
+    required: true
+  finish_timeout:
+    description: "Shutdown grace period (in seconds) used when the action is run with `command: stop`."
     default: 30
     required: true
   actions_preinstalled:
@@ -115,7 +123,7 @@ runs:
     - id: gce-github-runner-script
       run: >
         ${{ github.action_path }}/action.sh
-        --command=start
+        --command=${{ inputs.command }}
         --token=${{ inputs.token }}
         --project_id=${{ inputs.project_id }}
         --service_account_key='${{ inputs.service_account_key }}'
@@ -128,6 +136,7 @@ runs:
         --disk_size=${{ inputs.disk_size }}
         --scopes=${{ inputs.scopes }}
         --shutdown_timeout=${{ inputs.shutdown_timeout }}
+        --finish_timeout=${{ inputs.finish_timeout }}
         --runner_service_account=${{ inputs.runner_service_account }}
         --image_project=${{ inputs.image_project }}
         --image=${{ inputs.image }}