Skip to content
Closed
24 changes: 18 additions & 6 deletions .github/workflows/manual_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ on:
inputs:
machine_zone:
description: GCE zone
default: "us-east1-c"
default: "us-central1-c"
required: true
machine_type:
description: "GCE machine type: https://cloud.google.com/compute/docs/machine-types"
default: "n1-standard-4"
default: "t2d-standard-1"
required: true
disk_size:
description: VM disk size.
Expand All @@ -19,11 +19,15 @@ on:
required: false
shutdown_timeout:
description: "Shutdown grace period (in seconds)."
default: 60
required: true
finish_timeout:
description: "Shutdown grace period (in seconds) when manually stopped."
default: 30
required: true
no_external_address:
description: Disables external IP address for the worker
default: false
default: true
required: true
actions_preinstalled:
description: "Whether the GitHub actions have already been installed at `/actions-runner`."
Expand All @@ -37,17 +41,20 @@ jobs:
label: ${{ steps.create-runner.outputs.label }}
steps:
- id: create-runner
uses: related-sciences/gce-github-runner@main
uses: iunu/gce-github-runner@iunu
with:
token: ${{ secrets.GH_SA_TOKEN }}
token: ${{ secrets.GH_PAT_TOKEN }}
project_id: ${{ secrets.GCP_PROJECT_ID }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
machine_zone: ${{ inputs.machine_zone }}
machine_type: ${{ inputs.machine_type }}
disk_size: ${{ inputs.disk_size }}
runner_service_account: ${{ inputs.runner_service_account }}
network: 'runner-net'
subnet: 'runner-subnet'
preemptible: true
image_project: ubuntu-os-cloud
image_family: ubuntu-2004-lts
image_family: ubuntu-2404-lts-amd64
no_external_address: ${{ inputs.no_external_address }}
actions_preinstalled: ${{ inputs.actions_preinstalled }}
shutdown_timeout: ${{ inputs.shutdown_timeout }}
Expand All @@ -57,3 +64,8 @@ jobs:
runs-on: ${{ needs.create-runner.outputs.label }}
steps:
- run: echo "This runs on the GCE runner VM"
- uses: iunu/gce-github-runner@iunu
with:
command: stop
finish_timeout: ${{ inputs.finish_timeout }}
if: always()
36 changes: 28 additions & 8 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ on:
push:
branches:
- main
- iunu

jobs:
create-runner:
Expand All @@ -13,20 +14,39 @@ jobs:
label: ${{ steps.create-runner.outputs.label }}
steps:
- id: create-runner
uses: related-sciences/gce-github-runner@main
uses: iunu/gce-github-runner@iunu
with:
token: ${{ secrets.GH_SA_TOKEN }}
token: ${{ secrets.GH_PAT_TOKEN }}
project_id: ${{ secrets.GCP_PROJECT_ID }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
machine_zone: 'us-central1-c'
machine_type: 't2d-standard-1'
image_project: ubuntu-os-cloud
image_family: ubuntu-2004-lts
# NOTE: if you do not want external IPs, you must configure your GCE project
# to allow access to the Internet e.g. via Cloud NAT, if you are fine
# with external IPs, no_external_address should be false (default).
image_family: ubuntu-2404-lts-amd64
network: 'runner-net'
subnet: 'runner-subnet'
preemptible: true
actions_preinstalled: false
no_external_address: true
shutdown_timeout: 30

test:
test1:
needs: create-runner
runs-on: ${{ needs.create-runner.outputs.label }}
steps:
- run: echo "This runs on the GCE runner VM"
- name: Job 1 Test
run: echo "This runs on the GCE runner VM"

test2:
needs: create-runner
runs-on: ${{ needs.create-runner.outputs.label }}
steps:
- name: Wait for longer than shutdown timeout
run: sleep 60 #Give time for runner to shutdown from shutdown timeout

- name: Job 2 Test
run: echo "This also runs on the GCE runner VM"
- uses: iunu/gce-github-runner@iunu
with:
command: stop
if: always()
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
project_id: ${{ secrets.GCP_PROJECT_ID }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
image_project: ubuntu-os-cloud
image_family: ubuntu-2004-lts
image_family: ubuntu-2404-lts-amd64

test:
needs: create-runner
Expand Down
49 changes: 39 additions & 10 deletions action.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ image_family=
network=
scopes=
shutdown_timeout=
finish_timeout=
subnet=
preemptible=
ephemeral=
Expand Down Expand Up @@ -60,6 +61,7 @@ while getopts_long :h opt \
network optional_argument \
scopes required_argument \
shutdown_timeout required_argument \
finish_timeout required_argument \
subnet optional_argument \
preemptible required_argument \
ephemeral required_argument \
Expand Down Expand Up @@ -120,6 +122,9 @@ do
shutdown_timeout)
shutdown_timeout=$OPTLARG
;;
finish_timeout)
finish_timeout=$OPTLARG
;;
subnet)
subnet=${OPTLARG-$subnet}
;;
Expand Down Expand Up @@ -201,39 +206,46 @@ function start_vm {
# Create a systemd service in charge of shutting down the machine once the workflow has finished
cat <<-EOF > /etc/systemd/system/shutdown.sh
#!/bin/sh
sleep ${shutdown_timeout}
sleep \${1}
gcloud compute instances delete $VM_ID --zone=$machine_zone --quiet
EOF

cat <<-EOF > /etc/systemd/system/shutdown.service
cat <<-EOF > /etc/systemd/system/shutdown\@.service
[Unit]
Description=Shutdown service
Description=Shutdown service in %i Seconds
[Service]
ExecStart=/etc/systemd/system/shutdown.sh
ExecStart=/etc/systemd/system/shutdown.sh %i
[Install]
WantedBy=multi-user.target
EOF

chmod +x /etc/systemd/system/shutdown.sh
systemctl daemon-reload
systemctl enable shutdown.service

cat <<-EOF > /usr/bin/gce_runner_shutdown.sh
#!/bin/sh
echo \"✅ Self deleting $VM_ID in ${machine_zone} in ${shutdown_timeout} seconds ...\"
# We tear down the machine by starting the systemd service that was registered by the startup script
systemctl start shutdown.service
systemctl start shutdown@${shutdown_timeout}.service
EOF

cat <<-EOF > /usr/bin/gce_cancel_shutdown.sh
#!/bin/sh
echo \"✅ Cancelling deletion of $VM_ID in ${machine_zone}!\"
# Stop the shutdown script
systemctl stop shutdown@${shutdown_timeout}.service
EOF

# See: https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/running-scripts-before-or-after-a-job
# Register both job hooks in the runner .env file. The first write creates or truncates the file,
# so the second one must append. Otherwise the JOB_COMPLETED hook is overwritten and the VM
# never schedules its own deletion after a job finishes.
echo "ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/usr/bin/gce_runner_shutdown.sh" >.env
echo "ACTIONS_RUNNER_HOOK_JOB_STARTED=/usr/bin/gce_cancel_shutdown.sh" >>.env
gcloud compute instances add-labels ${VM_ID} --zone=${machine_zone} --labels=gh_ready=0 && \\
RUNNER_ALLOW_RUNASROOT=1 ./config.sh --url https://github.com/${GITHUB_REPOSITORY} --token ${RUNNER_TOKEN} --labels ${VM_ID} --unattended ${ephemeral_flag} --disableupdate && \\
./svc.sh install && \\
./svc.sh start && \\
gcloud compute instances add-labels ${VM_ID} --zone=${machine_zone} --labels=gh_ready=1
# 3 days represents the max workflow runtime. This will shutdown the instance if everything else fails.
nohup sh -c \"sleep 3d && gcloud --quiet compute instances delete ${VM_ID} --zone=${machine_zone}\" > /dev/null &
# 3 days represents the max workflow runtime. We're using 1 day as nothing should run that long before a runner is deleted
nohup sh -c \"sleep 1d && gcloud --quiet compute instances delete ${VM_ID} --zone=${machine_zone}\" > /dev/null &
"

if $actions_preinstalled ; then
Expand Down Expand Up @@ -335,14 +347,31 @@ function start_vm {
fi
}

# Schedule self-deletion of the VM this script is running on.
#
# Arguments:
#   $1 - grace period in seconds before the instance is deleted; must be a
#        non-negative integer because it becomes the instance name of the
#        templated shutdown@<seconds>.service unit and is passed to sleep.
# Returns:
#   1 (with a message on stderr) when the grace period is missing or not an
#   integer; otherwise the exit status of the last command executed.
function stop_vm {
  # NOTE: this function runs on the GCE VM
  local timeout="${1:-}"
  local metadata_url="http://metadata.google.internal/computeMetadata/v1/instance"
  local name zone

  # Reject bad input up front: an empty value would yield the invalid unit name
  # shutdown@.service, and a non-numeric one would make the sleep in shutdown.sh fail.
  if [[ ! "${timeout}" =~ ^[0-9]+$ ]]; then
    echo "Invalid finish_timeout: \`${timeout}\`, expected a non-negative integer (seconds)" >&2
    return 1
  fi

  echo "Stopping GCE VM ..."
  # NOTE: it would be nice to gracefully shut down the runner, but we actually don't need
  #       to do that. VM shutdown will disconnect the runner, and GH will unregister it
  #       in 30 days
  # TODO: RUNNER_ALLOW_RUNASROOT=1 /actions-runner/config.sh remove --token $TOKEN

  # Name and zone are only used for the log line below; -f makes curl fail on an
  # HTTP error instead of printing the error body as if it were the value.
  name=$(curl -f -S -s -X GET "${metadata_url}/name" -H 'Metadata-Flavor: Google')
  zone=$(curl -f -S -s -X GET "${metadata_url}/zone" -H 'Metadata-Flavor: Google')
  echo "✅ Self deleting ${name} in ${zone} in ${timeout} seconds ..."
  # We tear down the machine by starting the systemd service that was registered by the startup script
  systemctl start "shutdown@${timeout}.service"
}

safety_on
case "$command" in
start)
start_vm
;;
stop)
stop_vm ${finish_timeout}
;;
*)
echo "Invalid command: \`${command}\`, valid values: start" >&2
echo "Invalid command: \`${command}\`, valid values: start|stop" >&2
usage
exit 1
;;
esac
esac
13 changes: 11 additions & 2 deletions action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ branding:
icon: triangle
color: purple
inputs:
command:
description: "`start` or `stop` of the runner"
default: start
required: true
token:
description: >-
GitHub auth token, needs `repo`/`public_repo` scope: https://docs.github.com/en/rest/reference/actions#self-hosted-runners.
Expand All @@ -26,7 +30,7 @@ inputs:
required: true
machine_zone:
description: GCE zone
default: "us-east1-c"
default: "us-central1-c"
required: true
machine_type:
description: GCE machine type; https://cloud.google.com/compute/docs/machine-types
Expand Down Expand Up @@ -84,6 +88,10 @@ inputs:
required: true
shutdown_timeout:
description: "Shutdown grace period (in seconds)."
default: 60
required: true
finish_timeout:
description: "Manual 'shutdown' commands grace period (in seconds) if called."
default: 30
required: true
actions_preinstalled:
Expand Down Expand Up @@ -115,7 +123,7 @@ runs:
- id: gce-github-runner-script
run: >
${{ github.action_path }}/action.sh
--command=start
--command=${{ inputs.command }}
--token=${{ inputs.token }}
--project_id=${{ inputs.project_id }}
--service_account_key='${{ inputs.service_account_key }}'
Expand All @@ -128,6 +136,7 @@ runs:
--disk_size=${{ inputs.disk_size }}
--scopes=${{ inputs.scopes }}
--shutdown_timeout=${{ inputs.shutdown_timeout }}
--finish_timeout=${{ inputs.finish_timeout }}
--runner_service_account=${{ inputs.runner_service_account }}
--image_project=${{ inputs.image_project }}
--image=${{ inputs.image }}
Expand Down
Loading