From 5ae0da50a1475885d9f3f60b47d7e7c990f5bfe3 Mon Sep 17 00:00:00 2001 From: Azreen Zaman Date: Tue, 19 May 2026 16:01:04 -0400 Subject: [PATCH 1/2] Add retry logic to sacctmgr ping in start-services.sh --- azure-slurm-install/start-services.sh | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/azure-slurm-install/start-services.sh b/azure-slurm-install/start-services.sh index 63b29405..e5bd6ddc 100644 --- a/azure-slurm-install/start-services.sh +++ b/azure-slurm-install/start-services.sh @@ -7,13 +7,25 @@ run_slurmdbd_via_systemctl() { echo "Starting slurmdbd via systemctl..." systemctl start slurmdbd - # Verify slurmdbd is responding - sleep 10 - if ! sacctmgr ping > /dev/null 2>&1; then - echo "ERROR: slurmdbd started but is not responding to sacctmgr ping" + # Verify slurmdbd is responding with retry logic + attempts=3 + delay=5 + set +e + for i in $( seq 1 $attempts ); do + echo $i/$attempts sleeping $delay seconds before running sacctmgr ping + sleep $delay + sacctmgr ping > /dev/null 2>&1 + if [ $? == 0 ]; then + echo "slurmdbd is running and responding to ping" + set -e + return 0 + fi + done + + if [ $i == $attempts ] && [ $? 
!= 0 ]; then + echo "ERROR: slurmdbd started but is not responding to sacctmgr ping after $attempts attempts" exit 2 fi - echo "slurmdbd is running and responding to ping" } run_slurmdbd() { From 1adce12dac84033489f7a05a23274c4907974118 Mon Sep 17 00:00:00 2001 From: Azreen Zaman Date: Thu, 21 May 2026 14:30:38 -0400 Subject: [PATCH 2/2] Capture sacctmgr ping exit status in ping_rc for the post-loop failure check Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- azure-slurm-install/start-services.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/azure-slurm-install/start-services.sh b/azure-slurm-install/start-services.sh index e5bd6ddc..70679565 100644 --- a/azure-slurm-install/start-services.sh +++ b/azure-slurm-install/start-services.sh @@ -10,19 +10,21 @@ run_slurmdbd_via_systemctl() { # Verify slurmdbd is responding with retry logic attempts=3 delay=5 + ping_rc=0 set +e for i in $( seq 1 $attempts ); do echo $i/$attempts sleeping $delay seconds before running sacctmgr ping sleep $delay sacctmgr ping > /dev/null 2>&1 - if [ $? == 0 ]; then + ping_rc=$? + if [ "$ping_rc" -eq 0 ]; then echo "slurmdbd is running and responding to ping" set -e return 0 fi done - if [ $i == $attempts ] && [ $? != 0 ]; then + if [ "$i" == "$attempts" ] && [ "$ping_rc" -ne 0 ]; then echo "ERROR: slurmdbd started but is not responding to sacctmgr ping after $attempts attempts" exit 2 fi