From 2971e407ece4a4445f193b0faab50c3f285d02a7 Mon Sep 17 00:00:00 2001 From: Nicholas Chaimov Date: Wed, 1 Oct 2025 11:51:24 -0700 Subject: [PATCH 1/4] Fix syntax error in run_driver_slurm run_driver_slurm used /bin/sh, but one line used == for string equality, which is a bash extension. Use = instead. --- testsuite/run_driver_slurm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/run_driver_slurm b/testsuite/run_driver_slurm index 5b798af..ba5a74c 100644 --- a/testsuite/run_driver_slurm +++ b/testsuite/run_driver_slurm @@ -11,7 +11,7 @@ PROCS=$SPINDLE_TEST_ARGS fi export PROCS -if [ "x$SPINDLE_BGQ_LD_PRELOAD" == "xtrue" ] ; then +if [ "x$SPINDLE_BGQ_LD_PRELOAD" = "xtrue" ] ; then PRELOAD_ARGS="--runjob-opts=--envs LD_PRELOAD=$LIBRARY_LIST" elif [ "x$SPINDLE_LD_PRELOAD" != "x" ] ; then export LD_PRELOAD=$SPINDLE_LD_PRELOAD From 53056b20a877b6c798378cb4701df3226d682b3e Mon Sep 17 00:00:00 2001 From: Nicholas Chaimov Date: Wed, 1 Oct 2025 13:35:47 -0700 Subject: [PATCH 2/4] Slurm testsuite workflow for CI --- .github/workflows/ci.yml | 50 +++++++ containers/spindle-slurm-ubuntu/Dockerfile | 107 +++++++++++++++ .../spindle-slurm-ubuntu/conf/cgroup.conf | 1 + .../spindle-slurm-ubuntu/conf/mpich.conf | 1 + .../spindle-slurm-ubuntu/conf/slurm.conf | 42 ++++++ .../conf/slurmdbd.conf.template | 10 ++ .../spindle-slurm-ubuntu/conf/ssh_config | 3 + .../spindle-slurm-ubuntu/docker-compose.yml | 128 ++++++++++++++++++ .../spindle-slurm-ubuntu/generate_config.sh | 10 ++ .../scripts/add_docker_user.sh | 10 ++ .../scripts/build_mpich.sh | 14 ++ .../scripts/build_slurm.sh | 10 ++ .../scripts/build_spindle.sh | 9 ++ .../scripts/entrypoint.sh | 21 +++ .../scripts/setup_slurm.sh | 11 ++ .../spindle-slurm-ubuntu/scripts/setup_ssh.sh | 9 ++ 16 files changed, 436 insertions(+) create mode 100644 containers/spindle-slurm-ubuntu/Dockerfile create mode 100644 containers/spindle-slurm-ubuntu/conf/cgroup.conf create mode 100644 containers/spindle-slurm-ubuntu/conf/mpich.conf create mode 100644 containers/spindle-slurm-ubuntu/conf/slurm.conf create mode 100644 containers/spindle-slurm-ubuntu/conf/slurmdbd.conf.template create mode 100644 containers/spindle-slurm-ubuntu/conf/ssh_config create mode 100644 containers/spindle-slurm-ubuntu/docker-compose.yml create mode 100755 containers/spindle-slurm-ubuntu/generate_config.sh create mode 100755 containers/spindle-slurm-ubuntu/scripts/add_docker_user.sh create mode 100755 containers/spindle-slurm-ubuntu/scripts/build_mpich.sh create mode 100755 containers/spindle-slurm-ubuntu/scripts/build_slurm.sh create mode 100755 containers/spindle-slurm-ubuntu/scripts/build_spindle.sh create mode 100755 containers/spindle-slurm-ubuntu/scripts/entrypoint.sh create mode 100755 containers/spindle-slurm-ubuntu/scripts/setup_slurm.sh create mode 100755 containers/spindle-slurm-ubuntu/scripts/setup_ssh.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fda3d4e..b2ea0ef 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -104,3 +104,53 @@ jobs: cd containers/spindle-flux-ubuntu docker compose down + spindle-slurm-ubuntu: + name: Testsuite (Slurm, Ubuntu) + environment: Spindle CI + runs-on: ubuntu-latest + timeout-minutes: 20 + steps: + - name: Check out Spindle + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 + + - name: Setup Docker Compose + uses: docker/setup-compose-action@364cc21a5de5b1ee4a7f5f9d3fa374ce0ccde746 + with: + version: latest + + - name: Generate MariaDB configuration + id: slurm-ubuntu-mariadb + run: | + cd containers/spindle-slurm-ubuntu + ./generate_config.sh + + - name: Build spindle-slurm-ubuntu image + id: slurm-ubuntu-build + run: | + cd containers/spindle-slurm-ubuntu + docker compose --progress=plain build + + - name: Bring spindle-slurm-ubuntu up + id: slurm-ubuntu-up + run: | + cd containers/spindle-slurm-ubuntu + docker compose up -d --wait --wait-timeout 60 + + - name: Verify munge works in spindle-slurm-ubuntu + id: slurm-ubuntu-munge + run: | + docker exec slurm-head bash -c 'munge -n | unmunge' + + - name: Run spindle-slurm-ubuntu testsuite + id: slurm-ubuntu-testsuite + run: | + docker exec slurm-head bash -c 'cd Spindle-build/testsuite && salloc -n${workers} -N${workers} ./runTests ${workers}' + + - name: Bring spindle-slurm-ubuntu down + id: slurm-ubuntu-down + if: ${{ always() }} + continue-on-error: true + run: | + cd containers/spindle-slurm-ubuntu + docker compose down + diff --git a/containers/spindle-slurm-ubuntu/Dockerfile b/containers/spindle-slurm-ubuntu/Dockerfile new file mode 100644 index 0000000..fed354d --- /dev/null +++ b/containers/spindle-slurm-ubuntu/Dockerfile @@ -0,0 +1,107 @@ +ARG UBUNTU_VERSION=noble +FROM ubuntu:${UBUNTU_VERSION} +ARG replicas=4 +ENV workers=${replicas} +USER root + +RUN apt-get update \ + && DEBIAN_FRONTEND="noninteractive" apt-get -qq install -y --no-install-recommends \ + apt-utils + +RUN apt-get update \ + && DEBIAN_FRONTEND="noninteractive" apt-get -qq install -y --no-install-recommends \ + locales \ + ca-certificates \ + wget \ + git \ + ssh \ + sudo \ + build-essential \ + pkg-config \ + autotools-dev \ + libtool \ + autoconf \ + automake \ + make \ + gfortran-13 \ + gcc-13 \ + g++-13 \ + munge \ + libmunge-dev \ + libhwloc-dev \ + python3-dev \ + python3-pip \ + python3-setuptools \ + python3-wheel \ + python-is-python3 \ + openssh-server \ + openssh-client \ + mariadb-client \ + libmariadb-dev \ + libhttp-parser-dev \ + psmisc \ + libjson-c-dev \ + fd-find \ + silversearcher-ag \ + vim + + +# Prevent hwloc from trying to use graphics cards +# as this fails when X is not running. +ENV HWLOC_COMPONENTS=-gl + +# Set up munge +RUN mkdir -p /run/munge && \ + chown munge:munge /run/munge && \ + chmod 0755 /run/munge + +ARG BUILD_ROOT=containers/spindle-slurm-ubuntu +COPY ${BUILD_ROOT}/scripts/add_docker_user.sh /add_docker_user.sh + +# Slurm daemons run as $SLURM_USER +ARG SLURM_USER=slurm +ARG USER=${SLURM_USER} +ARG UID=1002 +RUN /add_docker_user.sh + +# Applications run as $USER +ARG USER=slurmuser +ARG UID=1001 +RUN /add_docker_user.sh + +ARG SLURM_VERSION +COPY ${BUILD_ROOT}/scripts/build_slurm.sh /build_slurm.sh +RUN /build_slurm.sh + +ARG MPICH_VERSION +COPY ${BUILD_ROOT}/scripts/build_mpich.sh /build_mpich.sh +RUN /build_mpich.sh + +COPY ${BUILD_ROOT}/scripts/setup_slurm.sh /setup_slurm.sh +COPY ${BUILD_ROOT}/conf/slurm.conf /home/${SLURM_USER}/slurm.conf +COPY ${BUILD_ROOT}/conf/slurmdbd.conf /home/${SLURM_USER}/slurmdbd.conf +COPY ${BUILD_ROOT}/conf/cgroup.conf /home/${SLURM_USER}/cgroup.conf +RUN /setup_slurm.sh + +COPY ${BUILD_ROOT}/conf/mpich.conf /etc/ld.so.conf.d/mpich.conf +RUN ldconfig + +# Slurm without Spank plugin needs passwordless ssh +USER ${USER} +WORKDIR /home/${USER} +COPY ${BUILD_ROOT}/conf/ssh_config /home/${USER}/ +COPY ${BUILD_ROOT}/scripts/setup_ssh.sh /home/${USER}/ +RUN /home/${USER}/setup_ssh.sh + +RUN mkdir -p /home/${USER}/Spindle +# Copy the Spindle repo into the container +COPY . /home/${USER}/Spindle +COPY ${BUILD_ROOT}/scripts/build_spindle.sh /home/${USER}/build_spindle.sh +RUN ./build_spindle.sh + +COPY ${BUILD_ROOT}/scripts/entrypoint.sh /home/${USER}/entrypoint.sh +ENV PATH /home/${USER}/Spindle-inst/bin:$PATH + +ENV SLURM_MPI_TYPE pmi2 +ENTRYPOINT /bin/bash ./entrypoint.sh + diff --git a/containers/spindle-slurm-ubuntu/conf/cgroup.conf b/containers/spindle-slurm-ubuntu/conf/cgroup.conf new file mode 100644 index 0000000..e59e9ae --- /dev/null +++ b/containers/spindle-slurm-ubuntu/conf/cgroup.conf @@ -0,0 +1 @@ +CgroupPlugin=cgroup/v1 diff --git a/containers/spindle-slurm-ubuntu/conf/mpich.conf b/containers/spindle-slurm-ubuntu/conf/mpich.conf new file mode 100644 index 0000000..0bf940b --- /dev/null +++ b/containers/spindle-slurm-ubuntu/conf/mpich.conf @@ -0,0 +1 @@ +/usr/lib diff --git a/containers/spindle-slurm-ubuntu/conf/slurm.conf b/containers/spindle-slurm-ubuntu/conf/slurm.conf new file mode 100644 index 0000000..abf060d --- /dev/null +++ b/containers/spindle-slurm-ubuntu/conf/slurm.conf @@ -0,0 +1,42 @@ +ClusterName=linux +ControlMachine=slurm-head +ControlAddr=slurm-head +SlurmUser=slurm +SlurmctldPort=6817 +SlurmdPort=6818 +AuthType=auth/munge +StateSaveLocation=/var/lib/slurmd +SlurmdSpoolDir=/var/spool/slurmd +SwitchType=switch/none +MpiDefault=none +SlurmctldPidFile=/var/run/slurmd/slurmctld.pid +SlurmdPidFile=/var/run/slurmd/slurmd.pid +ProctrackType=proctrack/linuxproc +TaskPlugin=task/affinity +ReturnToService=2 +SlurmctldTimeout=300 +SlurmdTimeout=300 +InactiveLimit=0 +MinJobAge=300 +KillWait=30 +Waittime=0 +SchedulerType=sched/backfill +SelectType=select/cons_tres +SelectTypeParameters=CR_Core_Memory +SlurmctldDebug=3 +SlurmctldLogFile=/var/log/slurm/slurmctld.log +SlurmdDebug=3 +SlurmdLogFile=/var/log/slurm/slurmd.log +JobCompType=jobcomp/filetxt +JobCompLoc=/var/log/slurm/jobcomp.log +JobAcctGatherType=jobacct_gather/linux +JobAcctGatherFrequency=30 +AccountingStorageType=accounting_storage/slurmdbd +AccountingStorageHost=slurm-db +AccountingStoragePort=6819 +NodeName=slurm-node-1 NodeAddr=slurm-node-1 CPUs=3 RealMemory=1000 State=UNKNOWN +NodeName=slurm-node-2 NodeAddr=slurm-node-2 CPUs=3 RealMemory=1000 State=UNKNOWN +NodeName=slurm-node-3 NodeAddr=slurm-node-3 CPUs=3 RealMemory=1000 State=UNKNOWN +NodeName=slurm-node-4 NodeAddr=slurm-node-4 CPUs=3 RealMemory=1000 State=UNKNOWN +PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP + diff --git a/containers/spindle-slurm-ubuntu/conf/slurmdbd.conf.template b/containers/spindle-slurm-ubuntu/conf/slurmdbd.conf.template new file mode 100644 index 0000000..0e27411 --- /dev/null +++ b/containers/spindle-slurm-ubuntu/conf/slurmdbd.conf.template @@ -0,0 +1,10 @@ +AuthType=auth/munge +DbdAddr=slurm-db +DbdHost=slurm-db +SlurmUser=slurm +DebugLevel=4 +LogFile=/var/log/slurm/slurmdbd.log +PidFile=/var/run/slurmdbd/slurmdbd.pid +StorageType=accounting_storage/mysql +StorageHost=slurm-mariadb +StorageUser=slurm diff --git a/containers/spindle-slurm-ubuntu/conf/ssh_config b/containers/spindle-slurm-ubuntu/conf/ssh_config new file mode 100644 index 0000000..f293c53 --- /dev/null +++ b/containers/spindle-slurm-ubuntu/conf/ssh_config @@ -0,0 +1,3 @@ +Host slurm-* + StrictHostKeyChecking no + diff --git a/containers/spindle-slurm-ubuntu/docker-compose.yml b/containers/spindle-slurm-ubuntu/docker-compose.yml new file mode 100644 index 0000000..6fe6fad --- /dev/null +++ b/containers/spindle-slurm-ubuntu/docker-compose.yml @@ -0,0 +1,128 @@ +# `replicas` must match the number of nodes defined in the services section +x-shared-workers: + &workers + replicas: 4 + +# Ubuntu version to use (noble = 24.04) +x-shared-build-args: &shared-build-args + UBUNTU_VERSION: noble + SLURM_VERSION: slurm-25-05-3-1 + MPICH_VERSION: 4.2.2 + <<: *workers + +# Docker prohibits copying files from outside of the build context. +# In order to be able to copy the whole repo into the container, +# we have to set the context to be the root of the repo. +# We then have to specify the path from there to the Dockerfile. +#x-shared-build-context: &shared-build-context +# context: ../.. +# dockerfile: containers/spindle-flux-ubuntu/Dockerfile +# args: *shared-build-args + +x-shared-build-context: &shared-build-context + context: ../.. + dockerfile: containers/spindle-slurm-ubuntu/Dockerfile + args: *shared-build-args + +# Name of the head node +x-shared-environment: &shared-environment + SLURM_HEAD_NODE: slurm-head + <<: *workers + +x-worker-environment: &worker-environment + SLURM_ROLE: worker + <<: *shared-environment + +networks: + slurm: + driver: bridge + +# Common parameters for all nodes. +x-shared-node-parameters: &shared-node-parameters + build: *shared-build-context + networks: + - slurm + cap_add: + - SYS_NICE # Required for libnuma + +x-healthcheck-parameters: &healthcheck-parameters + start_period: 3s + interval: 3s + timeout: 5s + retries: 5 + +x-worker-parameters: &worker-node-parameters + <<: *shared-node-parameters + environment: *worker-environment + depends_on: + slurm-head: + condition: service_healthy + healthcheck: + test: ["CMD", "stat", "/var/run/slurmd/slurmd.pid"] + <<: *healthcheck-parameters + +services: + slurm-mariadb: + image: mariadb:12 + networks: + - slurm + hostname: slurm-mariadb + container_name: slurm-mariadb + env_file: mariadb.env + environment: + MYSQL_RANDOM_ROOT_PASSWORD: "yes" + MYSQL_DATABASE: "slurm_acct_db" + MYSQL_USER: "slurm" + healthcheck: + test: ["CMD", "healthcheck.sh", "--connect", "--innodb_initialized"] + <<: *healthcheck-parameters + + slurm-db: + <<: *shared-node-parameters + hostname: slurm-db + container_name: slurm-db + environment: + SLURM_ROLE: db + <<: *shared-environment + depends_on: + slurm-mariadb: + condition: service_healthy + healthcheck: + test: ["CMD", "stat", "/var/run/slurmdbd/slurmdbd.pid"] + <<: *healthcheck-parameters + + slurm-head: + <<: *shared-node-parameters + hostname: slurm-head + container_name: slurm-head + tty: true + environment: + SLURM_ROLE: ctl + <<: *shared-environment + depends_on: + slurm-db: + condition: service_healthy + healthcheck: + test: ["CMD", "stat", "/var/run/slurmd/slurmctld.pid"] + <<: *healthcheck-parameters + + slurm-node-1: + <<: *worker-node-parameters + hostname: slurm-node-1 + container_name: slurm-node-1 + + slurm-node-2: + <<: *worker-node-parameters + hostname: slurm-node-2 + container_name: slurm-node-2 + + slurm-node-3: + <<: *worker-node-parameters + hostname: slurm-node-3 + container_name: slurm-node-3 + + slurm-node-4: + <<: *worker-node-parameters + hostname: slurm-node-4 + container_name: slurm-node-4 + diff --git a/containers/spindle-slurm-ubuntu/generate_config.sh b/containers/spindle-slurm-ubuntu/generate_config.sh new file mode 100755 index 0000000..7f911c3 --- /dev/null +++ b/containers/spindle-slurm-ubuntu/generate_config.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Generate random password for the MariaDB slurm user +# and set it in config files + +MARIADB_PASS=$(openssl rand --base64 16 | head -c -3) +echo "MARIADB_PASSWORD: \"${MARIADB_PASS}\"" > mariadb.env +cp conf/slurmdbd.conf.template conf/slurmdbd.conf +echo "StoragePass=${MARIADB_PASS}" >> conf/slurmdbd.conf + diff --git a/containers/spindle-slurm-ubuntu/scripts/add_docker_user.sh b/containers/spindle-slurm-ubuntu/scripts/add_docker_user.sh new file mode 100755 index 0000000..ace8c61 --- /dev/null +++ b/containers/spindle-slurm-ubuntu/scripts/add_docker_user.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +set -euxo pipefail + +sudo groupadd -g ${UID} ${USER} +sudo useradd -g ${USER} -u ${UID} -d /home/${USER} -m ${USER} +# Allow user to run as other users so that munge can be started as the munge user +sudo sh -c "printf \"${USER} ALL=(ALL) NOPASSWD: ALL\\n\" >> /etc/sudoers" +sudo adduser ${USER} sudo +sudo usermod -s /bin/bash ${USER} + diff --git a/containers/spindle-slurm-ubuntu/scripts/build_mpich.sh b/containers/spindle-slurm-ubuntu/scripts/build_mpich.sh new file mode 100755 index 0000000..1fe17cc --- /dev/null +++ b/containers/spindle-slurm-ubuntu/scripts/build_mpich.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +set -euxo pipefail + +mkdir mpich +pushd mpich +wget -O - https://www.mpich.org/static/downloads/${MPICH_VERSION}/mpich-${MPICH_VERSION}.tar.gz | tar xvz --strip-components 1 +mkdir -p build +pushd build +../configure --prefix=/usr --disable-fortran --with-slurm=/usr/include/slurm +make -j$(nproc) install +popd +popd +rm -rf mpich + diff --git a/containers/spindle-slurm-ubuntu/scripts/build_slurm.sh b/containers/spindle-slurm-ubuntu/scripts/build_slurm.sh new file mode 100755 index 0000000..9c97a5a --- /dev/null +++ b/containers/spindle-slurm-ubuntu/scripts/build_slurm.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +set -euxo pipefail + +export SLURM_SRC="/home/${SLURM_USER}/slurm" +git clone -b ${SLURM_VERSION} --single-branch --depth=1 https://github.com/SchedMD/slurm.git ${SLURM_SRC} +cd ${SLURM_SRC} +./configure --enable-debug --prefix=/usr --sysconfdir=/etc/slurm --with-mysql_config=/usr/bin --libdir=/usr/lib +make -j$(nproc) +make install + diff --git a/containers/spindle-slurm-ubuntu/scripts/build_spindle.sh b/containers/spindle-slurm-ubuntu/scripts/build_spindle.sh new file mode 100755 index 0000000..6943e49 --- /dev/null +++ b/containers/spindle-slurm-ubuntu/scripts/build_spindle.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -euxo pipefail + +mkdir -p /home/${USER}/Spindle-build +cd /home/${USER}/Spindle-build +/home/${USER}/Spindle/configure --prefix=/home/${USER}/Spindle-inst --enable-sec-munge --with-rm=slurm --with-rsh-launch --with-rsh-cmd=/usr/bin/ssh --with-localstorage=/tmp CFLAGS="-O2 -g" CXXFLAGS="-O2 -g" +make -j$(nproc) +make install + diff --git a/containers/spindle-slurm-ubuntu/scripts/entrypoint.sh b/containers/spindle-slurm-ubuntu/scripts/entrypoint.sh new file mode 100755 index 0000000..54d40e4 --- /dev/null +++ b/containers/spindle-slurm-ubuntu/scripts/entrypoint.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +echo "SLURM_ROLE: ${SLURM_ROLE}" + +echo "Starting sshd..." +sudo service ssh start +echo "Starting munged..." +sudo -u munge /usr/sbin/munged + +if [ "${SLURM_ROLE}" = "db" ]; then + echo "Starting slurmdbd..." + sudo -u slurm /usr/sbin/slurmdbd -Dvvv +elif [ "${SLURM_ROLE}" = "ctl" ] ; then + echo "Starting slurmctld..." + sudo -u slurm /usr/sbin/slurmctld -i -Dvvv +elif [ "${SLURM_ROLE}" = "worker" ] ; then + echo "Starting slurmd..." + sudo /usr/sbin/slurmd -Dvvv +fi + +sleep inf diff --git a/containers/spindle-slurm-ubuntu/scripts/setup_slurm.sh b/containers/spindle-slurm-ubuntu/scripts/setup_slurm.sh new file mode 100755 index 0000000..49e7824 --- /dev/null +++ b/containers/spindle-slurm-ubuntu/scripts/setup_slurm.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -euxo pipefail + +mkdir -p /etc/slurm /etc/sysconfig/slurm /var/spool/slurmd /var/spool/slurmctld /var/run/slurmd /var/run/slurmdbd /var/lib/slurmd /var/log/slurm +touch /var/lib/slurmd/node_state /var/lib/slurmd/front_end_state /var/lib/slurmd/job_state /var/lib/slurmd/resv_state /var/lib/slurmd/trigger_state /var/lib/slurmd/assoc_mgr_state /var/lib/slurmd/assoc_usage /var/lib/slurmd/qos_usage /var/lib/slurmd/fed_mgr_state +cp /home/${SLURM_USER}/slurm.conf /etc/slurm/slurm.conf +cp /home/${SLURM_USER}/slurmdbd.conf /etc/slurm/slurmdbd.conf +cp /home/${SLURM_USER}/cgroup.conf /etc/slurm/cgroup.conf +chown -R slurm:slurm /etc/slurm /etc/sysconfig/slurm /var/spool/slurmd /var/spool/slurmctld /var/run/slurmd /var/run/slurmdbd /var/lib/slurmd /var/log/slurm +chmod 600 /etc/slurm/slurmdbd.conf + diff --git a/containers/spindle-slurm-ubuntu/scripts/setup_ssh.sh b/containers/spindle-slurm-ubuntu/scripts/setup_ssh.sh new file mode 100755 index 0000000..70a68ba --- /dev/null +++ b/containers/spindle-slurm-ubuntu/scripts/setup_ssh.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -euxo pipefail + +ssh-keygen -t ed25519 -f /home/${USER}/.ssh/id_ed25519 -N "" -q +cp /home/${USER}/.ssh/id_ed25519.pub /home/${USER}/.ssh/authorized_keys +chmod 600 /home/${USER}/.ssh/authorized_keys +cp /home/${USER}/ssh_config /home/${USER}/.ssh/config +chmod 600 /home/${USER}/.ssh/config +rm -f /home/${USER}/ssh_config From 49db4f5429457dcc23aff1aa25b91f1fe52e26e0 Mon Sep 17 00:00:00 2001 From: Nicholas Chaimov Date: Tue, 7 Oct 2025 15:34:34 -0700 Subject: [PATCH 3/4] Split Slurm workflow into separate base and testing images. Base image has Slurm and MPICH. Testing adds Spindle. --- .github/workflows/ci.yml | 16 ++++++-- .../{ => base}/Dockerfile | 28 +------------ .../{ => base}/conf/mpich.conf | 0 .../base/docker-compose.yml | 11 ++++++ .../{ => base}/scripts/add_docker_user.sh | 0 .../{ => base}/scripts/build_mpich.sh | 0 .../{ => base}/scripts/build_slurm.sh | 0 .../spindle-slurm-ubuntu/testing/Dockerfile | 39 +++++++++++++++++++ .../{ => testing}/conf/cgroup.conf | 0 .../{ => testing}/conf/slurm.conf | 0 .../{ => testing}/conf/slurmdbd.conf.template | 0 .../{ => testing}/conf/ssh_config | 0 .../{ => testing}/docker-compose.yml | 10 ++--- .../{ => testing}/generate_config.sh | 0 .../testing/scripts/add_docker_user.sh | 10 +++++ .../{ => testing}/scripts/build_spindle.sh | 0 .../{ => testing}/scripts/entrypoint.sh | 0 .../{ => testing}/scripts/setup_slurm.sh | 0 .../{ => testing}/scripts/setup_ssh.sh | 0 19 files changed, 77 insertions(+), 37 deletions(-) rename containers/spindle-slurm-ubuntu/{ => base}/Dockerfile (65%) rename containers/spindle-slurm-ubuntu/{ => base}/conf/mpich.conf (100%) create mode 100644 containers/spindle-slurm-ubuntu/base/docker-compose.yml rename containers/spindle-slurm-ubuntu/{ => base}/scripts/add_docker_user.sh (100%) rename containers/spindle-slurm-ubuntu/{ => base}/scripts/build_mpich.sh (100%) rename containers/spindle-slurm-ubuntu/{ => base}/scripts/build_slurm.sh (100%) create mode 100644 containers/spindle-slurm-ubuntu/testing/Dockerfile rename containers/spindle-slurm-ubuntu/{ => testing}/conf/cgroup.conf (100%) rename containers/spindle-slurm-ubuntu/{ => testing}/conf/slurm.conf (100%) rename containers/spindle-slurm-ubuntu/{ => testing}/conf/slurmdbd.conf.template (100%) rename containers/spindle-slurm-ubuntu/{ => testing}/conf/ssh_config (100%) rename containers/spindle-slurm-ubuntu/{ => testing}/docker-compose.yml (94%) rename containers/spindle-slurm-ubuntu/{ => testing}/generate_config.sh (100%) create mode 100755 containers/spindle-slurm-ubuntu/testing/scripts/add_docker_user.sh rename containers/spindle-slurm-ubuntu/{ => testing}/scripts/build_spindle.sh (100%) rename containers/spindle-slurm-ubuntu/{ => testing}/scripts/entrypoint.sh (100%) rename containers/spindle-slurm-ubuntu/{ => testing}/scripts/setup_slurm.sh (100%) rename containers/spindle-slurm-ubuntu/{ => testing}/scripts/setup_ssh.sh (100%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b2ea0ef..32bc72a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -118,22 +118,30 @@ jobs: with: version: latest + - name: Login to GitHub Container Registry + if: ${{ !env.ACT }} + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Generate MariaDB configuration id: slurm-ubuntu-mariadb run: | - cd containers/spindle-slurm-ubuntu + cd containers/spindle-slurm-ubuntu/testing ./generate_config.sh - name: Build spindle-slurm-ubuntu image id: slurm-ubuntu-build run: | - cd containers/spindle-slurm-ubuntu + cd containers/spindle-slurm-ubuntu/testing docker compose --progress=plain build - name: Bring spindle-slurm-ubuntu up id: slurm-ubuntu-up run: | - cd containers/spindle-slurm-ubuntu + cd containers/spindle-slurm-ubuntu/testing docker compose up -d --wait --wait-timeout 60 - name: Verify munge works in spindle-slurm-ubuntu @@ -151,6 +159,6 @@ jobs: if: ${{ always() }} continue-on-error: true run: | - cd containers/spindle-slurm-ubuntu + cd containers/spindle-slurm-ubuntu/testing docker compose down diff --git a/containers/spindle-slurm-ubuntu/Dockerfile b/containers/spindle-slurm-ubuntu/base/Dockerfile similarity index 65% rename from containers/spindle-slurm-ubuntu/Dockerfile rename to containers/spindle-slurm-ubuntu/base/Dockerfile index fed354d..16c8d53 100644 --- a/containers/spindle-slurm-ubuntu/Dockerfile +++ b/containers/spindle-slurm-ubuntu/base/Dockerfile @@ -1,7 +1,5 @@ ARG UBUNTU_VERSION=noble FROM ubuntu:${UBUNTU_VERSION} -ARG replicas=4 -ENV workers=${replicas} USER root RUN apt-get update \ @@ -55,7 +53,7 @@ RUN mkdir -p /run/munge && \ chown munge:munge /run/munge && \ chmod 0755 /run/munge -ARG BUILD_ROOT=containers/spindle-slurm-ubuntu +ARG BUILD_ROOT=. COPY ${BUILD_ROOT}/scripts/add_docker_user.sh /add_docker_user.sh # Slurm daemons run as $SLURM_USER @@ -77,31 +75,7 @@ ARG MPICH_VERSION COPY ${BUILD_ROOT}/scripts/build_mpich.sh /build_mpich.sh RUN /build_mpich.sh -COPY ${BUILD_ROOT}/scripts/setup_slurm.sh /setup_slurm.sh -COPY ${BUILD_ROOT}/conf/slurm.conf /home/${SLURM_USER}/slurm.conf -COPY ${BUILD_ROOT}/conf/slurmdbd.conf /home/${SLURM_USER}/slurmdbd.conf -COPY ${BUILD_ROOT}/conf/cgroup.conf /home/${SLURM_USER}/cgroup.conf -RUN /setup_slurm.sh - COPY ${BUILD_ROOT}/conf/mpich.conf /etc/ld.so.conf.d/mpich.conf RUN ldconfig -# Slurm without Spank plugin needs passwordless ssh -USER ${USER} -WORKDIR /home/${USER} -COPY ${BUILD_ROOT}/conf/ssh_config /home/${USER}/ -COPY ${BUILD_ROOT}/scripts/setup_ssh.sh /home/${USER}/ -RUN /home/${USER}/setup_ssh.sh - -RUN mkdir -p /home/${USER}/Spindle -# Copy the Spindle repo into the container -COPY . /home/${USER}/Spindle -COPY ${BUILD_ROOT}/scripts/build_spindle.sh /home/${USER}/build_spindle.sh -RUN ./build_spindle.sh - -COPY ${BUILD_ROOT}/scripts/entrypoint.sh /home/${USER}/entrypoint.sh -ENV PATH /home/${USER}/Spindle-inst/bin:$PATH - ENV SLURM_MPI_TYPE pmi2 -ENTRYPOINT /bin/bash ./entrypoint.sh - diff --git a/containers/spindle-slurm-ubuntu/conf/mpich.conf b/containers/spindle-slurm-ubuntu/base/conf/mpich.conf similarity index 100% rename from containers/spindle-slurm-ubuntu/conf/mpich.conf rename to containers/spindle-slurm-ubuntu/base/conf/mpich.conf diff --git a/containers/spindle-slurm-ubuntu/base/docker-compose.yml b/containers/spindle-slurm-ubuntu/base/docker-compose.yml new file mode 100644 index 0000000..1ddcac3 --- /dev/null +++ b/containers/spindle-slurm-ubuntu/base/docker-compose.yml @@ -0,0 +1,11 @@ +services: + spindle-slurm-base: + image: ghcr.io/paratoolsinc/spindle-slurm-base:latest + build: + context: . + dockerfile: Dockerfile + args: + UBUNTU_VERSION: noble + SLURM_VERSION: slurm-25-05-3-1 + MPICH_VERSION: 4.2.2 + diff --git a/containers/spindle-slurm-ubuntu/scripts/add_docker_user.sh b/containers/spindle-slurm-ubuntu/base/scripts/add_docker_user.sh similarity index 100% rename from containers/spindle-slurm-ubuntu/scripts/add_docker_user.sh rename to containers/spindle-slurm-ubuntu/base/scripts/add_docker_user.sh diff --git a/containers/spindle-slurm-ubuntu/scripts/build_mpich.sh b/containers/spindle-slurm-ubuntu/base/scripts/build_mpich.sh similarity index 100% rename from containers/spindle-slurm-ubuntu/scripts/build_mpich.sh rename to containers/spindle-slurm-ubuntu/base/scripts/build_mpich.sh diff --git a/containers/spindle-slurm-ubuntu/scripts/build_slurm.sh b/containers/spindle-slurm-ubuntu/base/scripts/build_slurm.sh similarity index 100% rename from containers/spindle-slurm-ubuntu/scripts/build_slurm.sh rename to containers/spindle-slurm-ubuntu/base/scripts/build_slurm.sh diff --git a/containers/spindle-slurm-ubuntu/testing/Dockerfile b/containers/spindle-slurm-ubuntu/testing/Dockerfile new file mode 100644 index 0000000..ea9cc7e --- /dev/null +++ b/containers/spindle-slurm-ubuntu/testing/Dockerfile @@ -0,0 +1,39 @@ +ARG BASE_VERSION=latest +FROM ghcr.io/paratoolsinc/spindle-slurm-base:${BASE_VERSION} +ARG replicas=4 +ENV workers=${replicas} + +ARG BUILD_ROOT=containers/spindle-slurm-ubuntu/testing + +# Slurm daemons run as $SLURM_USER +ARG SLURM_USER=slurm + +# Applications run as $USER +ARG USER=slurmuser +ARG UID=1001 + +# Set up the Slurm install already present in the base image +COPY ${BUILD_ROOT}/scripts/setup_slurm.sh /setup_slurm.sh +COPY ${BUILD_ROOT}/conf/slurm.conf /home/${SLURM_USER}/slurm.conf +COPY ${BUILD_ROOT}/conf/slurmdbd.conf /home/${SLURM_USER}/slurmdbd.conf +COPY ${BUILD_ROOT}/conf/cgroup.conf /home/${SLURM_USER}/cgroup.conf +RUN /setup_slurm.sh + +# Slurm without Spank plugin needs passwordless ssh +USER ${USER} +WORKDIR /home/${USER} +COPY ${BUILD_ROOT}/conf/ssh_config /home/${USER}/ +COPY ${BUILD_ROOT}/scripts/setup_ssh.sh /home/${USER}/ +RUN /home/${USER}/setup_ssh.sh + +# Copy the Spindle repo into the container and build it +RUN mkdir -p /home/${USER}/Spindle +COPY . /home/${USER}/Spindle +COPY ${BUILD_ROOT}/scripts/build_spindle.sh /home/${USER}/build_spindle.sh +RUN ./build_spindle.sh + +COPY ${BUILD_ROOT}/scripts/entrypoint.sh /home/${USER}/entrypoint.sh +ENV PATH /home/${USER}/Spindle-inst/bin:$PATH + +ENTRYPOINT /bin/bash ./entrypoint.sh + diff --git a/containers/spindle-slurm-ubuntu/conf/cgroup.conf b/containers/spindle-slurm-ubuntu/testing/conf/cgroup.conf similarity index 100% rename from containers/spindle-slurm-ubuntu/conf/cgroup.conf rename to containers/spindle-slurm-ubuntu/testing/conf/cgroup.conf diff --git a/containers/spindle-slurm-ubuntu/conf/slurm.conf b/containers/spindle-slurm-ubuntu/testing/conf/slurm.conf similarity index 100% rename from containers/spindle-slurm-ubuntu/conf/slurm.conf rename to containers/spindle-slurm-ubuntu/testing/conf/slurm.conf diff --git a/containers/spindle-slurm-ubuntu/conf/slurmdbd.conf.template b/containers/spindle-slurm-ubuntu/testing/conf/slurmdbd.conf.template similarity index 100% rename from containers/spindle-slurm-ubuntu/conf/slurmdbd.conf.template rename to containers/spindle-slurm-ubuntu/testing/conf/slurmdbd.conf.template diff --git a/containers/spindle-slurm-ubuntu/conf/ssh_config b/containers/spindle-slurm-ubuntu/testing/conf/ssh_config similarity index 100% rename from containers/spindle-slurm-ubuntu/conf/ssh_config rename to containers/spindle-slurm-ubuntu/testing/conf/ssh_config diff --git a/containers/spindle-slurm-ubuntu/docker-compose.yml b/containers/spindle-slurm-ubuntu/testing/docker-compose.yml similarity index 94% rename from containers/spindle-slurm-ubuntu/docker-compose.yml rename to containers/spindle-slurm-ubuntu/testing/docker-compose.yml index 6fe6fad..6031cd8 100644 --- a/containers/spindle-slurm-ubuntu/docker-compose.yml +++ b/containers/spindle-slurm-ubuntu/testing/docker-compose.yml @@ -3,11 +3,9 @@ x-shared-workers: &workers replicas: 4 -# Ubuntu version to use (noble = 24.04) +# Base image version to use x-shared-build-args: &shared-build-args - UBUNTU_VERSION: noble - SLURM_VERSION: slurm-25-05-3-1 - MPICH_VERSION: 4.2.2 + BASE_VERSION: latest <<: *workers # Docker prohibits copying files from outside of the build context. @@ -20,8 +18,8 @@ x-shared-build-args: &shared-build-args # args: *shared-build-args x-shared-build-context: &shared-build-context - context: ../.. - dockerfile: containers/spindle-slurm-ubuntu/Dockerfile + context: ../../.. + dockerfile: containers/spindle-slurm-ubuntu/testing/Dockerfile args: *shared-build-args # Name of the head node diff --git a/containers/spindle-slurm-ubuntu/generate_config.sh b/containers/spindle-slurm-ubuntu/testing/generate_config.sh similarity index 100% rename from containers/spindle-slurm-ubuntu/generate_config.sh rename to containers/spindle-slurm-ubuntu/testing/generate_config.sh diff --git a/containers/spindle-slurm-ubuntu/testing/scripts/add_docker_user.sh b/containers/spindle-slurm-ubuntu/testing/scripts/add_docker_user.sh new file mode 100755 index 0000000..ace8c61 --- /dev/null +++ b/containers/spindle-slurm-ubuntu/testing/scripts/add_docker_user.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +set -euxo pipefail + +sudo groupadd -g ${UID} ${USER} +sudo useradd -g ${USER} -u ${UID} -d /home/${USER} -m ${USER} +# Allow user to run as other users so that munge can be started as the munge user +sudo sh -c "printf \"${USER} ALL=(ALL) NOPASSWD: ALL\\n\" >> /etc/sudoers" +sudo adduser ${USER} sudo +sudo usermod -s /bin/bash ${USER} + diff --git a/containers/spindle-slurm-ubuntu/scripts/build_spindle.sh b/containers/spindle-slurm-ubuntu/testing/scripts/build_spindle.sh similarity index 100% rename from containers/spindle-slurm-ubuntu/scripts/build_spindle.sh rename to containers/spindle-slurm-ubuntu/testing/scripts/build_spindle.sh diff --git a/containers/spindle-slurm-ubuntu/scripts/entrypoint.sh b/containers/spindle-slurm-ubuntu/testing/scripts/entrypoint.sh similarity index 100% rename from containers/spindle-slurm-ubuntu/scripts/entrypoint.sh rename to containers/spindle-slurm-ubuntu/testing/scripts/entrypoint.sh diff --git a/containers/spindle-slurm-ubuntu/scripts/setup_slurm.sh b/containers/spindle-slurm-ubuntu/testing/scripts/setup_slurm.sh similarity index 100% rename from containers/spindle-slurm-ubuntu/scripts/setup_slurm.sh rename to containers/spindle-slurm-ubuntu/testing/scripts/setup_slurm.sh diff --git a/containers/spindle-slurm-ubuntu/scripts/setup_ssh.sh b/containers/spindle-slurm-ubuntu/testing/scripts/setup_ssh.sh similarity index 100% rename from containers/spindle-slurm-ubuntu/scripts/setup_ssh.sh rename to containers/spindle-slurm-ubuntu/testing/scripts/setup_ssh.sh From 776469de1873bf2eec6f7a4fde17861f6ffa66c0 Mon Sep 17 00:00:00 2001 From: Nicholas Chaimov Date: Tue, 7 Oct 2025 16:17:12 -0700 Subject: [PATCH 4/4] Add workflow to build Slurm base image --- .github/workflows/container.yml | 30 +++++++++++++++++++ .../spindle-slurm-ubuntu/base/Dockerfile | 4 +-- .../base/docker-compose.yml | 11 ------- 3 files changed, 32 insertions(+), 13 deletions(-) create mode 100644 .github/workflows/container.yml delete mode 100644 containers/spindle-slurm-ubuntu/base/docker-compose.yml diff --git a/.github/workflows/container.yml b/.github/workflows/container.yml new file mode 100644 index 0000000..c27dca0 --- /dev/null +++ b/.github/workflows/container.yml @@ -0,0 +1,30 @@ +name: Build & Push Slurm Base Container +on: + workflow_dispatch: + workflow_call: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 + + - name: Login to GitHub Container Registry + uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build & Push Slurm Base Image + uses: docker/build-push-action@1dc73863535b631f98b2378be8619f83b136f4a0 + with: + context: ./containers/spindle-slurm-ubuntu/base + platforms: linux/amd64 + push: true + tags: ghcr.io/paratoolsinc/spindle-slurm-base:latest + cache-from: type=registry,ref=ghcr.io/paratoolsinc/spindle-slurm-base:buildcache + cache-to: type=registry,ref=ghcr.io/paratoolsinc/spindle-slurm-base:buildcache,mode=max diff --git a/containers/spindle-slurm-ubuntu/base/Dockerfile b/containers/spindle-slurm-ubuntu/base/Dockerfile index 16c8d53..d393f9e 100644 --- a/containers/spindle-slurm-ubuntu/base/Dockerfile +++ b/containers/spindle-slurm-ubuntu/base/Dockerfile @@ -67,11 +67,11 @@ ARG USER=slurmuser ARG UID=1001 RUN /add_docker_user.sh -ARG SLURM_VERSION +ARG SLURM_VERSION=slurm-25-05-3-1 COPY ${BUILD_ROOT}/scripts/build_slurm.sh /build_slurm.sh RUN /build_slurm.sh -ARG MPICH_VERSION +ARG MPICH_VERSION=4.2.2 COPY ${BUILD_ROOT}/scripts/build_mpich.sh /build_mpich.sh RUN /build_mpich.sh diff --git a/containers/spindle-slurm-ubuntu/base/docker-compose.yml b/containers/spindle-slurm-ubuntu/base/docker-compose.yml deleted file mode 100644 index 1ddcac3..0000000 --- a/containers/spindle-slurm-ubuntu/base/docker-compose.yml +++ /dev/null @@ -1,11 +0,0 @@ -services: - spindle-slurm-base: - image: ghcr.io/paratoolsinc/spindle-slurm-base:latest - build: - context: . - dockerfile: Dockerfile - args: - UBUNTU_VERSION: noble - SLURM_VERSION: slurm-25-05-3-1 - MPICH_VERSION: 4.2.2 -