From fbe4d12647e66b8690cadb67d8cc47c21e7678c8 Mon Sep 17 00:00:00 2001 From: Syed-Suhaan Date: Mon, 8 Dec 2025 20:18:52 +0530 Subject: [PATCH 1/4] fix: upgrade Intel MPI to 2021.14 and fix CI race condition Upgrades Intel MPI version from 2021.13 to 2021.14. Raises the initial retry backoff of resolve_host in entrypoint.sh from 0.1s to 1s to prevent SSH handshake failures (kex_exchange_identification) observed in CI environments. Fixes Issue #678. Signed-off-by: Syed-Suhaan --- build/base/entrypoint.sh | 2 +- build/base/intel-builder.Dockerfile | 6 +++--- build/base/intel.Dockerfile | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/build/base/entrypoint.sh b/build/base/entrypoint.sh index bfddddacb..59ade5aec 100755 --- a/build/base/entrypoint.sh +++ b/build/base/entrypoint.sh @@ -10,7 +10,7 @@ function resolve_host() { check="nslookup $host" max_retry=10 counter=0 - backoff=0.1 + backoff=1 until $check > /dev/null do if [ $counter -eq $max_retry ]; then diff --git a/build/base/intel-builder.Dockerfile b/build/base/intel-builder.Dockerfile index 4e85cb5b6..29222367f 100644 --- a/build/base/intel-builder.Dockerfile +++ b/build/base/intel-builder.Dockerfile @@ -16,9 +16,9 @@ RUN apt update \ && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg trusted=yes] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list \ && apt update \ && apt install -y --no-install-recommends \ - libstdc++-12-dev binutils procps clang \ - intel-oneapi-compiler-dpcpp-cpp \ - intel-oneapi-mpi-devel-2021.13 \ + libstdc++-12-dev binutils procps clang \ + intel-oneapi-compiler-dpcpp-cpp \ + intel-oneapi-mpi-devel-2021.14 \ && apt remove -y gnupg2 ca-certificates apt-transport-https \ && apt autoremove -y \ && rm -rf /var/lib/apt/lists/* diff --git a/build/base/intel.Dockerfile b/build/base/intel.Dockerfile index 03d3612c0..75167c2a0 100644 --- a/build/base/intel.Dockerfile +++ b/build/base/intel.Dockerfile @@ -18,8 +18,8 @@ RUN apt update \ && echo "deb 
[signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg trusted=yes] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list \ && apt update \ && apt install -y --no-install-recommends \ - dnsutils \ - intel-oneapi-mpi-2021.13 \ + dnsutils \ + intel-oneapi-mpi-2021.14 \ && apt remove -y gnupg2 ca-certificates \ && apt autoremove -y \ && rm -rf /var/lib/apt/lists/* From c05d2afd020334df6a77bd2491542907f7820542 Mon Sep 17 00:00:00 2001 From: Syed-Suhaan Date: Thu, 11 Dec 2025 15:18:17 +0530 Subject: [PATCH 2/4] Fix race condition in entrypoint.sh by replacing awk with bash arithmetic, and fix Dockerfile indentation Signed-off-by: Syed-Suhaan --- build/base/entrypoint.sh | 5 +++-- build/base/intel-builder.Dockerfile | 6 +++--- build/base/intel.Dockerfile | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/build/base/entrypoint.sh b/build/base/entrypoint.sh index 59ade5aec..0c792b5a6 100755 --- a/build/base/entrypoint.sh +++ b/build/base/entrypoint.sh @@ -17,10 +17,11 @@ function resolve_host() { echo "Couldn't resolve $host" return fi + echo "Couldn't resolve $host. Sleeping ${backoff}s before retry..." sleep $backoff - echo "Couldn't resolve $host... Retrying" + echo "Retrying resolution of $host..." 
((counter++)) - backoff=$(echo - | awk "{print $backoff + $backoff}") + backoff=$((backoff + backoff)) done echo "Resolved $host" } diff --git a/build/base/intel-builder.Dockerfile b/build/base/intel-builder.Dockerfile index 29222367f..595ef4160 100644 --- a/build/base/intel-builder.Dockerfile +++ b/build/base/intel-builder.Dockerfile @@ -16,9 +16,9 @@ RUN apt update \ && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg trusted=yes] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list \ && apt update \ && apt install -y --no-install-recommends \ - libstdc++-12-dev binutils procps clang \ - intel-oneapi-compiler-dpcpp-cpp \ - intel-oneapi-mpi-devel-2021.14 \ + libstdc++-12-dev binutils procps clang \ + intel-oneapi-compiler-dpcpp-cpp \ + intel-oneapi-mpi-devel-2021.14 \ && apt remove -y gnupg2 ca-certificates apt-transport-https \ && apt autoremove -y \ && rm -rf /var/lib/apt/lists/* diff --git a/build/base/intel.Dockerfile b/build/base/intel.Dockerfile index 75167c2a0..4ba55723a 100644 --- a/build/base/intel.Dockerfile +++ b/build/base/intel.Dockerfile @@ -18,8 +18,8 @@ RUN apt update \ && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg trusted=yes] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list \ && apt update \ && apt install -y --no-install-recommends \ - dnsutils \ - intel-oneapi-mpi-2021.14 \ + dnsutils \ + intel-oneapi-mpi-2021.14 \ && apt remove -y gnupg2 ca-certificates \ && apt autoremove -y \ && rm -rf /var/lib/apt/lists/* From c6b0f20d805c31d73d5be964520def33caae2730 Mon Sep 17 00:00:00 2001 From: Syed-Suhaan Date: Fri, 12 Dec 2025 13:32:44 +0530 Subject: [PATCH 3/4] Increase backoff to 3s as requested in review Signed-off-by: Syed-Suhaan --- build/base/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/base/entrypoint.sh b/build/base/entrypoint.sh index 0c792b5a6..16db74bb6 100755 --- 
a/build/base/entrypoint.sh +++ b/build/base/entrypoint.sh @@ -10,7 +10,7 @@ function resolve_host() { check="nslookup $host" max_retry=10 counter=0 - backoff=1 + backoff=3 until $check > /dev/null do if [ $counter -eq $max_retry ]; then From 4e29773df1a3a40545c7ffecc40026932a5fac64 Mon Sep 17 00:00:00 2001 From: Syed-Suhaan Date: Tue, 16 Dec 2025 09:00:41 +0530 Subject: [PATCH 4/4] fix: increase backoff to 5s in entrypoint.sh Signed-off-by: Syed-Suhaan --- build/base/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/base/entrypoint.sh b/build/base/entrypoint.sh index 16db74bb6..c2d363c89 100755 --- a/build/base/entrypoint.sh +++ b/build/base/entrypoint.sh @@ -10,7 +10,7 @@ function resolve_host() { check="nslookup $host" max_retry=10 counter=0 - backoff=3 + backoff=5 until $check > /dev/null do if [ $counter -eq $max_retry ]; then