Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions components/copy_test_file.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@ DEST_TEST_DIR=/opt/azurehpc/test

mkdir -p $DEST_TEST_DIR

cp $TEST_DIR/*.* $DEST_TEST_DIR
cp $AZHPC_IMAGES_TEST_DIR/*.* $DEST_TEST_DIR

#Test if nvcc is installed and if so install gpu-copy test.
if test -f "/usr/local/cuda/bin/nvcc"; then
#Compile the gpu-copy benchmark.
NVCC=/usr/local/cuda/bin/nvcc
cufile="$TEST_DIR/health_checks/NDv4/gpu-copy.cu"
outfile="$TEST_DIR/health_checks/NDv4/gpu-copy"
cufile="$AZHPC_IMAGES_TEST_DIR/health_checks/NDv4/gpu-copy.cu"
outfile="$AZHPC_IMAGES_TEST_DIR/health_checks/NDv4/gpu-copy"

#Test if the default gcc compiler is new enough to compile gpu-copy.
#If it is not then use the 9.2 compiler, that should be installed in
Expand All @@ -24,6 +24,6 @@ if test -f "/usr/local/cuda/bin/nvcc"; then
-lnuma $cufile -o $outfile
fi
fi
cp -r $TEST_DIR/health_checks $DEST_TEST_DIR
cp -r $AZHPC_IMAGES_TEST_DIR/health_checks $DEST_TEST_DIR

exit 0
63 changes: 54 additions & 9 deletions components/install_lustre_client.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,63 @@ LUSTRE_VERSION=$(jq -r '.version' <<< $lustre_metadata)
if [[ $DISTRIBUTION == *"ubuntu"* ]]; then
source /etc/lsb-release
UBUNTU_VERSION=$(cat /etc/os-release | grep VERSION_ID | cut -d= -f2 | cut -d\" -f2)

# we need to make a marker package to tell apt that HPC-X provides its own OpenMPI, so that lustre-tests can install properly
apt install -y equivs
cat <<EOF > /tmp/hpcx-provides-openmpi-bin
Section: misc
Priority: optional
Homepage: https://github.com/Azure/azhpc-images
Standards-Version: 3.9.2

Package: hpcx-provides-openmpi-bin
Provides: openmpi-bin, libopenmpi-dev, libopenmpi3, openmpi-common
Conflicts: openmpi-bin, libopenmpi-dev, libopenmpi3, openmpi-common
Version: 4.1
Maintainer: Azure HPC Platform team <hpcplat@microsoft.com>
Description: marker package in Azure HPC Image to indicate that HPC-X provides OpenMPI binaries
Upstream OpenMPI (i.e. OpenMPI packaged by Ubuntu) is unsuitable for HPC purposes, and depends on vulnerable PMIx with fixes behind Ubuntu Pro paywall on Jammy.
EOF

equivs-build /tmp/hpcx-provides-openmpi-bin
dpkg -i ./hpcx-provides-openmpi-bin_4.1_all.deb
rm -f ./hpcx-provides-openmpi-bin_4.1_all.deb
rm -f /tmp/hpcx-provides-openmpi-bin

# use dev headers from HPC-X OpenMPI installation for lustre-tests
source /etc/profile.d/modules.sh
module load mpi/hpcx

# if [ $UBUNTU_VERSION == 24.04 ]; then
# SIGNED_BY="/usr/share/keyrings/microsoft-prod.gpg"
# elif [ $UBUNTU_VERSION == 22.04 ]; then
# SIGNED_BY="/etc/apt/trusted.gpg.d/microsoft-prod.gpg"
# fi
# echo "deb [arch=amd64 signed-by=$SIGNED_BY] https://packages.microsoft.com/repos/amlfs-${DISTRIB_CODENAME}/ ${DISTRIB_CODENAME} main" | sudo tee /etc/apt/sources.list.d/amlfs.list
# # Enable these lines if the MS PMC repo was not already setup.
# #curl https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > microsoft.gpg
# #cp ./microsoft.gpg /etc/apt/trusted.gpg.d/
# apt-get update
# apt-get install -y amlfs-lustre-client-${LUSTRE_VERSION}=$(uname -r)
# apt-mark hold amlfs-lustre-client-${LUSTRE_VERSION}

# Temporary workaround: build the AMLFS kmod from source until the AMLFS team publishes DKMS packages that are usable on day 1 of a new kernel module release
lustre_branch="arsdragonfly/dkms-$LUSTRE_VERSION"
git clone --branch ${lustre_branch} https://github.com/arsdragonfly/amlFilesystem-lustre.git
Copy link
Copy Markdown
Contributor

@ColtonPaul ColtonPaul Dec 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discussing offline with the AMLFS team to hopefully merge an upstream PR instead of hardcoding a personal fork/branch.
Otherwise, the PR looks good to me.

pushd amlFilesystem-lustre
sh ./autogen.sh
apt update
if [ $UBUNTU_VERSION == 24.04 ]; then
SIGNED_BY="/usr/share/keyrings/microsoft-prod.gpg"
apt install -y module-assistant libselinux-dev libsnmp-dev mpi-default-dev quilt libssl-dev swig
elif [ $UBUNTU_VERSION == 22.04 ]; then
SIGNED_BY="/etc/apt/trusted.gpg.d/microsoft-prod.gpg"
apt install -y module-assistant dpatch libselinux-dev libsnmp-dev mpi-default-dev quilt libssl-dev swig
fi
echo "deb [arch=$ARCHITECTURE_DISTRO signed-by=$SIGNED_BY] https://packages.microsoft.com/repos/amlfs-${DISTRIB_CODENAME}/ ${DISTRIB_CODENAME} main" | tee /etc/apt/sources.list.d/amlfs.list
# Enable these lines if the MS PMC repo was not already setup.
#curl https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > microsoft.gpg
#cp ./microsoft.gpg /etc/apt/trusted.gpg.d/
apt-get update
apt-get install -y amlfs-lustre-client-${LUSTRE_VERSION}=$(uname -r)
apt-mark hold amlfs-lustre-client-${LUSTRE_VERSION}
./configure --with-linux=/usr/src/linux-headers-$(uname -r) --disable-server --disable-ldiskfs --disable-zfs --disable-snmp --enable-quota
make dkms-debs
apt install -y ./debs/lustre-*.deb
popd
rm -rf amlFilesystem-lustre
LUSTRE_VERSION=$(dpkg-query -W -f='${Version}\n' lustre-client-utils | cut -d~ -f1)
elif [[ $DISTRIBUTION == almalinux* ]]; then
ALMA_LUSTRE_VERSION=${LUSTRE_VERSION//-/_}
OS_MAJOR_VERSION=$(sed -n 's/^VERSION_ID="\([0-9]\+\).*/\1/p' /etc/os-release)
Expand Down
1 change: 0 additions & 1 deletion distros/ubuntu22.04/disable_auto_upgrade.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/bin/bash
set -e

KERNEL_VERSION=$(uname -r) su -c 'echo linux-image-$KERNEL_VERSION hold | dpkg --set-selections'
sed -i 's/APT::Periodic::Unattended-Upgrade ".*/APT::Periodic::Unattended-Upgrade "0";/' /etc/apt/apt.conf.d/20auto-upgrades

systemctl stop unattended-upgrades.service
Expand Down
7 changes: 4 additions & 3 deletions distros/ubuntu22.04/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,6 @@ source ../../utils/set_properties.sh
# update cmake
$COMPONENT_DIR/install_cmake.sh

# install Lustre client
$COMPONENT_DIR/install_lustre_client.sh

# install DOCA OFED
$COMPONENT_DIR/install_doca.sh

Expand Down Expand Up @@ -65,6 +62,10 @@ if [ "$GPU" = "AMD" ]; then
$COMPONENT_DIR/install_rccl.sh
fi

# install Lustre client
$COMPONENT_DIR/install_lustre_client.sh


# install AMD libs
$COMPONENT_DIR/install_amd_libs.sh

Expand Down
1 change: 0 additions & 1 deletion distros/ubuntu24.04/disable_auto_upgrade.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/bin/bash
set -e

KERNEL_VERSION=$(uname -r) su -c 'echo linux-image-$KERNEL_VERSION hold | dpkg --set-selections'
sed -i 's/APT::Periodic::Unattended-Upgrade ".*/APT::Periodic::Unattended-Upgrade "0";/' /etc/apt/apt.conf.d/20auto-upgrades

systemctl stop unattended-upgrades.service
Expand Down
10 changes: 6 additions & 4 deletions distros/ubuntu24.04/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,6 @@ source ../../utils/set_properties.sh
if [ "$SKU" != "GB200" ]; then
# update cmake
$COMPONENT_DIR/install_cmake.sh

# install Lustre client
# Note that lustre client is supported on GB200 but amlfs does not support latest 6.14 kernel so we temporarily skip it
$COMPONENT_DIR/install_lustre_client.sh
fi

# install DOCA OFED
Expand Down Expand Up @@ -81,6 +77,12 @@ if [ "$GPU" = "AMD" ]; then
$COMPONENT_DIR/install_rccl.sh
fi

if [ "$SKU" != "GB200" ]; then
# install Lustre client
# Note that lustre client is supported on GB200 but amlfs does not support latest 6.14 kernel so we temporarily skip it
$COMPONENT_DIR/install_lustre_client.sh
fi

if [ "$ARCHITECTURE" == "x86_64" ]; then

# install AMD libs
Expand Down
2 changes: 1 addition & 1 deletion partners/rhel/rhel-8.x/rhel-8.10-hpc/set_properties.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ export TOP_DIR=$(realpath ../../../../)
export COMMON_DIR=$(realpath ../../../../components)
export COMPONENT_DIR=$COMMON_DIR
export RHEL_COMMON_DIR=$(realpath ../../common)
export TEST_DIR=$(realpath ../../../../tests)
export AZHPC_IMAGES_TEST_DIR=$(realpath ../../../../tests)
export UTILS_DIR=$(realpath ../../../../utils)
export DISTRIBUTION=$(. /etc/os-release;echo $ID$VERSION_ID)

Expand Down
8 changes: 1 addition & 7 deletions utils/set_properties.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ set -ex

export TOP_DIR="$(dirname "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)")"
export COMPONENT_DIR=$TOP_DIR/components
export TEST_DIR=$TOP_DIR/tests
export AZHPC_IMAGES_TEST_DIR=$TOP_DIR/tests
export UTILS_DIR=$TOP_DIR/utils
export DISTRIBUTION=$(. /etc/os-release;echo $ID$VERSION_ID)

Expand All @@ -15,12 +15,6 @@ fi
export ARCHITECTURE=$(uname -m)

if [[ $DISTRIBUTION == *"ubuntu"* ]]; then
# Don't allow the kernel to be updated
if [ "$SKU" = "GB200" ]; then
apt-mark hold linux-azure-nvidia
else
apt-mark hold linux-azure
fi
# upgrade pre-installed components
apt update
apt upgrade -y
Expand Down
15 changes: 5 additions & 10 deletions versions.json
Original file line number Diff line number Diff line change
Expand Up @@ -550,21 +550,16 @@
}
},
"lustre": {
"common": {
"version": "2.15.6-39-g3e00a10"
"ubuntu22.04": {
"version": "2.15.7"
},
"ubuntu24.04": {
"version": "2.16.1"
},
"almalinux9.7": {
"x86_64":{
"version": "2.15.7-33-g79ddf99"
}
},
"ubuntu24.04": {
"aarch64":{
"version": "2.16.1-14-gbc76088"
},
"x86_64":{
"version": "2.16.1-14-gbc76088"
}
}
},
"aznhc": {
Expand Down