diff --git a/examples/gpu/laghos/Makefile b/examples/gpu/laghos/Makefile index d33e76d..2543e55 100644 --- a/examples/gpu/laghos/Makefile +++ b/examples/gpu/laghos/Makefile @@ -9,10 +9,13 @@ all: @echo " run-short: use hpctoolkit to collect and analyze kernel-level information about gpu performance " @echo " run-long: use hpctoolkit to collect and analyze kernel-level information about gpu performance " @echo " run-pc: use hpctoolkit to collect, analyze, and present instruction-level information about gpu performance " + @echo " run-inst-count: use hpctoolkit to collect and analyze instruction counting information about gpu performance " + @echo @echo " view: use hpcviewer to inspect performance data gathered using 'run'" @echo " view-short: use hpcviewer to inspect performance data gathered using 'run-short'" @echo " view-long: use hpcviewer to inspect performance data gathered using 'run-long'" @echo " view-pc: use hpcviewer to inspect performance data gathered using 'run-pc'" + @echo " view-inst-count: use hpcviewer to inspect performance data gathered using 'run-inst-count'" @echo " clean: delete performance data and logs" @echo " veryclean: delete build, performance data, and logs" @echo @@ -33,6 +36,10 @@ run-pc: @READY=$(RDY) CMD="$(HPCTOOLKIT_LAGHOS_RUN_PC)" \ sh make-scripts/check.sh make-scripts/run_hpcrun_pc.sh +run-inst-count: + @READY=$(RDY) CMD="$(HPCTOOLKIT_LAGHOS_RUN_COUNT)" \ + sh make-scripts/check.sh make-scripts/run_hpcrun_inst_count.sh + view-short: @READY=$(RDY) CMD=sh DB="hpctoolkit-$(EXEC)-short" \ sh make-scripts/check.sh make-scripts/view.sh @@ -45,6 +52,10 @@ view-pc: @READY=$(RDY) CMD=sh DB="hpctoolkit-$(EXEC)-pc" \ sh make-scripts/check.sh make-scripts/view.sh +view-inst-count: + @READY=$(RDY) CMD=sh DB="hpctoolkit-$(EXEC)-inst-count" \ + sh make-scripts/check.sh make-scripts/view.sh + clean: /bin/rm -rf hpctoolkit* /bin/rm -rf log* .build* diff --git a/examples/gpu/laghos/make-scripts/build.sh 
b/examples/gpu/laghos/make-scripts/build.sh index 99cae14..9809ca7 100644 --- a/examples/gpu/laghos/make-scripts/build.sh +++ b/examples/gpu/laghos/make-scripts/build.sh @@ -9,12 +9,29 @@ pushd laghos # Tested for GCC >= 6.4.0 -# CUDA Laghos +# RAJA +if [ -n "${HPCTOOLKIT_LAGHOS_RAJA_BUILD}" ]; then + RAJA_BLD=${HPCTOOLKIT_LAGHOS_RAJA_ROOT}/build + RAJA_PFX=$(pwd)/raja + git clone -b ${HPCTOOLKIT_LAGHOS_RAJA_VER} --depth 1 --recurse-submodules --shallow-submodules https://github.com/LLNL/RAJA.git ${HPCTOOLKIT_LAGHOS_RAJA_ROOT} + mkdir -p ${RAJA_BLD} + pushd ${RAJA_BLD} + cmake \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX=${RAJA_PFX} \ + -DCMAKE_CXX_COMPILER=${HPCTOOLKIT_LAGHOS_CXX_COMPILER} -DCMAKE_CXX_FLAGS="${HPCTOOLKIT_LAGHOS_CXXFLAGS}" \ + ${HPCTOOLKIT_LAGHOS_RAJA_FLAGS} \ + ${HPCTOOLKIT_LAGHOS_RAJA_ROOT} + make -j 8 + make install + popd +fi + +# Laghos git clone https://github.com/CEED/Laghos.git pushd Laghos git checkout v3.1 git apply ../../make-scripts/tutorial.patch -make setup NPROC=16 MFEM_BUILD="${HPCTOOLKIT_LAGHOS_MFEM_FLAGS}" +make setup NPROC=16 CXXFLAGS="-g -O2 ${HPCTOOLKIT_LAGHOS_CXXFLAGS}" MFEM_BUILD="${HPCTOOLKIT_LAGHOS_MFEM_FLAGS}" make -j 8 popd popd diff --git a/examples/gpu/laghos/make-scripts/check.sh b/examples/gpu/laghos/make-scripts/check.sh index 50ba5b3..6e0ed31 100644 --- a/examples/gpu/laghos/make-scripts/check.sh +++ b/examples/gpu/laghos/make-scripts/check.sh @@ -10,4 +10,4 @@ then exit fi -$CMD $1 +$CMD $1 diff --git a/examples/gpu/laghos/make-scripts/run_hpcrun_inst_count.sh b/examples/gpu/laghos/make-scripts/run_hpcrun_inst_count.sh new file mode 100644 index 0000000..f501d2e --- /dev/null +++ b/examples/gpu/laghos/make-scripts/run_hpcrun_inst_count.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +$HPCTOOLKIT_LAGHOS_MODULES_BUILD +$HPCTOOLKIT_MODULES_USE +$HPCTOOLKIT_MODULES_HPCTOOLKIT + +BINARY=laghos +LAGHOS_DIR=laghos/Laghos +EXEC=${LAGHOS_DIR}/$BINARY +OUT=hpctoolkit-laghos-inst-count + +CMD="rm -rf $OUT.m $OUT.d" +echo 
$CMD +$CMD + +# measure an execution of laghos +CMD="time ${HPCTOOLKIT_LAGHOS_LAUNCH} ${HPCTOOLKIT_LAGHOS_LAUNCH_ARGS} hpcrun -o $OUT.m -e gpu=level0,inst=count -t ${LAGHOS_DIR}/laghos -p 0 -dim 2 -rs 1 -tf 0.05 -pa -d raja" +echo $CMD +eval $CMD + +# compute program structure information for the laghos cpu and gpu binaries (--gpucfg yes: also analyze GPU control flow) +CMD="hpcstruct --gpucfg yes $OUT.m" +echo $CMD +$CMD + +# combine the measurements with the program structure information +CMD="hpcprof -o $OUT.d $OUT.m" +echo $CMD +$CMD + +touch log.run-inst-count.done diff --git a/examples/gpu/laghos/make-scripts/run_hpcrun_long.sh b/examples/gpu/laghos/make-scripts/run_hpcrun_long.sh index 2267848..62c5e10 100644 --- a/examples/gpu/laghos/make-scripts/run_hpcrun_long.sh +++ b/examples/gpu/laghos/make-scripts/run_hpcrun_long.sh @@ -1,6 +1,7 @@ #!/bin/bash $HPCTOOLKIT_LAGHOS_MODULES_BUILD +$HPCTOOLKIT_MODULES_USE $HPCTOOLKIT_MODULES_HPCTOOLKIT export CUPTI_DEVICE_NUM=1 diff --git a/examples/gpu/laghos/make-scripts/run_hpcrun_pc.sh b/examples/gpu/laghos/make-scripts/run_hpcrun_pc.sh index a33e49c..027455c 100644 --- a/examples/gpu/laghos/make-scripts/run_hpcrun_pc.sh +++ b/examples/gpu/laghos/make-scripts/run_hpcrun_pc.sh @@ -1,6 +1,7 @@ #!/bin/bash $HPCTOOLKIT_LAGHOS_MODULES_BUILD +$HPCTOOLKIT_MODULES_USE $HPCTOOLKIT_MODULES_HPCTOOLKIT export CUPTI_DEVICE_NUM=1 diff --git a/examples/gpu/laghos/make-scripts/run_hpcrun_short.sh b/examples/gpu/laghos/make-scripts/run_hpcrun_short.sh index 01c6698..1514ad5 100644 --- a/examples/gpu/laghos/make-scripts/run_hpcrun_short.sh +++ b/examples/gpu/laghos/make-scripts/run_hpcrun_short.sh @@ -1,6 +1,7 @@ #!/bin/bash $HPCTOOLKIT_LAGHOS_MODULES_BUILD +$HPCTOOLKIT_MODULES_USE $HPCTOOLKIT_MODULES_HPCTOOLKIT export CUPTI_DEVICE_NUM=1 diff --git a/examples/gpu/laghos/make-scripts/tutorial.patch b/examples/gpu/laghos/make-scripts/tutorial.patch index 88dbdd4..e34f5ba 100644 --- a/examples/gpu/laghos/make-scripts/tutorial.patch +++ b/examples/gpu/laghos/make-scripts/tutorial.patch @@ -1,3
+1,16 @@ +diff --git a/laghos.cpp b/laghos.cpp +index 3d756ea..2ac8c04 100644 +--- a/laghos.cpp ++++ b/laghos.cpp +@@ -84,7 +84,7 @@ static void Checks(const int dim, const int ti, const double norm, int &checks); + int main(int argc, char *argv[]) + { + // Initialize MPI. +- MPI_Session mpi(argc, argv); ++ mfem::MPI_Session mpi(argc, argv); + const int myid = mpi.WorldRank(); + + // Print the banner. diff --git a/makefile b/makefile index a59f844..dbeba13 100644 --- a/makefile diff --git a/examples/gpu/laghos/make-scripts/unsupported-inst-count.sh b/examples/gpu/laghos/make-scripts/unsupported-inst-count.sh new file mode 100644 index 0000000..d79c3a3 --- /dev/null +++ b/examples/gpu/laghos/make-scripts/unsupported-inst-count.sh @@ -0,0 +1,7 @@ +#!/bin/bash +echo +echo '***********************************************************************************' +echo '* Instruction counting is only available for Intel GPUs. *' +echo '***********************************************************************************' +echo +exit diff --git a/examples/gpu/laghos/make-scripts/unsupported-pc.sh b/examples/gpu/laghos/make-scripts/unsupported-pc.sh new file mode 100644 index 0000000..7d90241 --- /dev/null +++ b/examples/gpu/laghos/make-scripts/unsupported-pc.sh @@ -0,0 +1,7 @@ +#!/bin/bash +echo +echo '***********************************************************************************' +echo "* PC sampling capability is not yet available for $1 GPUs" +echo '***********************************************************************************' +echo +exit diff --git a/examples/gpu/laghos/setup-env/crusher.sh b/examples/gpu/laghos/setup-env/crusher.sh index 891d11f..3b30288 100644 --- a/examples/gpu/laghos/setup-env/crusher.sh +++ b/examples/gpu/laghos/setup-env/crusher.sh @@ -31,9 +31,9 @@ else module load PrgEnv-amd amd/5.4.3 cray-mpich craype-x86-trento craype-accel-amd-gfx90a # modules for hpctoolkit -# module use /gpfs/alpine/csc322/world-shared/modulefiles/x86_64 -# export 
HPCTOOLKIT_MODULES_HPCTOOLKIT="module load hpctoolkit/default" - export HPCTOOLKIT_MODULES_HPCTOOLKIT="module load hpctoolkit/develop" + export HPCTOOLKIT_MODULES_USE="module use /gpfs/alpine/csc322/world-shared/modulefiles/x86_64" + export HPCTOOLKIT_MODULES_HPCTOOLKIT="module load hpctoolkit/default" + $HPCTOOLKIT_MODULES_USE $HPCTOOLKIT_MODULES_HPCTOOLKIT # environment settings for this example @@ -47,7 +47,8 @@ else export HPCTOOLKIT_LAGHOS_SUBMIT="sbatch $HPCTOOLKIT_PROJECTID -t 5 -N 1 $HPCTOOLKIT_RESERVATION" export HPCTOOLKIT_LAGHOS_RUN_SHORT="$HPCTOOLKIT_LAGHOS_SUBMIT -J laghos-run-short -o log.run-short.out -e log.run-short.error" export HPCTOOLKIT_LAGHOS_RUN_LONG="$HPCTOOLKIT_LAGHOS_SUBMIT -J laghos-run-long -o log.run-long.out -e log.run-long.error" - export HPCTOOLKIT_LAGHOS_RUN_PC="sh make-scripts/unsupported-amd.sh" + export HPCTOOLKIT_LAGHOS_RUN_PC="sh make-scripts/unsupported-pc.sh AMD" + export HPCTOOLKIT_LAGHOS_RUN_COUNT="sh make-scripts/unsupported-inst-count.sh" export HPCTOOLKIT_LAGHOS_BUILD="sh" export HPCTOOLKIT_LAGHOS_LAUNCH="srun -n 8 -c 1 --gpus-per-node=8 --gpu-bind=closest" diff --git a/examples/gpu/laghos/setup-env/perlmutter.sh b/examples/gpu/laghos/setup-env/perlmutter.sh index fe85c38..d524bca 100644 --- a/examples/gpu/laghos/setup-env/perlmutter.sh +++ b/examples/gpu/laghos/setup-env/perlmutter.sh @@ -31,8 +31,9 @@ else module load gpu PrgEnv-nvidia nvidia/22.7 cray-mpich # modules for hpctoolkit - module use /global/common/software/m3977/hpctoolkit/latest/perlmutter/modulefiles + export HPCTOOLKIT_MODULES_USE="module use /global/common/software/m3977/hpctoolkit/latest/perlmutter/modulefiles" export HPCTOOLKIT_MODULES_HPCTOOLKIT="module load hpctoolkit/default" + $HPCTOOLKIT_MODULES_USE $HPCTOOLKIT_MODULES_HPCTOOLKIT # environment settings for this example @@ -49,6 +50,7 @@ else export HPCTOOLKIT_LAGHOS_RUN_SHORT="$HPCTOOLKIT_LAGHOS_SUBMIT -J laghos-run-short -o log.run-short.out -e log.run-short.error" export 
HPCTOOLKIT_LAGHOS_RUN_LONG="$HPCTOOLKIT_LAGHOS_SUBMIT -J laghos-run-long -o log.run-long.out -e log.run-long.error" export HPCTOOLKIT_LAGHOS_RUN_PC="$HPCTOOLKIT_LAGHOS_SUBMIT -J laghos-run-pc -o log.run-pc.out -e log.run-pc.error" + export HPCTOOLKIT_LAGHOS_RUN_COUNT="sh make-scripts/unsupported-inst-count.sh" export HPCTOOLKIT_LAGHOS_BUILD="sh" export HPCTOOLKIT_LAGHOS_LAUNCH="srun -n 4 -c 1 --gpus-per-node=4 --gpu-bind=closest" diff --git a/examples/gpu/laghos/setup-env/summit.sh b/examples/gpu/laghos/setup-env/summit.sh index 8fffb12..83c2576 100644 --- a/examples/gpu/laghos/setup-env/summit.sh +++ b/examples/gpu/laghos/setup-env/summit.sh @@ -31,8 +31,9 @@ else module load gcc spectrum-mpi cuda/11.5.2 # modules for hpctoolkit - module use /gpfs/alpine/csc322/world-shared/modulefiles/ppc64le + export HPCTOOLKIT_MODULES_USE="module use /gpfs/alpine/csc322/world-shared/modulefiles/ppc64le" export HPCTOOLKIT_MODULES_HPCTOOLKIT="module load hpctoolkit/default" + $HPCTOOLKIT_MODULES_USE $HPCTOOLKIT_MODULES_HPCTOOLKIT # environment settings for this example @@ -47,6 +48,7 @@ else export HPCTOOLKIT_LAGHOS_RUN_SHORT="$HPCTOOLKIT_LAGHOS_SUBMIT -J laghos-run-short -o log.run-short.out -e log.run-short.error" export HPCTOOLKIT_LAGHOS_RUN_LONG="$HPCTOOLKIT_LAGHOS_SUBMIT -J laghos-run-long -o log.run-long.out -e log.run-long.error" export HPCTOOLKIT_LAGHOS_RUN_PC="$HPCTOOLKIT_LAGHOS_SUBMIT -J laghos-run-pc -o log.run-pc.out -e log.run-pc.error" + export HPCTOOLKIT_LAGHOS_RUN_COUNT="sh make-scripts/unsupported-inst-count.sh" export HPCTOOLKIT_LAGHOS_BUILD="sh" export HPCTOOLKIT_LAGHOS_LAUNCH="jsrun -n 6 -g 1 -a 1 -c 1 -bpacked:7" export HPCTOOLKIT_LAGHOS_LAUNCH_ARGS="--smpiargs \"-x PAMI_DISABLE_CUDA_HOOK=1 -disable_gpu_hooks\"" diff --git a/examples/gpu/laghos/setup-env/sunspot.sh b/examples/gpu/laghos/setup-env/sunspot.sh new file mode 100644 index 0000000..63c83ec --- /dev/null +++ b/examples/gpu/laghos/setup-env/sunspot.sh @@ -0,0 +1,63 @@ +export 
HPCTOOLKIT_TUTORIAL_RESERVATION=default + +if [ -z "$HPCTOOLKIT_TUTORIAL_PROJECTID" ] +then + echo "Please set environment variable HPCTOOLKIT_TUTORIAL_PROJECTID to your project id" + echo " 'default' to run with your default project id unset" +elif [ -z "$HPCTOOLKIT_TUTORIAL_RESERVATION" ] +then + echo "Please set environment variable HPCTOOLKIT_TUTORIAL_RESERVATION to an appropriate value:" +# echo " 'hpctoolkit1' for day 1" +# echo " 'hpctoolkit2' for day 2" + echo " 'default' to run without the reservation" +else + if test "$HPCTOOLKIT_TUTORIAL_PROJECTID" != "default" + then + export HPCTOOLKIT_PROJECTID="-A ${HPCTOOLKIT_TUTORIAL_PROJECTID}" + else + unset HPCTOOLKIT_PROJECTID + fi + if test "$HPCTOOLKIT_TUTORIAL_RESERVATION" != "default" + then + export HPCTOOLKIT_RESERVATION="-q $HPCTOOLKIT_TUTORIAL_RESERVATION" + else + export HPCTOOLKIT_RESERVATION="-q workq" + fi + + # cleanse environment + module purge + + # load modules needed to build and run laghos + module load oneapi spack cmake + + # modules for hpctoolkit + export HPCTOOLKIT_MODULES_USE="module use /soft/perftools/hpctoolkit/modulefiles" + export HPCTOOLKIT_MODULES_HPCTOOLKIT="module load hpctoolkit/latest" + $HPCTOOLKIT_MODULES_USE + $HPCTOOLKIT_MODULES_HPCTOOLKIT + + # environment settings for this example + export HPCTOOLKIT_GPU_PLATFORM=level0 + export HPCTOOLKIT_MPI_CC=mpicc + export HPCTOOLKIT_MPI_CXX=mpicxx + export HPCTOOLKIT_LAGHOS_MODULES_BUILD="" + export HPCTOOLKIT_LAGHOS_C_COMPILER=icx + export HPCTOOLKIT_LAGHOS_CXX_COMPILER=icpx + export HPCTOOLKIT_LAGHOS_CXXFLAGS="-fsycl -std=c++17" + export HPCTOOLKIT_LAGHOS_RAJA_BUILD=1 + export HPCTOOLKIT_LAGHOS_RAJA_VER=v2022.10.5 + export HPCTOOLKIT_LAGHOS_RAJA_ROOT=$(pwd)/laghos/RAJA-${HPCTOOLKIT_LAGHOS_RAJA_VER} + export HPCTOOLKIT_LAGHOS_RAJA_FLAGS="-DRAJA_ENABLE_SYCL=ON -DENABLE_TESTS=OFF" + export HPCTOOLKIT_LAGHOS_MFEM_FLAGS="parallel MFEM_USE_RAJA=YES RAJA_LIB=$HPCTOOLKIT_LAGHOS_RAJA_ROOT/lib BASE_FLAGS='-std=c++17 -g'" + export 
HPCTOOLKIT_LAGHOS_SUBMIT="qsub $HPCTOOLKIT_PROJECTID -l walltime=00:15:00 -l select=1:system=sunspot -l filesystems=home -k doe $HPCTOOLKIT_RESERVATION" + export HPCTOOLKIT_LAGHOS_RUN_SHORT="$HPCTOOLKIT_LAGHOS_SUBMIT -N laghos-run-short -o log.run-short.out -e log.run-short.error" + export HPCTOOLKIT_LAGHOS_RUN_LONG="$HPCTOOLKIT_LAGHOS_SUBMIT -N laghos-run-long -o log.run-long.out -e log.run-long.error" + export HPCTOOLKIT_LAGHOS_RUN_PC="sh make-scripts/unsupported-pc.sh Intel" + export HPCTOOLKIT_LAGHOS_RUN_COUNT="$HPCTOOLKIT_LAGHOS_SUBMIT -N laghos-run-inst-count -o log.run-inst-count.out -e log.run-inst-count.error" + export HPCTOOLKIT_LAGHOS_BUILD="sh" + export HPCTOOLKIT_LAGHOS_LAUNCH="mpirun -n 12" + + # mark configuration for this example + export HPCTOOLKIT_EXAMPLE=laghos + +fi