@@ -20,33 +20,27 @@ jobs:
2020 vendor : nvidia
2121 runner : cern-nextgen-h100
2222 cmake_args : -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=90
23- profiler : nsys profile -o nvidia-h100
24- profiler_post : nsys stats --report cuda_gpu_kern_sum --timeunit us --force-export=true --format csv nvidia-h100.nsys-rep >
2523 - name : nvidia-l40s
2624 vendor : nvidia
2725 runner : cern-nextgen-l40s
2826 cmake_args : -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=89
29- profiler : nsys profile -o nvidia-l40s
30- profiler_post : nsys stats --report cuda_gpu_kern_sum --force-export=true --format csv nvidia-l40s.nsys-rep >
3127 - name : amd-mi300x
3228 vendor : amd
3329 runner : cern-nextgen-mi300x
3430 cmake_args : -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx942
35- profiler : rocprofv2 --basenames --output-directory /root --output-file-name amd-mi300x
36- profiler_post : touch
3731 - name : amd-w7900
3832 vendor : amd
3933 runner : cern-nextgen-w7900
4034 cmake_args : -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx1100
41- profiler : rocprofv2 --basenames --output-directory /root --output-file-name amd-w7900
42- profiler_post : touch
35+
4336 env :
4437 WORK_DIR : /cvmfs/alice.cern.ch
4538 ALIBUILD_ARCH_PREFIX : el9-x86_64/Packages
4639 MODULEPATH : /cvmfs/alice.cern.ch/etc/toolchain/modulefiles/el9-x86_64:/cvmfs/alice.cern.ch/el9-x86_64/Modules/modulefiles
4740 STANDALONE_DIR : /root/standalone
4841 BENCHMARK_CSV : ${{ matrix.name }}.csv
4942 PROFILER_CSV : results_${{ matrix.name }}.csv
43+ TIMING_CA : ./ca -e 50kHz -g --seed 0 --memSize 15000000000 --sync --debug 1 # Add --PROCdebugMarkdown 1 --runs 42 --runsInit 2 --PROCresetTimers 1 for benchmark runs
5044 LD_LIBRARY_PATH : /usr/local/cuda-13.0/compat
5145
5246 name : ${{ matrix.name }}
5751 - name : Download Files
5852 run : |
5953 mkdir -p ${STANDALONE_DIR}
60-
61- if [[ "${{ matrix.vendor }}" == "nvidia" ]]; then
62- curl -fL --retry 3 -o ${STANDALONE_DIR}/nsys.rpm https://developer.nvidia.com/downloads/assets/tools/secure/nsight-systems/2026_2/NsightSystems-linux-cli-public-2026.2.1.210-3763964.rpm
63- dnf install -y ${STANDALONE_DIR}/nsys.rpm
64- rm -f ${STANDALONE_DIR}/nsys.rpm
65- fi
6654
6755 curl -fL --retry 3 -o ${STANDALONE_DIR}/o2-simple-GPU.out https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/o2-simple-GPU.out
6856
69- mkdir -p ${STANDALONE_DIR}/baseline
70- curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${PROFILER_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${PROFILER_CSV}
71- curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${BENCHMARK_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${BENCHMARK_CSV}
72-
7357 mkdir -p ${STANDALONE_DIR}/events
7458 curl -fL --retry 3 -o ${STANDALONE_DIR}/events/o2-simple.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/o2-simple.tar.xz
7559 tar -xf ${STANDALONE_DIR}/events/o2-simple.tar.xz -C ${STANDALONE_DIR}/events
@@ -107,23 +91,45 @@ jobs:
10791 source /etc/profile.d/modules.sh
10892 module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
10993 cd ${STANDALONE_DIR}
110- ${{ matrix.profiler }} ${STANDALONE_DIR}/ca -e 50kHz -g --memSize 15000000000 --sync --debug 1 --runs 42 --runsInit 2 --PROCdebugMarkdown 1 --PROCresetTimers 1 --PROCdebugCSV /root/${BENCHMARK_CSV}
111- ${{ matrix.profiler_post }} /root/${PROFILER_CSV}
94+ ${TIMING_CA} --debug 1 --runs 42 --runsInit 2 --PROCdebugMarkdown 1 --PROCresetTimers 1 --PROCdebugCSV /root/${BENCHMARK_CSV}
95+ python3 ${GITHUB_WORKSPACE}/.github/scripts/merge_runs.py --discard 2 --input /root/${BENCHMARK_CSV} --output /root/${BENCHMARK_CSV}
96+
97+ - name : Profiler - Nsight Compute
98+ if : ${{ matrix.vendor == 'nvidia' }}
99+ run : |
100+ dnf install -y cuda-nsight-compute-13-1
101+ source /etc/profile.d/modules.sh
102+ module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
103+ cd ${STANDALONE_DIR}
104+ ncu --set none --metrics gpu__time_duration.avg --export ${{ matrix.name }} --clock-control none --force-overwrite ${TIMING_CA} --runs 42 --debug 1 --PROCdebugMarkdown 1 # Generates ${{ matrix.name }}.ncu-rep
105+ ncu --import ${STANDALONE_DIR}/${{ matrix.name }}.ncu-rep --print-units base --csv > /root/${PROFILER_CSV}
112106 rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
107+ python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_${{ matrix.vendor }}.py --runs 42 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
113108
114- - name : Display table on GitHub web
109+ - name : Profiler - rocprofv2
110+ if : ${{ matrix.vendor == 'amd' }}
115111 run : |
116112 source /etc/profile.d/modules.sh
117113 module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
114+ cd ${STANDALONE_DIR}
115+ rocprofv2 --output-directory /root --output-file-name ${{ matrix.name }} ${TIMING_CA} --runs 42 --debug 1 --PROCdebugMarkdown 1 # Generates results_${{ matrix.name }}.csv == ${PROFILER_CSV}
116+ rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
118117 python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_${{ matrix.vendor }}.py --runs 42 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
119- python3 ${GITHUB_WORKSPACE}/.github/scripts/merge_runs.py --discard 2 --input /root/${BENCHMARK_CSV} --output /root/${BENCHMARK_CSV}
120- python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --baseline ${STANDALONE_DIR}/baseline/${PROFILER_CSV} --current /root/${PROFILER_CSV} >> ${GITHUB_STEP_SUMMARY}
121- echo -e "\n\n" >> ${GITHUB_STEP_SUMMARY}
122- python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --baseline ${STANDALONE_DIR}/baseline/${BENCHMARK_CSV} --current /root/${BENCHMARK_CSV} >> ${GITHUB_STEP_SUMMARY}
123- rm -rf ${STANDALONE_DIR}/baseline
124118
125119 - name : Upload Artifact
126120 uses : actions/upload-artifact@v6
127121 with :
128122 name : ${{ matrix.name }}-artifact
129123 path : " /root/*.csv"
124+
125+ - name : Display table on GitHub web
126+ run : |
127+ mkdir -p ${STANDALONE_DIR}/baseline
128+ curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${PROFILER_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${PROFILER_CSV}
129+ curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${BENCHMARK_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${BENCHMARK_CSV}
130+ #source /etc/profile.d/modules.sh
131+ #module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
132+ python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --baseline ${STANDALONE_DIR}/baseline/${PROFILER_CSV} --current /root/${PROFILER_CSV} >> ${GITHUB_STEP_SUMMARY}
133+ echo -e "\n\n" >> ${GITHUB_STEP_SUMMARY}
134+ python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --baseline ${STANDALONE_DIR}/baseline/${BENCHMARK_CSV} --current /root/${BENCHMARK_CSV} >> ${GITHUB_STEP_SUMMARY}
135+ rm -rf ${STANDALONE_DIR}/baseline
0 commit comments