@@ -14,26 +14,46 @@ jobs:
# Job matrix: one CPU-only configuration plus four GPU targets.
# Each `include` entry extends the matrix combination carrying the same `name`:
#   runner          - runner label (presumably consumed by `runs-on`, which is
#                     outside this excerpt)
#   cmake_args      - backend selection flags (presumably passed to the build
#                     step, which is outside this excerpt)
#   profiler_runs   - `--runs` value for the Nsight profiler steps and the
#                     profiler summary table
#   standalone_runs - `--runs` value for the standalone benchmark step
#   cpu_gpu         - device arguments spliced into the `ca` command lines
strategy:
  fail-fast: false
  matrix:
    name:
      - cpu
      - nvidia-h100
      - nvidia-l40s
      - amd-mi300x
      - amd-w7900
    include:
      # CPU-only build; scheduled on the H100 runner with both GPU backends off.
      - name: cpu
        runner: cern-nextgen-h100
        cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=0
        profiler_runs: 42
        standalone_runs: 42
        cpu_gpu: ' -c'

      # NVIDIA targets (CUDA backend).
      - name: nvidia-h100
        runner: cern-nextgen-h100
        cmake_args: -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=90
        profiler_runs: 21
        standalone_runs: 42
        cpu_gpu: ' -g --memSize 20000000000'
      - name: nvidia-l40s
        runner: cern-nextgen-l40s
        cmake_args: -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=89
        profiler_runs: 42
        standalone_runs: 42
        cpu_gpu: ' -g --memSize 20000000000'

      # AMD targets (HIP backend).
      - name: amd-mi300x
        runner: cern-nextgen-mi300x
        cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx942
        profiler_runs: 42
        standalone_runs: 42
        cpu_gpu: ' -g --memSize 20000000000'
      - name: amd-w7900
        runner: cern-nextgen-w7900
        cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx1100
        profiler_runs: 42
        standalone_runs: 42
        cpu_gpu: ' -g --memSize 20000000000'
# Job-wide environment shared by every step.
env:
  WORK_DIR: '/cvmfs/alice.cern.ch'
  ALIBUILD_ARCH_PREFIX: 'el9-x86_64/Packages'
  MODULEPATH: '/cvmfs/alice.cern.ch/etc/toolchain/modulefiles/el9-x86_64:/cvmfs/alice.cern.ch/el9-x86_64/Modules/modulefiles'
  STANDALONE_DIR: '/root/standalone'
  # Bare file names; the steps that use them prepend /root/ themselves.
  BENCHMARK_CSV: 'standalone_${{ matrix.name }}.csv'
  PROFILER_CSV: 'profiler_${{ matrix.name }}.csv'
  # Common prefix of every timing/profiling invocation of `ca`.
  # Add --PROCdebugMarkdown 1 --runs 42 --runsInit 2 --PROCresetTimers 1 for benchmark runs
  TIMING_CA: './ca -e 50kHz ${{ matrix.cpu_gpu }} --seed 0 --sync --debug 1'
  LD_LIBRARY_PATH: '/usr/local/cuda-13.0/compat'

# Job display name: the matrix entry being built.
name: '${{ matrix.name }}'
4767
4868 curl -fL --retry 3 -o ${STANDALONE_DIR}/o2-simple-GPU.out https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/o2-simple-GPU.out
4969
50- mkdir -p ${STANDALONE_DIR}/baseline
51- curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${{ matrix.name }}.csv https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${{ matrix.name }}.csv
52-
5370 mkdir -p ${STANDALONE_DIR}/events
5471 curl -fL --retry 3 -o ${STANDALONE_DIR}/events/o2-simple.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/o2-simple.tar.xz
5572 tar -xf ${STANDALONE_DIR}/events/o2-simple.tar.xz -C ${STANDALONE_DIR}/events
@@ -68,38 +85,79 @@ jobs:
6885 env :
6986 DETERMINISTIC_MODE : GPU
7087
# Runs the deterministic reconstruction on the o2-simple event and checks the
# resulting dump against the reference dump fetched into STANDALONE_DIR.
- name: Test Track Reconstruction
  run: |
    source /etc/profile.d/modules.sh
    module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
    cd ${STANDALONE_DIR}
    ${STANDALONE_DIR}/ca -e o2-simple ${{ matrix.cpu_gpu }} --seed 0 --sync --runs 1 --RTCenable --PROCdeterministicGPUReconstruction 1 --RTCoptConstexpr 1 --RTCoptSpecialCode 1 --debug 6
    # The comparison is glob-based, so it is only meaningful when exactly two
    # .out files exist. With one match cmp would read stdin as the second file;
    # with three or more the extra names would be parsed as skip offsets.
    # Fail with a clear message instead.
    set -- ${STANDALONE_DIR}/*.out
    if [ "$#" -ne 2 ]; then
      echo "Expected exactly 2 .out files in ${STANDALONE_DIR}, found $#: $*" >&2
      exit 1
    fi
    cmp "$1" "$2"
    # Free the inputs/outputs of this test and the deterministic build tree.
    rm -rf ${STANDALONE_DIR}/*.out ${STANDALONE_DIR}/events/o2-simple ${STANDALONE_DIR}/build
7996
# Rebuilds with deterministic mode switched off, ahead of the timing steps.
# `*build` is a YAML alias; its anchor is defined outside this excerpt.
- name: Build Non-Deterministic
  run: *build
  env:
    # Quoted so the value stays the string "OFF": YAML 1.1 parsers and linters
    # read a bare OFF as boolean false.
    DETERMINISTIC_MODE: 'OFF'
84101
# Standalone timing run: writes the per-run CSV to /root/${BENCHMARK_CSV} and
# condenses it into /root/summary_${BENCHMARK_CSV}.
- name: Benchmark Track Reconstruction
  run: |
    source /etc/profile.d/modules.sh
    module load \
      ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 \
      fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 \
      TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
    cd ${STANDALONE_DIR}
    ${TIMING_CA} --debug 1 \
      --runs ${{ matrix.standalone_runs }} --runsInit 0 \
      --PROCdebugMarkdown 1 --PROCresetTimers 1 \
      --PROCdebugCSV /root/${BENCHMARK_CSV}
    python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_standalone.py \
      --discard 0 \
      --input /root/${BENCHMARK_CSV} \
      --output /root/summary_${BENCHMARK_CSV}
109+
# Kernel timing through Nsight Compute; only the nvidia-h100 entry runs it.
- name: Profiler - Nsight Compute
  if: matrix.name == 'nvidia-h100'
  run: |
    dnf install -y cuda-nsight-compute-13-1
    source /etc/profile.d/modules.sh
    module load \
      ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 \
      fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 \
      TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
    cd ${STANDALONE_DIR}
    # Generates ${{ matrix.name }}.ncu-rep
    ncu --set none --metrics gpu__time_duration.avg \
      --export ${{ matrix.name }} --clock-control none --force-overwrite \
      ${TIMING_CA} --runs ${{ matrix.profiler_runs }} --debug 1 --PROCdebugMarkdown 1
    # Convert the binary report into CSV for the summary script.
    ncu --import ${STANDALONE_DIR}/${{ matrix.name }}.ncu-rep \
      --print-units base --csv > /root/${PROFILER_CSV}
    rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
    python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_ncu.py \
      --input /root/${PROFILER_CSV} \
      --output /root/summary_${PROFILER_CSV}
92121
# Kernel timing through Nsight Systems; only the nvidia-l40s entry runs it.
- name: Profiler - Nsight Systems
  if: matrix.name == 'nvidia-l40s'
  run: |
    # Register NVIDIA's devtools repository for this RHEL major version / arch.
    rhel_major=$(source /etc/os-release; echo ${VERSION_ID%%.*})
    repo_arch=$(rpm --eval '%{_arch}' | sed s/aarch/arm/)
    dnf config-manager --add-repo "https://developer.download.nvidia.com/devtools/repos/rhel${rhel_major}/${repo_arch}/"
    # NOTE(review): --nogpgcheck installs the package without signature
    # verification; consider importing the repository key instead.
    dnf install --nogpgcheck -y nsight-systems-cli-2026.2.1
    source /etc/profile.d/modules.sh
    module load \
      ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 \
      fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 \
      TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
    cd ${STANDALONE_DIR}
    # Generates ${{ matrix.name }}.nsys-rep
    nsys profile -o ${{ matrix.name }} \
      ${TIMING_CA} --runs ${{ matrix.profiler_runs }} --debug 1 --PROCdebugMarkdown 1
    # Export the per-kernel GPU summary as CSV for the summary script.
    nsys stats --report cuda_gpu_kern_sum --timeunit usec \
      --force-export=true --format csv \
      ${{ matrix.name }}.nsys-rep > /root/${PROFILER_CSV}
    rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
    python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_nsys.py \
      --input /root/${PROFILER_CSV} \
      --output /root/summary_${PROFILER_CSV}
134+
# Kernel timing through rocprofv2; runs for both AMD matrix entries.
- name: Profiler - rocprofv2
  if: ${{ matrix.name == 'amd-mi300x' || matrix.name == 'amd-w7900' }}
  run: |
    source /etc/profile.d/modules.sh
    module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
    cd ${STANDALONE_DIR}
    # Use profiler_runs (not standalone_runs): it is the matrix field the Nsight
    # profiler steps pass to `--runs` and the one the summary table is rendered
    # with for ${PROFILER_CSV}, so the run count stays consistent if the two
    # matrix values ever diverge.
    # Generates results_${{ matrix.name }}.csv
    rocprofv2 --output-directory /root --output-file-name ${{ matrix.name }} ${TIMING_CA} --runs ${{ matrix.profiler_runs }} --debug 1 --PROCdebugMarkdown 1
    rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
    # Rename to the common profiler CSV name expected by the later steps.
    mv /root/results_${{ matrix.name }}.csv /root/${PROFILER_CSV}
    python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_rocprofv2.py --input /root/${PROFILER_CSV} --output /root/summary_${PROFILER_CSV}
100145
# Publishes every CSV written to /root by the benchmark and profiler steps
# (raw and summary files alike) as one artifact per matrix entry.
- name: Upload Artifact
  uses: actions/upload-artifact@v6
  with:
    name: '${{ matrix.name }}-artifact'
    # NOTE(review): the pattern is reproduced with its leading space; confirm
    # that the space is intentional.
    path: ' /root/*.csv'
151+
# Renders baseline-vs-current tables into the job summary: first the profiler
# summary, then the standalone benchmark summary. Skipped for the cpu entry.
- name: Display table on GitHub web
  if: matrix.name != 'cpu'
  run: |
    source /etc/profile.d/modules.sh
    module load \
      ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 \
      fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 \
      TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
    mkdir -p ${STANDALONE_DIR}/baseline
    # Fetch the stored baseline for each summary produced by this job.
    for csv in summary_${PROFILER_CSV} summary_${BENCHMARK_CSV}; do
      curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${csv} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${csv}
    done
    python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py \
      --runs ${{ matrix.profiler_runs }} \
      --baseline ${STANDALONE_DIR}/baseline/summary_${PROFILER_CSV} \
      --current /root/summary_${PROFILER_CSV} >> ${GITHUB_STEP_SUMMARY}
    # Blank lines between the two tables.
    echo -e "\n\n" >> ${GITHUB_STEP_SUMMARY}
    python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py \
      --runs ${{ matrix.standalone_runs }} \
      --baseline ${STANDALONE_DIR}/baseline/summary_${BENCHMARK_CSV} \
      --current /root/summary_${BENCHMARK_CSV} >> ${GITHUB_STEP_SUMMARY}
    rm -rf ${STANDALONE_DIR}/baseline
0 commit comments