sort nvidia csv #204
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Standalone GPU benchmark workflow.
#
# For every GPU runner in the matrix this job:
#   1. downloads the reference output, baseline CSV and event data,
#   2. builds the standalone code with GPUCA_DETERMINISTIC_MODE=GPU and
#      compares the produced GPU.out with the downloaded reference,
#   3. rebuilds with GPUCA_DETERMINISTIC_MODE=OFF and runs the benchmark
#      under the vendor profiler,
#   4. post-processes the CSV files, appends a comparison table to the step
#      summary and uploads the CSV files as an artifact.
name: Standalone Benchmark

on:
  workflow_dispatch:
  pull_request:
  push:
    branches:
      - '**'

jobs:
  benchmark:
    runs-on: ${{ matrix.runner }}
    container: registry.cern.ch/alisw/slc9-gpu-builder@sha256:ea3443f9dfbc770e4b4bce0d1a9ecc0b7a7c16e9f76e416b796d170877220820
    strategy:
      # Let the remaining GPU targets finish when one of them fails.
      fail-fast: false
      matrix:
        name: [nvidia-h100, nvidia-l40s, amd-mi300x, amd-w7900]
        # Per-target settings, attached to the matching `name` entry above.
        #   profiler:      command prefix placed in front of the benchmark run.
        #   profiler_post: command prefix; the benchmark step appends the
        #                  profiler CSV path to it. The nvidia variants end in
        #                  `>` so that path becomes the redirect target.
        include:
          - name: nvidia-h100
            vendor: nvidia
            runner: cern-nextgen-h100
            cmake_args: -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=90
            profiler: nsys profile -o nvidia-h100
            profiler_post: nsys stats --report cuda_gpu_kern_sum --force-export=true --format csv nvidia-h100.nsys-rep >
          - name: nvidia-l40s
            vendor: nvidia
            runner: cern-nextgen-l40s
            cmake_args: -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=89
            profiler: nsys profile -o nvidia-l40s
            profiler_post: nsys stats --report cuda_gpu_kern_sum --force-export=true --format csv nvidia-l40s.nsys-rep >
          - name: amd-mi300x
            vendor: amd
            runner: cern-nextgen-mi300x
            cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx942
            profiler: rocprofv2 --basenames --output-directory /root --output-file-name amd-mi300x
            profiler_post: touch
          - name: amd-w7900
            vendor: amd
            runner: cern-nextgen-w7900
            cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx1100
            profiler: rocprofv2 --basenames --output-directory /root --output-file-name amd-w7900
            profiler_post: touch
    # The scripts below use bash-only syntax (`[[ ... ]]`, `source`), so select
    # bash explicitly instead of relying on the runner's default shell.
    defaults:
      run:
        shell: bash
    env:
      WORK_DIR: /cvmfs/alice.cern.ch
      ALIBUILD_ARCH_PREFIX: el9-x86_64/Packages
      MODULEPATH: /cvmfs/alice.cern.ch/etc/toolchain/modulefiles/el9-x86_64:/cvmfs/alice.cern.ch/el9-x86_64/Modules/modulefiles
      STANDALONE_DIR: /root/standalone
      BENCHMARK_CSV: ${{ matrix.name }}.csv
      PROFILER_CSV: results_${{ matrix.name }}.csv
      LD_LIBRARY_PATH: /usr/local/cuda-13.0/compat
      # Single source of truth for the environment modules loaded by every
      # build/run step (expanded unquoted so each entry is its own argument).
      STANDALONE_MODULES: >-
        ninja/fortran-v1.11.1.g9-15
        Vc/1.4.5-10
        boost/v1.83.0-alice2-57
        fmt/11.1.2-14
        CMake/v3.31.6-10
        ms_gsl/4.2.1-3
        Clang/v20.1.7-9
        TBB/v2022.3.0-3
        ROOT/v6-36-04-alice9-15
        ONNXRuntime/v1.22.0-71
        GLFW/3.3.2-25
    name: ${{ matrix.name }}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v6

      # Fetches everything the later steps read from ${STANDALONE_DIR}:
      # the reference output, the baseline benchmark CSV and both event sets.
      # The Nsight Systems CLI is installed on nvidia targets only.
      - name: Download Files
        run: |
          mkdir -p ${STANDALONE_DIR}
          if [[ "${{ matrix.vendor }}" == "nvidia" ]]; then
            curl -fL --retry 3 -o ${STANDALONE_DIR}/nsys.rpm https://developer.nvidia.com/downloads/assets/tools/secure/nsight-systems/2026_2/NsightSystems-linux-cli-public-2026.2.1.210-3763964.rpm
            dnf install -y ${STANDALONE_DIR}/nsys.rpm
            rm -f ${STANDALONE_DIR}/nsys.rpm
          fi
          curl -fL --retry 3 -o ${STANDALONE_DIR}/o2-simple-GPU.out https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/o2-simple-GPU.out
          mkdir -p ${STANDALONE_DIR}/baseline
          #curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${PROFILER_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${PROFILER_CSV}
          curl -fL --retry 3 -o ${STANDALONE_DIR}/baseline/${BENCHMARK_CSV} https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/baseline/${BENCHMARK_CSV}
          mkdir -p ${STANDALONE_DIR}/events
          curl -fL --retry 3 -o ${STANDALONE_DIR}/events/o2-simple.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/o2-simple.tar.xz
          tar -xf ${STANDALONE_DIR}/events/o2-simple.tar.xz -C ${STANDALONE_DIR}/events
          curl -fL --retry 3 -o ${STANDALONE_DIR}/events/50kHz.tar.xz https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/events/50kHz.tar.xz
          tar -xf ${STANDALONE_DIR}/events/50kHz.tar.xz -C ${STANDALONE_DIR}/events

      # The build script is anchored (&build) and reused verbatim by the
      # non-deterministic build; only DETERMINISTIC_MODE differs between them.
      - name: Build Deterministic
        run: &build |
          source /etc/profile.d/modules.sh
          module load ${STANDALONE_MODULES}
          mkdir -p ${STANDALONE_DIR}
          cmake -B ${STANDALONE_DIR}/build ${{ matrix.cmake_args }} -DENABLE_OPENCL=0 -DGPUCA_BUILD_EVENT_DISPLAY=0 -DGPUCA_DETERMINISTIC_MODE=${DETERMINISTIC_MODE} -DCMAKE_INSTALL_PREFIX=${STANDALONE_DIR} ${GITHUB_WORKSPACE}/GPU/GPUTracking/Standalone/
          cmake --build ${STANDALONE_DIR}/build --target install -j 8
        env:
          DETERMINISTIC_MODE: "GPU"

      # Runs the deterministic build on the o2-simple events and requires the
      # produced GPU.out to be byte-identical (cmp) to the downloaded
      # reference. Afterwards the outputs, the o2-simple events and the build
      # tree are removed.
      - name: Test GPU Track Reconstruction
        run: |
          source /etc/profile.d/modules.sh
          module load ${STANDALONE_MODULES}
          cd ${STANDALONE_DIR}
          ${STANDALONE_DIR}/ca -e o2-simple -g --seed 0 --memSize 20000000000 --sync --runs 1 --RTCenable --PROCdeterministicGPUReconstruction 1 --RTCoptConstexpr 1 --RTCoptSpecialCode 1 --debug 6
          cmp ${STANDALONE_DIR}/GPU.out ${STANDALONE_DIR}/o2-simple-GPU.out
          rm -rf ${STANDALONE_DIR}/GPU.out ${STANDALONE_DIR}/o2-simple-GPU.out ${STANDALONE_DIR}/events/o2-simple ${STANDALONE_DIR}/build

      - name: Build Non-Deterministic
        run: *build
        env:
          # Quoted: a bare OFF is a boolean in YAML 1.1 and would not reach
          # cmake as the literal string on loaders that follow that schema.
          DETERMINISTIC_MODE: "OFF"

      # Runs the benchmark on the 50kHz events under the vendor profiler and
      # writes the timing CSV to /root/${BENCHMARK_CSV}. The profiler_post
      # prefix is then completed with the /root/${PROFILER_CSV} path.
      - name: Benchmark GPU Track Reconstruction
        run: |
          source /etc/profile.d/modules.sh
          module load ${STANDALONE_MODULES}
          cd ${STANDALONE_DIR}
          ${{ matrix.profiler }} ${STANDALONE_DIR}/ca -e 50kHz -g --memSize 15000000000 --sync --debug 1 --runs 12 --runsInit 2 --PROCresetTimers 1 --PROCtimingCSV /root/${BENCHMARK_CSV}
          ${{ matrix.profiler_post }} /root/${PROFILER_CSV}
          rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build

      # Post-processes both CSV files in place and appends the
      # baseline-vs-current table for the benchmark CSV to the step summary.
      # NOTE(review): `--runs 12` / `--discard 2` look like they mirror the
      # `--runs 12 --runsInit 2` of the benchmark step - confirm and keep in
      # sync when changing either.
      - name: Display table on GitHub web
        run: |
          source /etc/profile.d/modules.sh
          module load ${STANDALONE_MODULES}
          python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_${{ matrix.vendor }}.py --runs 12 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
          python3 ${GITHUB_WORKSPACE}/.github/scripts/merge_runs.py --discard 2 --input /root/${BENCHMARK_CSV} --output /root/${BENCHMARK_CSV}
          #python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --baseline ${STANDALONE_DIR}/baseline/${PROFILER_CSV} --current /root/${PROFILER_CSV} >> ${GITHUB_STEP_SUMMARY}
          #echo -e "\n\n" >> ${GITHUB_STEP_SUMMARY}
          python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --baseline ${STANDALONE_DIR}/baseline/${BENCHMARK_CSV} --current /root/${BENCHMARK_CSV} >> ${GITHUB_STEP_SUMMARY}
          rm -rf ${STANDALONE_DIR}/baseline

      # Publishes every CSV left in /root as "<matrix name>-artifact".
      - name: Upload Artifact
        uses: actions/upload-artifact@v6
        with:
          name: ${{ matrix.name }}-artifact
          path: "/root/*.csv"