Skip to content

Commit b66d684

Browse files
Add nsys profiler
1 parent e4bf935 commit b66d684

File tree

3 files changed

+56
-3
lines changed

3 files changed

+56
-3
lines changed

.github/scripts/profiler_nvidia.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
"""Condense an Nsight Systems kernel summary into per-run kernel timings.

Reads the CSV written by ``nsys stats --report cuda_gpu_kern_sum --format csv``
and writes a three-column CSV (``name``, ``time``, ``stdev``) with one row per
kernel whose name starts with ``krnl_``.  Times are converted from nanoseconds
to microseconds.
"""
import argparse
import csv
import statistics

# Rows skipped at the top of the input before data rows begin (the last of
# them is expected to be the CSV header).
PREAMBLE_ROWS = 3

# Column indices in the cuda_gpu_kern_sum report:
# Time (%), Total Time, Instances, Avg, Med, Min, Max, StdDev, Name
TOTAL_TIME_COL = 1
INSTANCES_COL = 2
STDDEV_COL = 7
NAME_COL = 8

# Only kernels carrying this prefix are reported; the prefix is stripped.
KERNEL_PREFIX = "krnl_"


def summarize_kernels(rows, runs):
    """Build the output table from parsed CSV rows.

    Args:
        rows: iterable of CSV rows (lists of strings), including the
            ``PREAMBLE_ROWS`` leading rows that are discarded.
        runs: number of benchmark runs the profile covers; must be non-zero.

    Returns:
        A list of rows starting with the header ``["name", "time", "stdev"]``
        followed by ``[name, mean, stdev]`` per selected kernel, where
        ``mean`` is the kernel's total time per run in microseconds and
        ``stdev`` is the per-instance standard deviation (microseconds)
        scaled by the number of instances per run.

    Raises:
        ValueError: if a selected row holds non-numeric timing fields.
    """
    rows = iter(rows)
    for _ in range(PREAMBLE_ROWS):
        next(rows, None)  # tolerate inputs shorter than the preamble

    table = [["name", "time", "stdev"]]
    for row in rows:
        # Blank lines and truncated rows carry no kernel record.
        if len(row) <= NAME_COL:
            continue
        full_name = row[NAME_COL]
        if not full_name.startswith(KERNEL_PREFIX):
            continue
        name = full_name[len(KERNEL_PREFIX):]
        if not name:
            continue

        instances = int(row[INSTANCES_COL])
        total_us = int(row[TOTAL_TIME_COL]) / 1000.0
        sigma_us = float(row[STDDEV_COL]) / 1000.0

        # Column 1 is already the sum over all instances, so the per-run time
        # is total / runs.  Multiplying it by instances-per-run (as was done
        # before) counted the instances twice.
        mean = total_us / runs
        # NOTE(review): the per-instance deviation is scaled linearly with the
        # instances per run, as before; for independent instances a sqrt
        # scaling would be the usual choice -- confirm what is intended.
        stdev = sigma_us * (instances / runs)
        table.append([name, mean, stdev])
    return table


def main(argv=None):
    """Parse the command line, read the input CSV and write the summary CSV.

    The input is read completely before the output is opened, so ``--input``
    and ``--output`` may name the same file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--runs', type=int, required=True, help='Number of runs')
    parser.add_argument('-i', '--input', required=True, help='Input CSV file')
    parser.add_argument('-o', '--output', required=True, help='Output CSV file')
    args = parser.parse_args(argv)
    if args.runs <= 0:
        parser.error('--runs must be a positive integer')

    with open(args.input, newline='') as csv_file:
        table = summarize_kernels(csv.reader(csv_file), args.runs)

    with open(args.output, 'w', newline='') as csv_file:
        csv.writer(csv_file).writerows(table)


if __name__ == "__main__":
    main()

.github/workflows/standalone-benchmark.yml

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,21 +17,30 @@ jobs:
1717
name: [nvidia-h100, nvidia-l40s, amd-mi300x, amd-w7900]
include:
  # Per-runner settings.  `profiler` is prefixed to the benchmark command and
  # `profiler_post` is run afterwards with the profiler CSV path appended as
  # its last argument.
  # NOTE(review): the table step runs .github/scripts/profiler_<vendor>.py;
  # confirm a profiler_amd.py exists (the script used before was profiler_rocm.py).
  - name: nvidia-h100
    vendor: nvidia
    runner: cern-nextgen-h100
    cmake_args: -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=90
    profiler: nsys profile -o nvidia-h100
    # Ends in '>' so that the appended CSV path becomes the redirect target.
    profiler_post: nsys stats --report cuda_gpu_kern_sum --force-export=true --format csv nvidia-h100.nsys-rep >
  - name: nvidia-l40s
    vendor: nvidia
    runner: cern-nextgen-l40s
    cmake_args: -DENABLE_CUDA=1 -DENABLE_HIP=0 -DCUDA_COMPUTETARGET=89
    profiler: nsys profile -o nvidia-l40s
    # Ends in '>' so that the appended CSV path becomes the redirect target.
    profiler_post: nsys stats --report cuda_gpu_kern_sum --force-export=true --format csv nvidia-l40s.nsys-rep >
  - name: amd-mi300x
    vendor: amd
    runner: cern-nextgen-mi300x
    cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx942
    profiler: rocprofv2 --basenames --output-directory /root --output-file-name amd-mi300x
    # No conversion command here; presumably rocprofv2 already wrote the CSV -- verify.
    profiler_post: touch
  - name: amd-w7900
    vendor: amd
    runner: cern-nextgen-w7900
    cmake_args: -DENABLE_CUDA=0 -DENABLE_HIP=1 -DHIP_AMDGPUTARGET=gfx1100
    profiler: rocprofv2 --basenames --output-directory /root --output-file-name amd-w7900
    # No conversion command here; presumably rocprofv2 already wrote the CSV -- verify.
    profiler_post: touch
3544
env:
3645
WORK_DIR: /cvmfs/alice.cern.ch
3746
ALIBUILD_ARCH_PREFIX: el9-x86_64/Packages
@@ -50,6 +59,10 @@ jobs:
5059
run: |
5160
mkdir -p ${STANDALONE_DIR}
5261
# Fetch the Nsight Systems CLI package, install it from the path it was
# downloaded to (a bare "nsys.rpm" is resolved against the current directory,
# not ${STANDALONE_DIR}), then drop the package file.
curl -fL --retry 3 -o ${STANDALONE_DIR}/nsys.rpm https://developer.nvidia.com/downloads/assets/tools/secure/nsight-systems/2026_2/NsightSystems-linux-cli-public-2026.2.1.210-3763964.rpm
dnf install -y ${STANDALONE_DIR}/nsys.rpm
rm -f ${STANDALONE_DIR}/nsys.rpm
65+
5366
curl -fL --retry 3 -o ${STANDALONE_DIR}/o2-simple-GPU.out https://cernbox.cern.ch/remote.php/dav/public-files/SfYXgQOHFga2w75/o2-simple-GPU.out
5467
5568
mkdir -p ${STANDALONE_DIR}/baseline
@@ -90,17 +103,20 @@ jobs:
90103

- name: Benchmark GPU Track Reconstruction
  run: |
    source /etc/profile.d/modules.sh
    module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
    cd ${STANDALONE_DIR}
    # Run the benchmark under the vendor profiler selected in the matrix.
    ${{ matrix.profiler }} ${STANDALONE_DIR}/ca -e 50kHz -g --memSize 15000000000 --sync --debug 1 --runs 12 --runsInit 2 --PROCresetTimers 1 --PROCtimingCSV /root/${BENCHMARK_CSV}
    # Vendor-specific post-processing; the profiler CSV path is its last argument.
    ${{ matrix.profiler_post }} /root/${PROFILER_CSV}
    rm -rf ${STANDALONE_DIR}/events/50kHz ${STANDALONE_DIR}/build
98113
99114
- name: Display table on GitHub web
100115
run: |
101116
source /etc/profile.d/modules.sh
102117
module load ninja/fortran-v1.11.1.g9-15 Vc/1.4.5-10 boost/v1.83.0-alice2-57 fmt/11.1.2-14 CMake/v3.31.6-10 ms_gsl/4.2.1-3 Clang/v20.1.7-9 TBB/v2022.3.0-3 ROOT/v6-36-04-alice9-15 ONNXRuntime/v1.22.0-71 GLFW/3.3.2-25
103-
python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_rocm.py --runs 12 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
# Runs exactly once: the script rewrites ${PROFILER_CSV} in place, so a second
# invocation would parse its own output instead of the raw profiler report.
python3 ${GITHUB_WORKSPACE}/.github/scripts/profiler_${{ matrix.vendor }}.py --runs 12 --input /root/${PROFILER_CSV} --output /root/${PROFILER_CSV}
104120
python3 ${GITHUB_WORKSPACE}/.github/scripts/merge_runs.py --discard 2 --input /root/${BENCHMARK_CSV} --output /root/${BENCHMARK_CSV}
105121
python3 ${GITHUB_WORKSPACE}/.github/scripts/csv_to_md.py --baseline ${STANDALONE_DIR}/baseline/${PROFILER_CSV} --current /root/${PROFILER_CSV} >> ${GITHUB_STEP_SUMMARY}
106122
echo -e "\n\n" >> ${GITHUB_STEP_SUMMARY}

0 commit comments

Comments
 (0)