Skip to content

Commit 905ce7f

Browse files
committed
Removed diagnostic debug hooks and finalized AMD PCIe offset timer
1 parent eb1bf7f commit 905ce7f

3 files changed

Lines changed: 3 additions & 94 deletions

File tree

src/extract_gpuinfo.c

Lines changed: 0 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -54,9 +54,6 @@ static LIST_HEAD(gpu_vendors);
5454
void register_gpu_vendor(struct gpu_vendor *vendor) { list_add(&vendor->list, &gpu_vendors); }
5555

5656
bool gpuinfo_init_info_extraction(unsigned *monitored_dev_count, struct list_head *devices) {
57-
struct timespec t_start, t_end;
58-
clock_gettime(CLOCK_MONOTONIC, &t_start);
59-
6057
struct gpu_vendor *vendor;
6158

6259
*monitored_dev_count = 0;
@@ -74,11 +71,6 @@ bool gpuinfo_init_info_extraction(unsigned *monitored_dev_count, struct list_hea
7471
*monitored_dev_count += vendor_devices_count;
7572
}
7673

77-
clock_gettime(CLOCK_MONOTONIC, &t_end);
78-
double elapsed = (t_end.tv_sec - t_start.tv_sec) * 1000.0 + (t_end.tv_nsec - t_start.tv_nsec) / 1000000.0;
79-
if (elapsed > 0.0)
80-
fprintf(stderr, "[DEBUG] gpuinfo_init_info_extraction took %.2f ms\n", elapsed);
81-
8274
return true;
8375
}
8476

@@ -97,18 +89,9 @@ bool gpuinfo_shutdown_info_extraction(struct list_head *devices) {
9789
}
9890

9991
bool gpuinfo_populate_static_infos(struct list_head *devices) {
100-
struct timespec t_start, t_end;
101-
clock_gettime(CLOCK_MONOTONIC, &t_start);
102-
10392
struct gpu_info *device;
10493

10594
list_for_each_entry(device, devices, list) { device->vendor->populate_static_info(device); }
106-
107-
clock_gettime(CLOCK_MONOTONIC, &t_end);
108-
double elapsed = (t_end.tv_sec - t_start.tv_sec) * 1000.0 + (t_end.tv_nsec - t_start.tv_nsec) / 1000000.0;
109-
if (elapsed > 0.0)
110-
fprintf(stderr, "[DEBUG] gpuinfo_populate_static_infos took %.2f ms\n", elapsed);
111-
11295
return true;
11396
}
11497

@@ -126,21 +109,12 @@ static void calculate_effective_load(struct gpuinfo_dynamic_info *dynamic_info)
126109
}
127110

128111
bool gpuinfo_refresh_dynamic_info(struct list_head *devices) {
129-
struct timespec t_start, t_end;
130-
clock_gettime(CLOCK_MONOTONIC, &t_start);
131-
132112
struct gpu_info *device;
133113

134114
list_for_each_entry(device, devices, list) {
135115
device->vendor->refresh_dynamic_info(device);
136116
calculate_effective_load(&device->dynamic_info);
137117
}
138-
139-
clock_gettime(CLOCK_MONOTONIC, &t_end);
140-
double elapsed = (t_end.tv_sec - t_start.tv_sec) * 1000.0 + (t_end.tv_nsec - t_start.tv_nsec) / 1000000.0;
141-
if (elapsed > 0.0)
142-
fprintf(stderr, "[DEBUG] gpuinfo_refresh_dynamic_info took %.2f ms\n", elapsed);
143-
144118
return true;
145119
}
146120

@@ -250,9 +224,6 @@ bool gpuinfo_fix_dynamic_info_from_process_info(struct list_head *devices) {
250224
#undef MYMIN
251225

252226
static void gpuinfo_populate_process_info(struct gpu_info *device) {
253-
struct timespec t_start, t_end;
254-
clock_gettime(CLOCK_MONOTONIC, &t_start);
255-
256227
for (unsigned j = 0; j < device->processes_count; ++j) {
257228
pid_t current_pid = device->processes[j].pid;
258229
struct process_info_cache *cached_pid_info;
@@ -315,11 +286,6 @@ static void gpuinfo_populate_process_info(struct gpu_info *device) {
315286
}
316287
}
317288
}
318-
319-
clock_gettime(CLOCK_MONOTONIC, &t_end);
320-
double elapsed = (t_end.tv_sec - t_start.tv_sec) * 1000.0 + (t_end.tv_nsec - t_start.tv_nsec) / 1000000.0;
321-
if (elapsed > 0.0)
322-
fprintf(stderr, "[DEBUG] gpuinfo_populate_process_info took %.2f ms\n", elapsed);
323289
}
324290

325291
static void gpuinfo_clean_old_cache(void) {
@@ -335,9 +301,6 @@ static void gpuinfo_clean_old_cache(void) {
335301
}
336302

337303
bool gpuinfo_refresh_processes(struct list_head *devices) {
338-
struct timespec t_proc_start, t_proc_end;
339-
clock_gettime(CLOCK_MONOTONIC, &t_proc_start);
340-
341304
struct gpu_info *device;
342305

343306
list_for_each_entry(device, devices, list) { device->processes_count = 0; }
@@ -351,12 +314,6 @@ bool gpuinfo_refresh_processes(struct list_head *devices) {
351314
}
352315
gpuinfo_clean_old_cache();
353316

354-
clock_gettime(CLOCK_MONOTONIC, &t_proc_end);
355-
double elapsed_proc =
356-
(t_proc_end.tv_sec - t_proc_start.tv_sec) * 1000.0 + (t_proc_end.tv_nsec - t_proc_start.tv_nsec) / 1000000.0;
357-
if (elapsed_proc > 0.0)
358-
fprintf(stderr, "[DEBUG] gpuinfo_refresh_processes took %.2f ms\n", elapsed_proc);
359-
360317
return true;
361318
}
362319

src/extract_gpuinfo_amdgpu.c

Lines changed: 3 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -652,9 +652,6 @@ static void gpuinfo_amdgpu_populate_static_info(struct gpu_info *_gpu_info) {
652652
}
653653

654654
static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info) {
655-
struct timespec t_query_start, t_query_end;
656-
clock_gettime(CLOCK_MONOTONIC, &t_query_start);
657-
658655
struct gpu_info_amdgpu *gpu_info = container_of(_gpu_info, struct gpu_info_amdgpu, base);
659656
struct gpuinfo_dynamic_info *dynamic_info = &gpu_info->base.dynamic_info;
660657
bool info_query_success = false;
@@ -666,12 +663,6 @@ static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info) {
666663
if (libdrm_amdgpu_handle && _amdgpu_query_gpu_info)
667664
info_query_success = !_amdgpu_query_gpu_info(gpu_info->amdgpu_device, &info);
668665

669-
clock_gettime(CLOCK_MONOTONIC, &t_query_end);
670-
double elapsed_q =
671-
(t_query_end.tv_sec - t_query_start.tv_sec) * 1000.0 + (t_query_end.tv_nsec - t_query_start.tv_nsec) / 1000000.0;
672-
if (elapsed_q > 5.0)
673-
fprintf(stderr, "[DEBUG] AMD _amdgpu_query_gpu_info took %.2f ms\n", elapsed_q);
674-
675666
// GPU current speed
676667
if (libdrm_amdgpu_handle && _amdgpu_query_sensor_info)
677668
last_libdrm_return_status =
@@ -714,15 +705,9 @@ static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info) {
714705

715706
// Memory usage
716707
struct drm_amdgpu_memory_info memory_info;
717-
clock_gettime(CLOCK_MONOTONIC, &t_query_start);
718708
if (libdrm_amdgpu_handle && _amdgpu_query_info)
719709
last_libdrm_return_status =
720710
_amdgpu_query_info(gpu_info->amdgpu_device, AMDGPU_INFO_MEMORY, sizeof(memory_info), &memory_info);
721-
clock_gettime(CLOCK_MONOTONIC, &t_query_end);
722-
elapsed_q =
723-
(t_query_end.tv_sec - t_query_start.tv_sec) * 1000.0 + (t_query_end.tv_nsec - t_query_start.tv_nsec) / 1000000.0;
724-
if (elapsed_q > 5.0)
725-
fprintf(stderr, "[DEBUG] AMD _amdgpu_query_info(AMDGPU_INFO_MEMORY) took %.2f ms\n", elapsed_q);
726711
else
727712
last_libdrm_return_status = 1;
728713
if (!last_libdrm_return_status) {
@@ -754,13 +739,7 @@ static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info) {
754739

755740
// Fan speed
756741
unsigned currentFanSpeed;
757-
clock_gettime(CLOCK_MONOTONIC, &t_query_start);
758742
int patternsMatched = rewindAndReadPattern(gpu_info->fanSpeedFILE, "%u", &currentFanSpeed);
759-
clock_gettime(CLOCK_MONOTONIC, &t_query_end);
760-
elapsed_q =
761-
(t_query_end.tv_sec - t_query_start.tv_sec) * 1000.0 + (t_query_end.tv_nsec - t_query_start.tv_nsec) / 1000000.0;
762-
if (elapsed_q > 5.0)
763-
fprintf(stderr, "[DEBUG] AMD rewindAndReadPattern(fanSpeedFILE) took %.2f ms\n", elapsed_q);
764743
if (patternsMatched == 1) {
765744
SET_GPUINFO_DYNAMIC(dynamic_info, fan_speed, currentFanSpeed * 100 / gpu_info->maxFanValue);
766745
}
@@ -789,19 +768,13 @@ static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info) {
789768
// to function without hanging for 1,000ms+ on launch, we skip reading this file
790769
// purely on the very first polling cycle.
791770
if (gpu_info->PCIeBW && GPUINFO_DYNAMIC_FIELD_VALID(dynamic_info, used_memory)) {
792-
// We confirm this is not the first cycle by checking if used_memory and
793-
// memory thresholds have successfully been evaluated in prior polling cycles
794-
// before we allow the PCIe bandwidth kernel lock.
771+
// According to https://github.com/torvalds/linux/blob/master/drivers/gpu/drm/amd/pm/amdgpu_pm.c, under the pcie_bw
772+
// section, we should be able to read the number of packets received and sent by the GPU and get the maximum payload
773+
// size during the last second. This is untested but should work when the file is populated by the driver.
795774
uint64_t received, transmitted;
796775
int maxPayloadSize;
797-
clock_gettime(CLOCK_MONOTONIC, &t_query_start);
798776
int NreadPatterns =
799777
rewindAndReadPattern(gpu_info->PCIeBW, "%" SCNu64 " %" SCNu64 " %i", &received, &transmitted, &maxPayloadSize);
800-
clock_gettime(CLOCK_MONOTONIC, &t_query_end);
801-
elapsed_q = (t_query_end.tv_sec - t_query_start.tv_sec) * 1000.0 +
802-
(t_query_end.tv_nsec - t_query_start.tv_nsec) / 1000000.0;
803-
if (elapsed_q > 5.0)
804-
fprintf(stderr, "[DEBUG] AMD rewindAndReadPattern(PCIeBW) took %.2f ms\n", elapsed_q);
805778
if (NreadPatterns == 3) {
806779
received *= maxPayloadSize;
807780
transmitted *= maxPayloadSize;
@@ -816,13 +789,7 @@ static void gpuinfo_amdgpu_refresh_dynamic_info(struct gpu_info *_gpu_info) {
816789
if (gpu_info->powerCap) {
817790
// The power cap in microwatts
818791
unsigned powerCap;
819-
clock_gettime(CLOCK_MONOTONIC, &t_query_start);
820792
int NreadPatterns = rewindAndReadPattern(gpu_info->powerCap, "%u", &powerCap);
821-
clock_gettime(CLOCK_MONOTONIC, &t_query_end);
822-
elapsed_q = (t_query_end.tv_sec - t_query_start.tv_sec) * 1000.0 +
823-
(t_query_end.tv_nsec - t_query_start.tv_nsec) / 1000000.0;
824-
if (elapsed_q > 5.0)
825-
fprintf(stderr, "[DEBUG] AMD rewindAndReadPattern(powerCap) took %.2f ms\n", elapsed_q);
826793
if (NreadPatterns == 1) {
827794
SET_GPUINFO_DYNAMIC(dynamic_info, power_draw_max, powerCap / 1000);
828795
}

src/nvtop.c

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -234,9 +234,6 @@ int main(int argc, char **argv) {
234234
return EXIT_SUCCESS;
235235
}
236236

237-
struct timespec t_prewarm_start, t_prewarm_end;
238-
clock_gettime(CLOCK_MONOTONIC, &t_prewarm_start);
239-
240237
// Pre-warm the cycle-based metrics by taking an initial reading here.
241238
// This allows the ensuing setup time (e.g. sysfs parsing, curses init) to
242239
// count towards the 100ms time delta needed to calculate load percentages
@@ -245,12 +242,6 @@ int main(int argc, char **argv) {
245242
gpuinfo_refresh_processes(&monitoredGpus);
246243
gpuinfo_utilisation_rate(&monitoredGpus);
247244

248-
clock_gettime(CLOCK_MONOTONIC, &t_prewarm_end);
249-
double elapsed_prewarm = (t_prewarm_end.tv_sec - t_prewarm_start.tv_sec) * 1000.0 +
250-
(t_prewarm_end.tv_nsec - t_prewarm_start.tv_nsec) / 1000000.0;
251-
if (elapsed_prewarm > 0.0)
252-
fprintf(stderr, "[DEBUG] nvtop.c prewarming block took %.2f ms\n", elapsed_prewarm);
253-
254245
nvtop_time time_startup_refresh;
255246
nvtop_get_current_time(&time_startup_refresh);
256247

@@ -269,19 +260,13 @@ int main(int argc, char **argv) {
269260

270261
if (startup_elapsed_ms < update_interval_option) {
271262
double remaining_ms = update_interval_option - startup_elapsed_ms;
272-
fprintf(stderr, "[DEBUG] SLEEPING FOR: %f remaining ms\n", remaining_ms);
273263
#if _POSIX_C_SOURCE >= 199309L
274-
fprintf(stderr, "[DEBUG] USING nanosleep\n");
275264
struct timespec tv = {.tv_sec = (long)(remaining_ms / 1000.0),
276265
.tv_nsec = (long)(fmod(remaining_ms, 1000.0) * 1000000.0)};
277266
nanosleep(&tv, &tv);
278267
#else
279-
fprintf(stderr, "[DEBUG] USING usleep\n");
280268
usleep((useconds_t)(remaining_ms * 1000.0));
281269
#endif
282-
} else {
283-
fprintf(stderr, "[DEBUG] SKIPPING SLEEP: startup_elapsed_ms (%f) >= update_interval_option (%d)\n",
284-
startup_elapsed_ms, update_interval_option);
285270
}
286271
first_snapshot = false;
287272
} else {

0 commit comments

Comments
 (0)