-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathCUDATimer.h
More file actions
executable file
·122 lines (110 loc) · 4.28 KB
/
CUDATimer.h
File metadata and controls
executable file
·122 lines (110 loc) · 4.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#ifndef CUDATimer_h
#define CUDATimer_h
#include <cuda_runtime.h>
#include <cstdio>
#include <string>
#include <vector>
/**
 * Bookkeeping for one timed region: the pair of CUDA events that bracket it,
 * the measured elapsed time, and the label it is reported under.
 */
struct TimingInfo {
// Event recorded by CUDATimer::startEvent() when the region begins.
cudaEvent_t startEvent;
// Event recorded by CUDATimer::endEvent() when the region ends
// (created with cudaEventBlockingSync).
cudaEvent_t endEvent;
// Elapsed time between the two events as reported by cudaEventElapsedTime
// (milliseconds); written by CUDATimer::evaluate().
float duration;
// Label used by CUDATimer::evaluate() to group regions into one table row.
std::string eventName;
};
/**
 * Returns the position of the first element of `collection` that compares
 * equal to `value`, or -1 when no element matches.
 *
 * Copied wholesale from mLib, so nvcc doesn't choke.
 */
template<class T>
int findFirstIndex(const std::vector<T> &collection, const T &value)
{
    typedef typename std::vector<T>::size_type Position;
    const Position count = collection.size();
    for (Position pos = 0; pos < count; ++pos)
    {
        if (collection[pos] == value)
        {
            return static_cast<int>(pos);
        }
    }
    // Fell off the end: nothing matched.
    return -1;
}
class CUDATimer {
public:
std::vector<TimingInfo> timingEvents;
int currentIteration;
CUDATimer() : currentIteration(0) {}
void nextIteration() {
++currentIteration;
}
void reset() {
currentIteration = 0;
timingEvents.clear();
}
void startEvent(const std::string& name) {
TimingInfo timingInfo;
cudaEventCreate(&timingInfo.startEvent);
//cudaEventCreate(&timingInfo.endEvent);
cudaEventCreateWithFlags(&timingInfo.endEvent, cudaEventBlockingSync);
cudaEventRecord(timingInfo.startEvent);
timingInfo.eventName = name;
timingEvents.push_back(timingInfo);
}
void endEvent() {
TimingInfo& timingInfo = timingEvents[timingEvents.size() - 1];
cudaEventRecord(timingInfo.endEvent, 0);
}
void evaluate(bool showSum = false, bool showMax = false) {
std::vector<std::string> aggregateTimingNames;
std::vector<float> aggregateTimes;
std::vector<int> aggregateCounts;
std::vector<float> maxTimes;
for (int i = 0; i < timingEvents.size(); ++i) {
TimingInfo& eventInfo = timingEvents[i];
cudaEventSynchronize(eventInfo.endEvent);
cudaEventElapsedTime(&eventInfo.duration, eventInfo.startEvent, eventInfo.endEvent);
int index = findFirstIndex(aggregateTimingNames, eventInfo.eventName);
if (index < 0) {
aggregateTimingNames.push_back(eventInfo.eventName);
aggregateTimes.push_back(eventInfo.duration);
aggregateCounts.push_back(1);
maxTimes.push_back(eventInfo.duration);
}
else {
aggregateTimes[index] = aggregateTimes[index] + eventInfo.duration;
aggregateCounts[index] = aggregateCounts[index] + 1;
if (maxTimes[index] < eventInfo.duration)
maxTimes[index] = eventInfo.duration;
}
//if (eventInfo.eventName == "MultiplyDescriptor") {
// printf("time %f\n", eventInfo.duration);
//}
}
printf("------------------------------------------------------------\n");
printf(" Kernel | Count | Total | Average \n");
printf("--------------------------+----------+-----------+----------\n");
for (int i = 0; i < aggregateTimingNames.size(); ++i) {
printf("--------------------------+----------+-----------+----------\n");
printf(" %-24s | %4d | %8.3fms| %7.4fms\n", aggregateTimingNames[i].c_str(), aggregateCounts[i], aggregateTimes[i], aggregateTimes[i] / aggregateCounts[i]);
}
printf("------------------------------------------------------------\n\n");
if (showMax) {
printf("------------------------------------------------------------\n");
printf(" Kernel | Count | Total | Max \n");
printf("--------------------------+----------+-----------+----------\n");
for (int i = 0; i < aggregateTimingNames.size(); ++i) {
printf("--------------------------+----------+-----------+----------\n");
printf(" %-24s | %4d | %8.3fms| %7.4fms\n", aggregateTimingNames[i].c_str(), aggregateCounts[i], aggregateTimes[i], maxTimes[i]);
}
printf("------------------------------------------------------------\n\n");
}
if (showSum) {
int sumCount = 0;
float sumAvg = 0.0f;
for (unsigned int i = 0; i < aggregateTimingNames.size(); i++) {
sumCount += aggregateCounts[i];
sumAvg += aggregateTimes[i] / aggregateCounts[i];
}
printf("\n");
printf("-----------------------------------------------------\n");
printf(" TOTAL | Count | Total/Run \n");
printf("--------------------------+----------+---------------\n");
printf(" %-24s | %4d | %8.3fms\n", "TOTAL Avg", sumCount, sumAvg);
printf("-----------------------------------------------------\n");
}
}
};
#endif