Skip to content

Commit 881895b

Browse files
Stop querying GPU frequency during each profiling data calculation
Change the type of the profiling timer resolution in device info to double. Change-Id: I41a67ecf61cd3bdc5a997b1f083b9998063f4f7f
1 parent a3b782f commit 881895b

File tree

10 files changed

+47
-16
lines changed

10 files changed

+47
-16
lines changed

runtime/compiler_interface/compiler_interface.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ CIF::RAII::UPtr_t<IGC::IgcOclTranslationCtxTagOCL> CompilerInterface::createIgcT
418418
return nullptr;
419419
}
420420

421-
newDeviceCtx->SetProfilingTimerResolution(static_cast<float>(device.getDeviceInfo().profilingTimerResolution));
421+
newDeviceCtx->SetProfilingTimerResolution(static_cast<float>(device.getDeviceInfo().outProfilingTimerResolution));
422422
auto igcPlatform = newDeviceCtx->GetPlatformHandle();
423423
auto igcGtSystemInfo = newDeviceCtx->GetGTSystemInfoHandle();
424424
auto igcFeWa = newDeviceCtx->GetIgcFeaturesAndWorkaroundsHandle();

runtime/device/device_caps.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,8 @@ void Device::initializeCaps() {
208208
deviceInfo.globalMemSize = alignDown(deviceInfo.globalMemSize, MemoryConstants::pageSize);
209209

210210
deviceInfo.globalMemCacheType = CL_READ_WRITE_CACHE;
211-
deviceInfo.profilingTimerResolution = static_cast<size_t>(getProfilingTimerResolution());
211+
deviceInfo.profilingTimerResolution = getProfilingTimerResolution();
212+
deviceInfo.outProfilingTimerResolution = static_cast<size_t>(deviceInfo.profilingTimerResolution);
212213
deviceInfo.memBaseAddressAlign = 1024;
213214
deviceInfo.minDataTypeAlignSize = 128;
214215

runtime/device/device_info.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ struct DeviceInfo {
9292
cl_device_local_mem_type localMemType;
9393
cl_ulong localMemSize;
9494
cl_bool errorCorrectionSupport;
95-
size_t profilingTimerResolution;
95+
double profilingTimerResolution;
96+
size_t outProfilingTimerResolution;
9697
cl_bool endianLittle;
9798
cl_bool deviceAvailable;
9899
cl_bool compilerAvailable;

runtime/device/device_info_map.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017, Intel Corporation
2+
* Copyright (c) 2017 - 2018, Intel Corporation
33
*
44
* Permission is hereby granted, free of charge, to any person obtaining a
55
* copy of this software and associated documentation files (the "Software"),
@@ -133,7 +133,7 @@ template<> struct Map<CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG > : pu
133133
template<> struct Map<CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT > : public MapBase<CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, uint32_t, &DeviceInfo::preferredVectorWidthShort> {};
134134
template<> struct Map<CL_DEVICE_PRINTF_BUFFER_SIZE > : public MapBase<CL_DEVICE_PRINTF_BUFFER_SIZE, size_t, &DeviceInfo::printfBufferSize> {};
135135
template<> struct Map<CL_DEVICE_PROFILE > : public MapBase<CL_DEVICE_PROFILE, const char *, &DeviceInfo::profile> {};
136-
template<> struct Map<CL_DEVICE_PROFILING_TIMER_RESOLUTION > : public MapBase<CL_DEVICE_PROFILING_TIMER_RESOLUTION, size_t, &DeviceInfo::profilingTimerResolution> {};
136+
template<> struct Map<CL_DEVICE_PROFILING_TIMER_RESOLUTION > : public MapBase<CL_DEVICE_PROFILING_TIMER_RESOLUTION, size_t, &DeviceInfo::outProfilingTimerResolution> {};
137137
template<> struct Map<CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE > : public MapBase<CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, uint32_t, &DeviceInfo::queueOnDeviceMaxSize> {};
138138
template<> struct Map<CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE > : public MapBase<CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, uint32_t, &DeviceInfo::queueOnDevicePreferredSize> {};
139139
template<> struct Map<CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES > : public MapBase<CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, uint64_t, &DeviceInfo::queueOnDeviceProperties> {};

runtime/event/event.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ bool Event::calcProfilingData() {
259259

260260
int64_t c0 = 0;
261261
if (!dataCalculated && timeStampNode && !profilingCpuPath) {
262-
double frequency = cmdQueue->getDevice().getProfilingTimerResolution();
262+
double frequency = cmdQueue->getDevice().getDeviceInfo().profilingTimerResolution;
263263
/* calculation based on equation
264264
CpuTime = GpuTime * scalar + const( == c0)
265265
scalar = DeltaCpu( == dCpu) / DeltaGpu( == dGpu)

unit_tests/gen8/test_device_caps.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ GEN8TEST_F(Gen8DeviceCaps, image3DDimensions) {
7777

7878
BDWTEST_F(Gen8DeviceCaps, BdwProfilingTimerResolution) {
7979
const auto &caps = pDevice->getDeviceInfo();
80-
EXPECT_EQ(83u, caps.profilingTimerResolution);
80+
EXPECT_EQ(83u, caps.outProfilingTimerResolution);
8181
}
8282

8383
typedef Test<DeviceFixture> BdwUsDeviceIdTest;

unit_tests/gen9/bxt/test_device_caps_bxt.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ BXTTEST_F(BxtDeviceCaps, reportsOcl12) {
3535

3636
BXTTEST_F(BxtDeviceCaps, BxtProfilingTimerResolution) {
3737
const auto &caps = pDevice->getDeviceInfo();
38-
EXPECT_EQ(52u, caps.profilingTimerResolution);
38+
EXPECT_EQ(52u, caps.outProfilingTimerResolution);
3939
}
4040

4141
BXTTEST_F(BxtDeviceCaps, BxtClVersionSupport) {

unit_tests/gen9/glk/test_device_caps_glk.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ typedef Test<DeviceFixture> Gen9DeviceCaps;
2929

3030
GLKTEST_F(Gen9DeviceCaps, GlkProfilingTimerResolution) {
3131
const auto &caps = pDevice->getDeviceInfo();
32-
EXPECT_EQ(52u, caps.profilingTimerResolution);
32+
EXPECT_EQ(52u, caps.outProfilingTimerResolution);
3333
}
3434

3535
GLKTEST_F(Gen9DeviceCaps, givenGlkDeviceWhenAskedForDoubleSupportThenTrueIsReturned) {

unit_tests/gen9/skl/test_device_caps_skl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ SKLTEST_F(SklDeviceCaps, reportsOcl21) {
3535

3636
SKLTEST_F(SklDeviceCaps, SklProfilingTimerResolution) {
3737
const auto &caps = pDevice->getDeviceInfo();
38-
EXPECT_EQ(83u, caps.profilingTimerResolution);
38+
EXPECT_EQ(83u, caps.outProfilingTimerResolution);
3939
}
4040

4141
SKLTEST_F(SklDeviceCaps, givenSklDeviceWhenAskedFor32BitSupportThenFalseIsReturned) {

unit_tests/profiling/profiling_tests.cpp

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -402,13 +402,44 @@ struct MockTagNode : public TagNode<TagType> {
402402
}
403403
};
404404

405-
TEST(EventProfilingTest, calcProfilingDataSetsEndTimestampInCompleteTimestampWhenCompleteIsZero) {
406-
MockDevice *device = DeviceHelper<>::create();
405+
// Test double derived from OSTime. Each overridden method records a test
// failure through EXPECT_FALSE(true) when it is called, and then returns a
// fixed placeholder value. The constructor increments the static counter
// instanceNum so a test can check how many instances were created.
class MyOSTime : public OSTime {
406+
public:
407+
static int instanceNum; // incremented once per constructed MyOSTime
408+
MyOSTime() {
409+
instanceNum++;
410+
}
411+
// Fails the test if called; returns 1.0.
double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override {
412+
EXPECT_FALSE(true);
413+
return 1.0;
414+
}
415+
// Fails the test if called; returns false and does not write to pGpuCpuTime.
bool getCpuGpuTime(TimeStampData *pGpuCpuTime) override {
416+
EXPECT_FALSE(true);
417+
return false;
418+
}
419+
// Fails the test if called; returns false and does not write to timeStamp.
bool getCpuTime(uint64_t *timeStamp) override {
420+
EXPECT_FALSE(true);
421+
return false;
422+
};
423+
// Fails the test if called; returns 0.
double getHostTimerResolution() const override {
424+
EXPECT_FALSE(true);
425+
return 0;
426+
}
427+
// Fails the test if called; returns 0.
uint64_t getCpuRawTimestamp() override {
428+
EXPECT_FALSE(true);
429+
return 0;
430+
}
431+
};
432+
int MyOSTime::instanceNum = 0;
433+
TEST(EventProfilingTest, givenEventWhenCompleteIsZeroThenCalcProfilingDataSetsEndTimestampInCompleteTimestampAndDoesntCallOsTimeMethods) {
434+
std::unique_ptr<MockDevice> device(DeviceHelper<>::create());
435+
MyOSTime::instanceNum = 0;
436+
device->setOSTime(new MyOSTime());
437+
EXPECT_EQ(1, MyOSTime::instanceNum);
407438
MockContext context;
408439
cl_command_queue_properties props[5] = {0, 0, 0, 0, 0};
409-
MockCommandQueue cmdQ(&context, device, props);
440+
MockCommandQueue cmdQ(&context, device.get(), props);
410441
cmdQ.setProfilingEnabled();
411-
cmdQ.device = device;
442+
cmdQ.device = device.get();
412443

413444
HwTimeStamps timestamp;
414445
timestamp.GlobalStartTS = 10;
@@ -429,7 +460,6 @@ TEST(EventProfilingTest, calcProfilingDataSetsEndTimestampInCompleteTimestampWhe
429460

430461
EXPECT_EQ(timestamp.ContextEndTS, timestamp.ContextCompleteTS);
431462
cmdQ.device = nullptr;
432-
delete device;
433463
}
434464

435465
struct ProfilingWithPerfCountersTests : public ProfilingTests,
@@ -774,5 +804,4 @@ HWTEST_F(ProfilingWithPerfCountersTests, GIVENCommandQueueWithProfilingPerfCount
774804

775805
pCmdQ->setPerfCountersEnabled(false, UINT32_MAX);
776806
}
777-
778807
} // namespace OCLRT

0 commit comments

Comments
 (0)