Skip to content

Commit 91c69e0

Browse files
Patch global variables and constants once
This change replaces the mechanism that patched global constants and variables in a kernel once per relocation with one that patches them only once. This improves link-time performance for kernels with multiple global symbols. Signed-off-by: Luzynski, Sebastian Jozef <sebastian.jozef.luzynski@intel.com>
1 parent 257967e commit 91c69e0

File tree

11 files changed

+161
-119
lines changed

11 files changed

+161
-119
lines changed

level_zero/core/source/module/module_imp.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -893,7 +893,9 @@ bool ModuleImp::linkBinary() {
893893
globalsForPatching, constantsForPatching,
894894
isaSegmentsForPatching, unresolvedExternalsInfo, this->device->getNEODevice(),
895895
translationUnit->programInfo.globalConstants.initData,
896+
translationUnit->programInfo.globalConstants.size,
896897
translationUnit->programInfo.globalVariables.initData,
898+
translationUnit->programInfo.globalVariables.size,
897899
kernelDescriptors, translationUnit->programInfo.externalFunctions);
898900
this->symbols = linker.extractRelocatedSymbols();
899901
if (LinkingStatus::LinkedFully != linkStatus) {

opencl/source/program/process_device_binary.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ const KernelInfo *Program::getKernelInfo(size_t ordinal, uint32_t rootDeviceInde
5757
return kernelInfoArray[ordinal];
5858
}
5959

60-
cl_int Program::linkBinary(Device *pDevice, const void *constantsInitData, const void *variablesInitData,
60+
cl_int Program::linkBinary(Device *pDevice, const void *constantsInitData, size_t constantsInitDataSize, const void *variablesInitData, size_t variablesInitDataSize,
6161
const ProgramInfo::GlobalSurfaceInfo &stringsInfo, std::vector<NEO::ExternalFunctionInfo> &extFuncInfos) {
6262
auto linkerInput = getLinkerInput(pDevice->getRootDeviceIndex());
6363
if (linkerInput == nullptr) {
@@ -112,7 +112,8 @@ cl_int Program::linkBinary(Device *pDevice, const void *constantsInitData, const
112112
bool linkSuccess = LinkingStatus::LinkedFully == linker.link(globals, constants, exportedFunctions, strings,
113113
globalsForPatching, constantsForPatching,
114114
isaSegmentsForPatching, unresolvedExternalsInfo,
115-
pDevice, constantsInitData, variablesInitData,
115+
pDevice, constantsInitData, constantsInitDataSize,
116+
variablesInitData, variablesInitDataSize,
116117
kernelDescriptors, extFuncInfos);
117118
setSymbols(rootDeviceIndex, linker.extractRelocatedSymbols());
118119
if (false == linkSuccess) {
@@ -264,7 +265,8 @@ cl_int Program::processProgramInfo(ProgramInfo &src, const ClDevice &clDevice) {
264265
kernelInfo->apply(deviceInfoConstants);
265266
}
266267

267-
return linkBinary(&clDevice.getDevice(), src.globalConstants.initData, src.globalVariables.initData, src.globalStrings, src.externalFunctions);
268+
return linkBinary(&clDevice.getDevice(), src.globalConstants.initData, src.globalConstants.size, src.globalVariables.initData,
269+
src.globalVariables.size, src.globalStrings, src.externalFunctions);
268270
}
269271

270272
void Program::processDebugData(uint32_t rootDeviceIndex) {

opencl/source/program/program.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -297,8 +297,9 @@ class Program : public BaseObject<_cl_program> {
297297

298298
cl_int packDeviceBinary(ClDevice &clDevice);
299299

300-
MOCKABLE_VIRTUAL cl_int linkBinary(Device *pDevice, const void *constantsInitData, const void *variablesInitData,
301-
const ProgramInfo::GlobalSurfaceInfo &stringInfo, std::vector<NEO::ExternalFunctionInfo> &extFuncInfos);
300+
MOCKABLE_VIRTUAL cl_int linkBinary(Device *pDevice, const void *constantsInitData, size_t constantsInitDataSize, const void *variablesInitData,
301+
size_t variablesInitDataSize, const ProgramInfo::GlobalSurfaceInfo &stringInfo,
302+
std::vector<NEO::ExternalFunctionInfo> &extFuncInfos);
302303

303304
void updateNonUniformFlag();
304305
void updateNonUniformFlag(const Program **inputProgram, size_t numInputPrograms);

opencl/test/unit_test/command_queue/blit_enqueue_2_tests.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -605,7 +605,7 @@ HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWithoutCpuAc
605605

606606
auto initialTaskCount = bcsMockContext->bcsCsr->peekTaskCount();
607607

608-
auto ret = program.linkBinary(&device->getDevice(), nullptr, nullptr, {}, externalFunctions);
608+
auto ret = program.linkBinary(&device->getDevice(), nullptr, 0, nullptr, 0, {}, externalFunctions);
609609
EXPECT_EQ(CL_SUCCESS, ret);
610610

611611
EXPECT_EQ(initialTaskCount + 1, bcsMockContext->bcsCsr->peekTaskCount());
@@ -614,4 +614,4 @@ HWTEST_TEMPLATED_F(BlitCopyTests, givenKernelAllocationInLocalMemoryWithoutCpuAc
614614
device->getMemoryManager()->freeGraphicsMemory(kernelInfo.kernelAllocation);
615615
}
616616

617-
} // namespace NEO
617+
} // namespace NEO

opencl/test/unit_test/program/program_data_tests.cpp

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -491,9 +491,10 @@ TEST_F(ProgramDataTest, GivenProgramWith32bitPointerOptWhenProgramScopeConstantB
491491
constantSurfaceStorage[1] = sentinel;
492492

493493
programInfo.globalConstants.initData = constantSurface.mockGfxAllocation.getUnderlyingBuffer();
494+
programInfo.globalConstants.size = constantSurface.mockGfxAllocation.getUnderlyingBufferSize();
494495

495496
pProgram->setLinkerInput(pClDevice->getRootDeviceIndex(), std::move(programInfo.linkerInput));
496-
pProgram->linkBinary(&pClDevice->getDevice(), programInfo.globalConstants.initData, programInfo.globalVariables.initData, {}, prog->externalFunctions);
497+
pProgram->linkBinary(&pClDevice->getDevice(), programInfo.globalConstants.initData, programInfo.globalConstants.size, programInfo.globalVariables.initData, programInfo.globalVariables.size, {}, prog->externalFunctions);
497498
uint32_t expectedAddr = static_cast<uint32_t>(constantSurface.getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddressToPatch());
498499
EXPECT_EQ(expectedAddr, constantSurfaceStorage[0]);
499500
EXPECT_EQ(sentinel, constantSurfaceStorage[1]);
@@ -537,9 +538,10 @@ TEST_F(ProgramDataTest, GivenProgramWith32bitPointerOptWhenProgramScopeGlobalPoi
537538
globalSurfaceStorage[1] = sentinel;
538539

539540
programInfo.globalVariables.initData = globalSurface.mockGfxAllocation.getUnderlyingBuffer();
541+
programInfo.globalVariables.size = globalSurface.mockGfxAllocation.getUnderlyingBufferSize();
540542

541543
pProgram->setLinkerInput(pClDevice->getRootDeviceIndex(), std::move(programInfo.linkerInput));
542-
pProgram->linkBinary(&pClDevice->getDevice(), programInfo.globalConstants.initData, programInfo.globalVariables.initData, {}, prog->externalFunctions);
544+
pProgram->linkBinary(&pClDevice->getDevice(), programInfo.globalConstants.initData, programInfo.globalConstants.size, programInfo.globalVariables.initData, programInfo.globalVariables.size, {}, prog->externalFunctions);
543545
uint32_t expectedAddr = static_cast<uint32_t>(globalSurface.getGraphicsAllocation(pClDevice->getRootDeviceIndex())->getGpuAddressToPatch());
544546
EXPECT_EQ(expectedAddr, globalSurfaceStorage[0]);
545547
EXPECT_EQ(sentinel, globalSurfaceStorage[1]);
@@ -566,7 +568,7 @@ TEST(ProgramLinkBinaryTest, whenLinkerInputEmptyThenLinkSuccessful) {
566568
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
567569
MockProgram program{nullptr, false, toClDeviceVector(*device)};
568570
program.setLinkerInput(device->getRootDeviceIndex(), std::move(linkerInput));
569-
auto ret = program.linkBinary(&device->getDevice(), nullptr, nullptr, {}, program.externalFunctions);
571+
auto ret = program.linkBinary(&device->getDevice(), nullptr, 0, nullptr, 0, {}, program.externalFunctions);
570572
EXPECT_EQ(CL_SUCCESS, ret);
571573
}
572574

@@ -592,7 +594,7 @@ TEST(ProgramLinkBinaryTest, whenLinkerUnresolvedExternalThenLinkFailedAndBuildLo
592594

593595
std::string buildLog = program.getBuildLog(device->getRootDeviceIndex());
594596
EXPECT_TRUE(buildLog.empty());
595-
auto ret = program.linkBinary(&device->getDevice(), nullptr, nullptr, {}, program.externalFunctions);
597+
auto ret = program.linkBinary(&device->getDevice(), nullptr, 0, nullptr, 0, {}, program.externalFunctions);
596598
EXPECT_NE(CL_SUCCESS, ret);
597599
program.getKernelInfoArray(rootDeviceIndex).clear();
598600
buildLog = program.getBuildLog(rootDeviceIndex);
@@ -640,7 +642,7 @@ TEST_F(ProgramDataTest, whenLinkerInputValidThenIsaIsProperlyPatched) {
640642
buildInfo.globalSurface = new MockGraphicsAllocation(globalVariablesBuffer.data(), globalVariablesBuffer.size());
641643
buildInfo.constantSurface = new MockGraphicsAllocation(globalConstantsBuffer.data(), globalConstantsBuffer.size());
642644

643-
auto ret = program.linkBinary(&pClDevice->getDevice(), globalConstantsInitData.data(), globalVariablesInitData.data(), {}, program.externalFunctions);
645+
auto ret = program.linkBinary(&pClDevice->getDevice(), globalConstantsInitData.data(), globalConstantsInitData.size(), globalVariablesInitData.data(), globalVariablesInitData.size(), {}, program.externalFunctions);
644646
EXPECT_EQ(CL_SUCCESS, ret);
645647

646648
linkerInput.reset(static_cast<WhiteBox<LinkerInput> *>(buildInfo.linkerInput.release()));
@@ -688,7 +690,7 @@ TEST_F(ProgramDataTest, whenRelocationsAreNotNeededThenIsaIsPreserved) {
688690
buildInfo.globalSurface = new MockGraphicsAllocation(globalVariablesBuffer.data(), globalVariablesBuffer.size());
689691
buildInfo.constantSurface = new MockGraphicsAllocation(globalConstantsBuffer.data(), globalConstantsBuffer.size());
690692

691-
auto ret = program.linkBinary(&pClDevice->getDevice(), globalConstantsInitData.data(), globalVariablesInitData.data(), {}, program.externalFunctions);
693+
auto ret = program.linkBinary(&pClDevice->getDevice(), globalConstantsInitData.data(), globalConstantsInitData.size(), globalVariablesInitData.data(), globalVariablesInitData.size(), {}, program.externalFunctions);
692694
EXPECT_EQ(CL_SUCCESS, ret);
693695
EXPECT_EQ(kernelHeapData, kernelHeap);
694696

@@ -729,7 +731,7 @@ TEST(ProgramStringSectionTest, WhenConstStringBufferIsPresentThenUseItForLinking
729731
const char constStringData[] = "Hello World!\n";
730732
auto stringsAddr = reinterpret_cast<uintptr_t>(constStringData);
731733

732-
auto ret = program.linkBinary(&device->getDevice(), nullptr, nullptr, {constStringData, sizeof(constStringData)}, program.externalFunctions);
734+
auto ret = program.linkBinary(&device->getDevice(), nullptr, 0, nullptr, 0, {constStringData, sizeof(constStringData)}, program.externalFunctions);
733735
EXPECT_EQ(CL_SUCCESS, ret);
734736
EXPECT_EQ(static_cast<size_t>(stringsAddr), *reinterpret_cast<size_t *>(patchAddr));
735737

@@ -754,7 +756,7 @@ TEST(ProgramImplicitArgsTest, givenImplicitRelocationAndStackCallsThenKernelRequ
754756
linkerInput->textRelocations.push_back({{implicitArgsRelocationSymbolName, 0x8, LinkerInput::RelocationInfo::Type::AddressLow, SegmentType::Instructions}});
755757
linkerInput->traits.requiresPatchingOfInstructionSegments = true;
756758
program.setLinkerInput(rootDeviceIndex, std::move(linkerInput));
757-
auto ret = program.linkBinary(&device->getDevice(), nullptr, nullptr, {}, program.externalFunctions);
759+
auto ret = program.linkBinary(&device->getDevice(), nullptr, 0, nullptr, 0, {}, program.externalFunctions);
758760
EXPECT_EQ(CL_SUCCESS, ret);
759761

760762
EXPECT_TRUE(kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs);
@@ -786,7 +788,7 @@ TEST(ProgramImplicitArgsTest, givenImplicitRelocationAndEnabledDebuggerThenKerne
786788
linkerInput->textRelocations.push_back({{implicitArgsRelocationSymbolName, 0x8, LinkerInput::RelocationInfo::Type::AddressLow, SegmentType::Instructions}});
787789
linkerInput->traits.requiresPatchingOfInstructionSegments = true;
788790
program.setLinkerInput(rootDeviceIndex, std::move(linkerInput));
789-
auto ret = program.linkBinary(&device->getDevice(), nullptr, nullptr, {}, program.externalFunctions);
791+
auto ret = program.linkBinary(&device->getDevice(), nullptr, 0, nullptr, 0, {}, program.externalFunctions);
790792
EXPECT_EQ(CL_SUCCESS, ret);
791793

792794
EXPECT_TRUE(kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs);
@@ -812,7 +814,7 @@ TEST(ProgramImplicitArgsTest, givenImplicitRelocationAndNoStackCallsAndDisabledD
812814
linkerInput->textRelocations.push_back({{implicitArgsRelocationSymbolName, 0x8, LinkerInput::RelocationInfo::Type::AddressLow, SegmentType::Instructions}});
813815
linkerInput->traits.requiresPatchingOfInstructionSegments = true;
814816
program.setLinkerInput(rootDeviceIndex, std::move(linkerInput));
815-
auto ret = program.linkBinary(&device->getDevice(), nullptr, nullptr, {}, program.externalFunctions);
817+
auto ret = program.linkBinary(&device->getDevice(), nullptr, 0, nullptr, 0, {}, program.externalFunctions);
816818
EXPECT_EQ(CL_SUCCESS, ret);
817819

818820
EXPECT_FALSE(kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs);

shared/source/compiler_interface/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ set(NEO_CORE_COMPILER_INTERFACE
1717
${CMAKE_CURRENT_SOURCE_DIR}/external_functions.h
1818
${CMAKE_CURRENT_SOURCE_DIR}/intermediate_representations.h
1919
${CMAKE_CURRENT_SOURCE_DIR}/linker.h
20-
${CMAKE_CURRENT_SOURCE_DIR}/linker.inl
2120
${CMAKE_CURRENT_SOURCE_DIR}/linker.cpp
2221
${CMAKE_CURRENT_SOURCE_DIR}/compiler_options.h
2322
${CMAKE_CURRENT_SOURCE_DIR}/compiler_options.cpp

0 commit comments

Comments
 (0)