Skip to content

Commit 5fc295d

Browse files
committed
GPU: Use GPUCommonAlignedAlloc.h in more places
1 parent f58f435 commit 5fc295d

File tree

5 files changed

+23
-21
lines changed

5 files changed

+23
-21
lines changed

GPU/Common/GPUCommonAlignedAlloc.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,17 @@ namespace o2::gpu
2222

2323
template <typename T, std::size_t MIN_ALIGN = 0>
2424
struct alignedDeleter {
25-
void operator()(void* ptr) { ::operator delete(ptr, std::align_val_t(std::max(MIN_ALIGN, alignof(T)))); };
25+
void operator()(void* ptr) { ::operator delete(ptr, std::align_val_t(std::max(MIN_ALIGN, alignof(T)))); }; // TODO: Make this static once we go to C++ 23
2626
};
2727

2828
template <typename T, std::size_t MIN_ALIGN = 0>
2929
struct alignedAllocator {
3030
using value_type = T;
31-
T* allocate(std::size_t n)
31+
static T* allocate(std::size_t n)
3232
{
3333
return (T*)::operator new(n, std::align_val_t(std::max(MIN_ALIGN, alignof(T))));
3434
}
35-
void deallocate(T* ptr, std::size_t)
35+
static void deallocate(T* ptr, std::size_t)
3636
{
3737
alignedDeleter<T, MIN_ALIGN>()(ptr);
3838
}

GPU/GPUTracking/Base/GPUReconstruction.cxx

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,7 @@ int32_t GPUReconstruction::Exit()
467467
if (mMemoryResources[i].mReuse >= 0) {
468468
continue;
469469
}
470-
::operator delete(mMemoryResources[i].mPtrDevice, std::align_val_t(constants::GPU_BUFFER_ALIGNMENT));
470+
alignedDefaultBufferDeleter()(mMemoryResources[i].mPtrDevice);
471471
mMemoryResources[i].mPtr = mMemoryResources[i].mPtrDevice = nullptr;
472472
}
473473
}
@@ -630,7 +630,7 @@ void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res,
630630
if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && (control == nullptr || control->useInternal())) {
631631
if (!(res->mType & GPUMemoryResource::MEMORY_EXTERNAL)) {
632632
if (res->mPtrDevice && res->mReuse < 0) {
633-
::operator delete(res->mPtrDevice, std::align_val_t(constants::GPU_BUFFER_ALIGNMENT));
633+
alignedDefaultBufferDeleter()(res->mPtrDevice);
634634
}
635635
res->mSize = std::max((size_t)res->SetPointers((void*)1) - 1, res->mOverrideSize);
636636
if (res->mReuse >= 0) {
@@ -640,7 +640,7 @@ void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res,
640640
}
641641
res->mPtrDevice = mMemoryResources[res->mReuse].mPtrDevice;
642642
} else {
643-
res->mPtrDevice = ::operator new(res->mSize + constants::GPU_BUFFER_ALIGNMENT, std::align_val_t(constants::GPU_BUFFER_ALIGNMENT));
643+
res->mPtrDevice = alignedDefaultBufferAllocator<char>(res->mSize + constants::GPU_BUFFER_ALIGNMENT);
644644
}
645645
res->mPtr = GPUProcessor::alignPointer<constants::GPU_BUFFER_ALIGNMENT>(res->mPtrDevice);
646646
res->SetPointers(res->mPtr);
@@ -731,7 +731,7 @@ void* GPUReconstruction::AllocateDirectMemory(size_t size, int32_t type)
731731
{
732732
stdspinlock spinlock(mMemoryMutex);
733733
if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) {
734-
char* retVal = new (std::align_val_t(constants::GPU_BUFFER_ALIGNMENT)) char[size];
734+
char* retVal = alignedDefaultBufferAllocator<char>(size);
735735
if ((type & GPUMemoryResource::MEMORY_STACK)) {
736736
mNonPersistentIndividualDirectAllocations.emplace_back(retVal, alignedDefaultBufferDeleter());
737737
} else {
@@ -796,7 +796,7 @@ void* GPUReconstruction::AllocateVolatileMemory(size_t size, bool device)
796796
if (device) {
797797
return AllocateVolatileDeviceMemory(size);
798798
}
799-
char* retVal = new (std::align_val_t(constants::GPU_BUFFER_ALIGNMENT)) char[size];
799+
char* retVal = alignedDefaultBufferAllocator<char>(size);
800800
stdspinlock spinlock(mMemoryMutex);
801801
mVolatileChunks.emplace_back(retVal, alignedDefaultBufferDeleter());
802802
return retVal;
@@ -876,7 +876,7 @@ void GPUReconstruction::FreeRegisteredMemory(GPUMemoryResource* res)
876876
std::cout << "Freeing " << res->mName << ": size " << res->mSize << " (reused " << res->mReuse << ")\n";
877877
}
878878
if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && res->mReuse < 0) {
879-
::operator delete(res->mPtrDevice, std::align_val_t(constants::GPU_BUFFER_ALIGNMENT));
879+
alignedDefaultBufferDeleter()(res->mPtrDevice);
880880
}
881881
res->mPtr = nullptr;
882882
res->mPtrDevice = nullptr;
@@ -916,7 +916,7 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag, cons
916916
std::cout << "Freeing NonPersistent " << res->mName << ": size " << res->mSize << " (reused " << res->mReuse << ")\n";
917917
}
918918
if (res->mReuse < 0) {
919-
::operator delete(res->mPtrDevice, std::align_val_t(constants::GPU_BUFFER_ALIGNMENT));
919+
alignedDefaultBufferDeleter()(res->mPtrDevice);
920920
}
921921
res->mPtr = nullptr;
922922
res->mPtrDevice = nullptr;

GPU/GPUTracking/Base/GPUReconstruction.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,13 @@ class GPUReconstruction
253253
static int32_t getHostThreadIndex();
254254
int32_t GetMaxBackendThreads() const { return mMaxBackendThreads; }
255255

256+
using alignedDefaultBufferDeleter = alignedDeleter<char, constants::GPU_BUFFER_ALIGNMENT>;
257+
template <typename T>
258+
static T* alignedDefaultBufferAllocator(size_t n)
259+
{
260+
return alignedAllocator<char, constants::GPU_BUFFER_ALIGNMENT>::allocate(n); // Note that char is correct, since the buffer is a char buffer, so n is a size in bytes. NOTE(review): the char* result is returned as T* without a cast, so this presumably only compiles for T = char (every call visible in this diff uses <char>) - confirm
261+
}
262+
256263
protected:
257264
void AllocateRegisteredMemoryInternal(GPUMemoryResource* res, GPUOutputControl* control, GPUReconstruction* recPool);
258265
void FreeRegisteredMemory(GPUMemoryResource* res);
@@ -387,7 +394,6 @@ class GPUReconstruction
387394
std::unordered_map<GPUMemoryReuse::ID, MemoryReuseMeta> mMemoryReuse1to1;
388395
std::vector<std::tuple<void*, void*, size_t, size_t, uint64_t>> mNonPersistentMemoryStack; // hostPoolAddress, devicePoolAddress, individualAllocationCount, directIndividualAllocationCount, tag
389396
std::vector<GPUMemoryResource*> mNonPersistentIndividualAllocations;
390-
using alignedDefaultBufferDeleter = alignedDeleter<char, constants::GPU_BUFFER_ALIGNMENT>;
391397
std::vector<std::unique_ptr<char[], alignedDefaultBufferDeleter>> mNonPersistentIndividualDirectAllocations;
392398
std::vector<std::unique_ptr<char[], alignedDefaultBufferDeleter>> mDirectMemoryChunks;
393399
std::vector<std::unique_ptr<char[], alignedDefaultBufferDeleter>> mVolatileChunks;

GPU/GPUTracking/Base/GPUReconstructionCPU.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ int32_t GPUReconstructionCPU::InitDevice()
189189
if (mDeviceMemorySize > mHostMemorySize) {
190190
mHostMemorySize = mDeviceMemorySize;
191191
}
192-
mHostMemoryBase = ::operator new(mHostMemorySize, std::align_val_t(constants::GPU_BUFFER_ALIGNMENT));
192+
mHostMemoryBase = alignedDefaultBufferAllocator<char>(mHostMemorySize);
193193
}
194194
mHostMemoryPermanent = mHostMemoryBase;
195195
ClearAllocatedMemory();
@@ -205,7 +205,7 @@ int32_t GPUReconstructionCPU::ExitDevice()
205205
{
206206
if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
207207
if (mMaster == nullptr) {
208-
::operator delete(mHostMemoryBase, std::align_val_t(constants::GPU_BUFFER_ALIGNMENT));
208+
alignedDefaultBufferDeleter()(mHostMemoryBase);
209209
}
210210
mHostMemoryPool = mHostMemoryBase = mHostMemoryPoolEnd = mHostMemoryPermanent = nullptr;
211211
mHostMemorySize = 0;

GPU/GPUTracking/Standalone/Benchmark/standalone.cxx

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,7 @@ uint32_t syncAsyncDecodedClusters = 0;
8181
GPUChainTracking *chainTracking, *chainTrackingAsync, *chainTrackingPipeline;
8282
GPUChainITS *chainITS, *chainITSAsync, *chainITSPipeline;
8383
std::string eventsDir;
84-
void unique_ptr_aligned_delete(char* v)
85-
{
86-
::operator delete(v, std::align_val_t(constants::GPU_BUFFER_ALIGNMENT));
87-
}
88-
std::unique_ptr<char, void (*)(char*)> outputmemory(nullptr, unique_ptr_aligned_delete), outputmemoryPipeline(nullptr, unique_ptr_aligned_delete), inputmemory(nullptr, unique_ptr_aligned_delete);
84+
std::unique_ptr<char, GPUReconstruction::alignedDefaultBufferDeleter> outputmemory(nullptr, GPUReconstruction::alignedDefaultBufferDeleter()), outputmemoryPipeline(nullptr, GPUReconstruction::alignedDefaultBufferDeleter()), inputmemory(nullptr, GPUReconstruction::alignedDefaultBufferDeleter());
8985
std::unique_ptr<GPUDisplayFrontendInterface> eventDisplay;
9086
std::unique_ptr<GPUReconstructionTimeframe> tf;
9187
int32_t nEventsInDirectory = 0;
@@ -251,20 +247,20 @@ int32_t ReadConfiguration(int argc, char** argv)
251247

252248
if (configStandalone.outputcontrolmem) {
253249
bool forceEmptyMemory = getenv("LD_PRELOAD") && strstr(getenv("LD_PRELOAD"), "valgrind") != nullptr;
254-
outputmemory.reset((char*)::operator new(configStandalone.outputcontrolmem, std::align_val_t(constants::GPU_BUFFER_ALIGNMENT)));
250+
outputmemory.reset(GPUReconstruction::alignedDefaultBufferAllocator<char>(configStandalone.outputcontrolmem));
255251
if (forceEmptyMemory) {
256252
printf("Valgrind detected, emptying GPU output memory to avoid false positive undefined reads");
257253
memset(outputmemory.get(), 0, configStandalone.outputcontrolmem);
258254
}
259255
if (configStandalone.proc.doublePipeline) {
260-
outputmemoryPipeline.reset((char*)::operator new(configStandalone.outputcontrolmem, std::align_val_t(constants::GPU_BUFFER_ALIGNMENT)));
256+
outputmemoryPipeline.reset(GPUReconstruction::alignedDefaultBufferAllocator<char>(configStandalone.outputcontrolmem));
261257
if (forceEmptyMemory) {
262258
memset(outputmemoryPipeline.get(), 0, configStandalone.outputcontrolmem);
263259
}
264260
}
265261
}
266262
if (configStandalone.inputcontrolmem) {
267-
inputmemory.reset((char*)::operator new(configStandalone.inputcontrolmem, std::align_val_t(constants::GPU_BUFFER_ALIGNMENT)));
263+
inputmemory.reset(GPUReconstruction::alignedDefaultBufferAllocator<char>(configStandalone.inputcontrolmem));
268264
}
269265

270266
configStandalone.proc.showOutputStat = true;

0 commit comments

Comments
 (0)