@@ -663,13 +663,13 @@ void Device::finalizeRayTracing() {
663663 if (rtDispatchGlobalsInfo == nullptr ) {
664664 continue ;
665665 }
666- for (size_t j = 0 ; j < rtDispatchGlobalsInfo->rtStacks .size (); j++) {
667- getMemoryManager ()->freeGraphicsMemory (rtDispatchGlobalsInfo->rtStacks [j]);
668- rtDispatchGlobalsInfo->rtStacks [j] = nullptr ;
666+ for (size_t j = 0 ; j < rtDispatchGlobalsInfo->rtDispatchGlobals .size (); j++) {
667+ getMemoryManager ()->freeGraphicsMemory (rtDispatchGlobalsInfo->rtDispatchGlobals [j]);
668+ rtDispatchGlobalsInfo->rtDispatchGlobals [j] = nullptr ;
669669 }
670670
671- getMemoryManager ()->freeGraphicsMemory (rtDispatchGlobalsInfo->rtDispatchGlobalsArray );
672- rtDispatchGlobalsInfo->rtDispatchGlobalsArray = nullptr ;
671+ getMemoryManager ()->freeGraphicsMemory (rtDispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation );
672+ rtDispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation = nullptr ;
673673
674674 delete rtDispatchGlobalsInfos[i];
675675 rtDispatchGlobalsInfos[i] = nullptr ;
@@ -749,55 +749,37 @@ void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
749749
750750 uint32_t extraBytesLocal = 0 ;
751751 uint32_t extraBytesGlobal = 0 ;
752- uint32_t dispatchGlobalsStride = MemoryConstants::pageSize64k;
753- UNRECOVERABLE_IF (RayTracingHelper::getDispatchGlobalSize () > dispatchGlobalsStride);
754-
755- bool allocFailed = false ;
752+ auto size = RayTracingHelper::getDispatchGlobalSize (*this , maxBvhLevels, extraBytesLocal, extraBytesGlobal);
756753
757754 const auto deviceCount = HwHelper::getSubDevicesCount (executionEnvironment->rootDeviceEnvironments [getRootDeviceIndex ()]->getHardwareInfo ());
758- auto dispatchGlobalsSize = deviceCount * dispatchGlobalsStride;
759- auto rtStackSize = RayTracingHelper::getRTStackSizePerTile (*this , deviceCount, maxBvhLevels, extraBytesLocal, extraBytesGlobal);
760755
761- std::unique_ptr<RTDispatchGlobalsInfo> dispatchGlobalsInfo = std::make_unique< RTDispatchGlobalsInfo>( );
756+ auto dispatchGlobalsInfo = new RTDispatchGlobalsInfo ( nullptr );
762757 if (dispatchGlobalsInfo == nullptr ) {
763758 return ;
764759 }
765760
766761 auto &hwInfo = getHardwareInfo ();
767762 auto &hwInfoConfig = *HwInfoConfig::get (hwInfo.platform .eProductFamily );
768763
769- GraphicsAllocation *dispatchGlobalsArrayAllocation = nullptr ;
770-
771- AllocationProperties arrayAllocProps (getRootDeviceIndex (), true , dispatchGlobalsSize,
772- AllocationType::BUFFER, true , getDeviceBitfield ());
773- arrayAllocProps.flags .resource48Bit = true ;
774- arrayAllocProps.flags .isUSMDeviceAllocation = true ;
775- dispatchGlobalsArrayAllocation = getMemoryManager ()->allocateGraphicsMemoryWithProperties (arrayAllocProps);
776-
777- if (dispatchGlobalsArrayAllocation == nullptr ) {
778- return ;
779- }
764+ std::vector<uint64_t > gpuAddressVector;
765+ bool allocFailed = false ;
780766
781767 for (unsigned int tile = 0 ; tile < deviceCount; tile++) {
782- DeviceBitfield deviceBitfield =
783- (deviceCount == 1 )
784- ? this ->getDeviceBitfield ()
785- : subdevices[tile]->getDeviceBitfield ();
786-
787- AllocationProperties allocProps (getRootDeviceIndex (), true , rtStackSize, AllocationType::BUFFER, true , deviceBitfield);
768+ AllocationProperties allocProps (getRootDeviceIndex (), true , size, AllocationType::BUFFER, true , getDeviceBitfield ());
788769 allocProps.flags .resource48Bit = true ;
789770 allocProps.flags .isUSMDeviceAllocation = true ;
790771
791- auto rtStackAllocation = getMemoryManager ()->allocateGraphicsMemoryWithProperties (allocProps);
772+ auto dispatchGlobalsAllocation = getMemoryManager ()->allocateGraphicsMemoryWithProperties (allocProps);
792773
793- if (rtStackAllocation == nullptr ) {
774+ if (dispatchGlobalsAllocation == nullptr ) {
794775 allocFailed = true ;
795776 break ;
796777 }
797778
779+ auto dispatchGlobalsPtr = dispatchGlobalsAllocation->getGpuAddress ();
798780 struct RTDispatchGlobals dispatchGlobals = {0 };
799781
800- dispatchGlobals.rtMemBasePtr = rtStackAllocation-> getGpuAddress () ;
782+ dispatchGlobals.rtMemBasePtr = size + dispatchGlobalsPtr ;
801783 dispatchGlobals.callStackHandlerKSP = reinterpret_cast <uint64_t >(nullptr );
802784 dispatchGlobals.stackSizePerRay = 0 ;
803785 dispatchGlobals.numDSSRTStacks = RayTracingHelper::stackDssMultiplier;
@@ -806,27 +788,45 @@ void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) {
806788 uint32_t *dispatchGlobalsAsArray = reinterpret_cast <uint32_t *>(&dispatchGlobals);
807789 dispatchGlobalsAsArray[7 ] = 1 ;
808790
809- MemoryTransferHelper::transferMemoryToAllocation (hwInfoConfig.isBlitCopyRequiredForLocalMemory (this ->getHardwareInfo (), *dispatchGlobalsArrayAllocation ),
791+ MemoryTransferHelper::transferMemoryToAllocation (hwInfoConfig.isBlitCopyRequiredForLocalMemory (this ->getHardwareInfo (), *dispatchGlobalsAllocation ),
810792 *this ,
811- dispatchGlobalsArrayAllocation ,
812- tile * dispatchGlobalsStride ,
793+ dispatchGlobalsAllocation ,
794+ 0 ,
813795 &dispatchGlobals,
814796 sizeof (RTDispatchGlobals));
815797
816- dispatchGlobalsInfo->rtStacks .push_back (rtStackAllocation);
798+ dispatchGlobalsInfo->rtDispatchGlobals .push_back (dispatchGlobalsAllocation);
799+ gpuAddressVector.push_back (dispatchGlobalsAllocation->getGpuAddress ());
817800 }
818801
819- if (allocFailed) {
820- for (auto allocation : dispatchGlobalsInfo->rtStacks ) {
802+ GraphicsAllocation *dispatchGlobalsArrayAllocation = nullptr ;
803+ size_t arrayAllocSize = sizeof (uint64_t ) * deviceCount;
804+
805+ if (!allocFailed) {
806+ AllocationProperties arrayAllocProps (getRootDeviceIndex (), true , arrayAllocSize,
807+ AllocationType::BUFFER, true , getDeviceBitfield ());
808+ arrayAllocProps.flags .resource48Bit = true ;
809+ arrayAllocProps.flags .isUSMDeviceAllocation = true ;
810+ dispatchGlobalsArrayAllocation = getMemoryManager ()->allocateGraphicsMemoryWithProperties (arrayAllocProps);
811+ }
812+
813+ if (dispatchGlobalsArrayAllocation == nullptr ) {
814+ for (auto allocation : dispatchGlobalsInfo->rtDispatchGlobals ) {
821815 getMemoryManager ()->freeGraphicsMemory (allocation);
822816 }
823-
824- getMemoryManager ()->freeGraphicsMemory (dispatchGlobalsArrayAllocation);
817+ delete dispatchGlobalsInfo;
825818 return ;
826819 }
827820
828- dispatchGlobalsInfo->rtDispatchGlobalsArray = dispatchGlobalsArrayAllocation;
829- rtDispatchGlobalsInfos[maxBvhLevels] = dispatchGlobalsInfo.release ();
821+ MemoryTransferHelper::transferMemoryToAllocation (hwInfoConfig.isBlitCopyRequiredForLocalMemory (this ->getHardwareInfo (), *dispatchGlobalsArrayAllocation),
822+ *this ,
823+ dispatchGlobalsArrayAllocation,
824+ 0 ,
825+ gpuAddressVector.data (),
826+ arrayAllocSize);
827+
828+ dispatchGlobalsInfo->rtDispatchGlobalsArrayAllocation = dispatchGlobalsArrayAllocation;
829+ rtDispatchGlobalsInfos[maxBvhLevels] = dispatchGlobalsInfo;
830830}
831831
832832} // namespace NEO
0 commit comments