11/*
2- * Copyright (C) 2020-2023 Intel Corporation
2+ * Copyright (C) 2020-2024 Intel Corporation
33 *
44 * SPDX-License-Identifier: MIT
55 *
@@ -499,6 +499,55 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::initialize(bool submitOnInit, bo
499499 return ret;
500500}
501501
// Added by this change (every line below carries a '+' marker).
// startRingBuffer(): returns true immediately when ringStart is already set.
// Otherwise it dispatches any one-time setup sections whose "done" flag is
// still clear, dispatches a semaphore section, calls submit(), stores the
// result of submit() in ringStart and returns it.
502+ template <typename GfxFamily, typename Dispatcher>
503+ bool DirectSubmissionHw<GfxFamily, Dispatcher>::startRingBuffer() {
504+ if (ringStart) {
505+ return true ;
506+ }
507+
// startSize is the size later handed to submit(): the semaphore section plus
// each optional section, counted under the same conditions that guard the
// corresponding dispatch calls further down.
508+ size_t startSize = getSizeSemaphoreSection (false );
509+ if (!this ->partitionConfigSet ) {
510+ startSize += getSizePartitionRegisterConfigurationSection ();
511+ }
512+ if (this ->miMemFenceRequired && !this ->systemMemoryFenceAddressSet ) {
513+ startSize += getSizeSystemMemoryFenceAddress ();
514+ }
// NOTE(review): only getSizeRegistersInit is counted here, while the matching
// dispatch branch below calls three helpers. Whether the other two write into
// ringCommandStream is not visible in this view - confirm.
515+ if (this ->relaxedOrderingEnabled && !this ->relaxedOrderingInitialized ) {
516+ startSize += RelaxedOrderingHelper::getSizeRegistersInit<GfxFamily>();
517+ }
518+
// The space check additionally counts getSizeDispatch(...) and
// getSizeEnd(false) on top of startSize; when the available space is smaller,
// switchRingBuffers(nullptr) is called before the start address is read.
519+ size_t requiredSize = startSize + getSizeDispatch (false , false , dispatchMonitorFenceRequired (true )) + getSizeEnd (false );
520+ if (ringCommandStream.getAvailableSpace () < requiredSize) {
521+ switchRingBuffers (nullptr );
522+ }
// Read after the possible switch and before any of the dispatch calls below;
// passed to submit() together with startSize.
523+ uint64_t gpuStartVa = ringCommandStream.getCurrentGpuAddressPosition ();
524+
// Each one-time section is dispatched once and its flag is then set, so the
// same condition evaluates to false on later calls.
525+ if (!this ->partitionConfigSet ) {
526+ dispatchPartitionRegisterConfiguration ();
527+ this ->partitionConfigSet = true ;
528+ }
529+
530+ if (this ->miMemFenceRequired && !this ->systemMemoryFenceAddressSet ) {
531+ dispatchSystemMemoryFenceAddress ();
532+ this ->systemMemoryFenceAddressSet = true ;
533+ }
534+
535+ if (this ->relaxedOrderingEnabled && !this ->relaxedOrderingInitialized ) {
536+ preinitializeRelaxedOrderingSections ();
537+ dispatchStaticRelaxedOrderingScheduler ();
538+ initRelaxedOrderingRegisters ();
539+
540+ this ->relaxedOrderingInitialized = true ;
541+ }
542+
// The work count is incremented first, and the incremented value is what the
// semaphore section receives.
543+ currentQueueWorkCount++;
544+ dispatchSemaphoreSection (currentQueueWorkCount);
545+
// ringStart takes the boolean result of submit(); the same value is returned.
546+ ringStart = submit (gpuStartVa, startSize);
547+
548+ return ringStart;
549+ }
550+
502551template <typename GfxFamily, typename Dispatcher>
503552bool DirectSubmissionHw<GfxFamily, Dispatcher>::stopRingBuffer(bool blocking) {
504553 if (!ringStart) {
@@ -891,46 +940,15 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::copyCommandBufferIntoRing(BatchB
891940 return ret;
892941}
893942
// Removed by this change (every line below carries a '-' marker).
// getUllsStateSize(): started from 0 and added the size of each one-time
// section whose flag was still clear (partition register configuration,
// system memory fence address, relaxed ordering register init), then
// returned the sum. The same three conditions appear in the size computation
// of the added startRingBuffer().
894- template <typename GfxFamily, typename Dispatcher>
895- size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getUllsStateSize() {
896- size_t startSize = 0u ;
897- if (!this ->partitionConfigSet ) {
898- startSize += getSizePartitionRegisterConfigurationSection ();
899- }
900- if (this ->miMemFenceRequired && !this ->systemMemoryFenceAddressSet ) {
901- startSize += getSizeSystemMemoryFenceAddress ();
902- }
903- if (this ->relaxedOrderingEnabled && !this ->relaxedOrderingInitialized ) {
904- startSize += RelaxedOrderingHelper::getSizeRegistersInit<GfxFamily>();
905- }
906- return startSize;
907- }
908-
// Removed by this change (every line below carries a '-' marker).
// dispatchUllsState(): for each one-time section whose flag was still clear,
// called the matching dispatch/init helper(s) and then set the flag. The
// same three guarded branches appear in the added startRingBuffer().
909- template <typename GfxFamily, typename Dispatcher>
910- void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchUllsState() {
911- if (!this ->partitionConfigSet ) {
912- dispatchPartitionRegisterConfiguration ();
913- this ->partitionConfigSet = true ;
914- }
915- if (this ->miMemFenceRequired && !this ->systemMemoryFenceAddressSet ) {
916- dispatchSystemMemoryFenceAddress ();
917- this ->systemMemoryFenceAddressSet = true ;
918- }
// Three helpers run for relaxed ordering before the flag is set.
919- if (this ->relaxedOrderingEnabled && !this ->relaxedOrderingInitialized ) {
920- preinitializeRelaxedOrderingSections ();
921- dispatchStaticRelaxedOrderingScheduler ();
922- initRelaxedOrderingRegisters ();
923-
924- this ->relaxedOrderingInitialized = true ;
925- }
926- }
927-
928943template <typename GfxFamily, typename Dispatcher>
929944bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffer &batchBuffer, FlushStampTracker &flushStamp) {
930945 if (batchBuffer.ringBufferRestartRequest ) {
931946 this ->stopRingBuffer (false );
932947 }
933948
949+ if (!this ->startRingBuffer ()) {
950+ return false ;
951+ }
934952 lastSubmittedThrottle = batchBuffer.throttle ;
935953 bool relaxedOrderingSchedulerWillBeNeeded = (this ->relaxedOrderingSchedulerRequired || batchBuffer.hasRelaxedOrderingDependencies );
936954 bool inputRequiredMonitorFence = false ;
@@ -941,7 +959,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
941959 }
942960 bool dispatchMonitorFence = this ->dispatchMonitorFenceRequired (inputRequiredMonitorFence);
943961
944- size_t dispatchSize = this -> getUllsStateSize () + getSizeDispatch (relaxedOrderingSchedulerWillBeNeeded, batchBuffer.hasRelaxedOrderingDependencies , dispatchMonitorFence);
962+ size_t dispatchSize = getSizeDispatch (relaxedOrderingSchedulerWillBeNeeded, batchBuffer.hasRelaxedOrderingDependencies , dispatchMonitorFence);
945963
946964 if (this ->copyCommandBufferIntoRing (batchBuffer)) {
947965 dispatchSize += (batchBuffer.stream ->getUsed () - batchBuffer.startOffset ) - 2 * getSizeStartSection ();
@@ -960,14 +978,8 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
960978 }
961979 }
962980
963- auto needStart = !this ->ringStart ;
964- this ->ringStart = true ;
965- auto startVA = ringCommandStream.getCurrentGpuAddressPosition ();
966-
967981 this ->switchRingBuffersNeeded (requiredMinimalSize, batchBuffer.allocationsForResidency );
968982
969- this ->dispatchUllsState ();
970-
971983 if (this ->relaxedOrderingEnabled && batchBuffer.hasStallingCmds && this ->relaxedOrderingSchedulerRequired ) {
972984 dispatchRelaxedOrderingQueueStall ();
973985 }
@@ -979,10 +991,9 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
979991 void *currentPosition = dispatchWorkloadSection (batchBuffer, dispatchMonitorFence);
980992
981993 cpuCachelineFlush (currentPosition, dispatchSize);
994+ handleResidency ();
982995
983- if (!this ->submitCommandBufferToGpu (needStart, startVA, requiredMinimalSize)) {
984- return false ;
985- }
996+ this ->unblockGpu ();
986997
987998 cpuCachelineFlush (semaphorePtr, MemoryConstants::cacheLineSize);
988999 currentQueueWorkCount++;
@@ -997,17 +1008,6 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
9971008 return ringStart;
9981009}
9991010
// Removed by this change (every line below carries a '-' marker).
// submitCommandBufferToGpu(): when needStart was true it returned the result
// of submit(gpuAddress, size); otherwise it called handleResidency() and
// unblockGpu() and returned true. In the same diff, dispatchCommandBuffer()
// now calls handleResidency() and unblockGpu() directly.
1000- template <typename GfxFamily, typename Dispatcher>
1001- bool DirectSubmissionHw<GfxFamily, Dispatcher>::submitCommandBufferToGpu(bool needStart, uint64_t gpuAddress, size_t size) {
1002- if (needStart) {
1003- return this ->submit (gpuAddress, size);
1004- } else {
1005- handleResidency ();
1006- this ->unblockGpu ();
1007- return true ;
1008- }
1009- }
1010-
10111011template <typename GfxFamily, typename Dispatcher>
10121012inline void DirectSubmissionHw<GfxFamily, Dispatcher>::setReturnAddress(void *returnCmd, uint64_t returnAddress) {
10131013 using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
0 commit comments