@@ -34,24 +34,27 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
3434 memcpy (m_addresses->data (), addresses, count * sizeof (value_type));
3535 }
3636
37- // Just does the de-allocation
38- inline void operator ()()
37+ //
38+ inline auto getWorstCaseCount () const {return m_addresses->size ();}
39+
40+ // Just does the de-allocation, note that the parameter is a reference
41+ inline void operator ()(IGPUDescriptorSet::SDropDescriptorSet* &outNullify)
3942 {
40- // isn't assert already debug-only?
4143 #ifdef _NBL_DEBUG
4244 assert (m_composed);
4345 #endif // _NBL_DEBUG
44- m_composed->multi_deallocate (m_binding, m_addresses->size (), m_addresses->data ());
46+ outNullify = m_composed->multi_deallocate (outNullify, m_binding, m_addresses->size (), m_addresses->data ());
47+ m_composed->m_totalDeferredFrees -= getWorstCaseCount ();
4548 }
4649
4750 // Takes count of allocations we want to free up as reference, true is returned if
4851 // the amount of allocations freed was >= allocationsToFreeUp
4952 // False is returned if there are more allocations to free up
50- inline bool operator ()(size_type& allocationsToFreeUp)
53+ inline bool operator ()(size_type& allocationsToFreeUp, IGPUDescriptorSet::SDropDescriptorSet* &outNullify )
5154 {
52- auto prevCount = m_addresses-> size () ;
53- operator ()();
54- auto totalFreed = m_addresses-> size () - prevCount ;
55+ auto prevNullify = outNullify ;
56+ operator ()(outNullify );
57+ auto totalFreed = outNullify-prevNullify ;
5558
5659 // This does the same logic as bool operator()(size_type&) on
5760 // CAsyncSingleBufferSubAllocator
@@ -63,7 +66,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
6366 }
6467 protected:
6568 core::smart_refctd_dynamic_array<value_type> m_addresses;
66- SubAllocatedDescriptorSet* m_composed;
69+ SubAllocatedDescriptorSet* m_composed; // TODO: shouldn't be called `composed`, maybe `parent` or something
6770 uint32_t m_binding;
6871 };
6972 using EventHandler = MultiTimelineEventHandlerST<DeferredFreeFunctor>;
@@ -93,6 +96,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
9396 std::map<uint32_t , SubAllocDescriptorSetRange> m_allocatableRanges = {};
9497 core::smart_refctd_ptr<video::IGPUDescriptorSet> m_descriptorSet;
9598 core::smart_refctd_ptr<video::ILogicalDevice> m_logicalDevice;
99+ value_type m_totalDeferredFrees = 0 ;
96100
97101 #ifdef _NBL_DEBUG
98102 std::recursive_mutex stAccessVerfier;
@@ -141,7 +145,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
141145 m_descriptorSet = std::move (descriptorSet);
142146 }
143147
144- ~SubAllocatedDescriptorSet ()
148+ inline ~SubAllocatedDescriptorSet ()
145149 {
146150 for (uint32_t i = 0 ; i < m_allocatableRanges.size (); i++)
147151 {
@@ -155,9 +159,9 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
155159 }
156160
157161 // whether that binding index can be sub-allocated
158- bool isBindingAllocatable (uint32_t binding) { return m_allocatableRanges.find (binding) != m_allocatableRanges.end (); }
162+ inline bool isBindingAllocatable (uint32_t binding) { return m_allocatableRanges.find (binding) != m_allocatableRanges.end (); }
159163
160- AddressAllocator* getBindingAllocator (uint32_t binding)
164+ inline AddressAllocator* getBindingAllocator (uint32_t binding)
161165 {
162166 auto range = m_allocatableRanges.find (binding);
163167 // Check if this binding has an allocator
@@ -169,36 +173,26 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
169173 // main methods
170174
171175#ifdef _NBL_DEBUG
172- std::unique_lock<std::recursive_mutex> stAccessVerifyDebugGuard ()
176+ inline std::unique_lock<std::recursive_mutex> stAccessVerifyDebugGuard ()
173177 {
174178 std::unique_lock<std::recursive_mutex> tLock (stAccessVerfier,std::try_to_lock_t ());
175179 assert (tLock.owns_lock ());
176180 return tLock;
177181 }
178182#else
179- bool stAccessVerifyDebugGuard () { return false ; }
183+ inline bool stAccessVerifyDebugGuard () { return false ; }
180184#endif
181185
182- video::IGPUDescriptorSet* getDescriptorSet () { return m_descriptorSet.get (); }
186+ inline video::IGPUDescriptorSet* getDescriptorSet () { return m_descriptorSet.get (); }
183187
184188 // ! Warning `outAddresses` needs to be primed with `invalid_value` values, otherwise no allocation happens for elements not equal to `invalid_value`
185- inline size_type try_multi_allocate (
186- uint32_t binding,
187- size_type count,
188- video::IGPUDescriptorSet::SDescriptorInfo* descriptors,
189- video::IGPUDescriptorSet::SWriteDescriptorSet* outDescriptorWrites,
190- value_type* outAddresses
191- )
189+ inline size_type try_multi_allocate (const uint32_t binding, const size_type count, value_type* outAddresses) noexcept
192190 {
193191 auto debugGuard = stAccessVerifyDebugGuard ();
194192
193+ // we assume you've validated that the binding is allocatable before trying this
195194 auto allocator = getBindingAllocator (binding);
196195
197- std::vector<video::IGPUDescriptorSet::SWriteDescriptorSet> writes;
198- std::vector<video::IGPUDescriptorSet::SDescriptorInfo> infos;
199- writes.reserve (count);
200- infos.reserve (count);
201-
202196 size_type unallocatedSize = 0u ;
203197 for (size_type i=0 ; i<count; i++)
204198 {
@@ -211,35 +205,13 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
211205 unallocatedSize = count - i;
212206 break ;
213207 }
214-
215- auto & descriptor = descriptors[i];
216-
217- video::IGPUDescriptorSet::SWriteDescriptorSet write;
218- {
219- write.dstSet = m_descriptorSet.get ();
220- write.binding = binding;
221- write.arrayElement = outAddresses[i];
222- write.count = 1u ;
223- // descriptors could be a const pointer, but the problem is that this pointer in
224- // SWriteDescriptorSet.info isn't const
225- // can we change it?
226- write.info = &descriptor;
227- }
228- outDescriptorWrites[i] = write;
229208 }
230209
231210 return unallocatedSize;
232211 }
233212
234213 template <class Clock =typename std::chrono::steady_clock>
235- inline size_type multi_allocate (
236- const std::chrono::time_point<Clock>& maxWaitPoint,
237- uint32_t binding,
238- size_type count,
239- video::IGPUDescriptorSet::SDescriptorInfo* descriptors,
240- video::IGPUDescriptorSet::SWriteDescriptorSet* outDescriptorWrites,
241- value_type* outAddresses
242- ) noexcept
214+ inline size_type multi_allocate (const std::chrono::time_point<Clock>& maxWaitPoint, const uint32_t binding, const size_type count, value_type* outAddresses) noexcept
243215 {
244216 auto debugGuard = stAccessVerifyDebugGuard ();
245217
@@ -248,76 +220,60 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
248220 if (range == m_allocatableRanges.end ())
249221 return count;
250222
251- auto & eventHandler = range->second .eventHandler ;
252-
253223 // try allocate once
254- size_type unallocatedSize = try_multi_allocate (binding, count, descriptors, outDescriptorWrites, outAddresses);
224+ size_type unallocatedSize = try_multi_allocate (binding,count,outAddresses);
255225 if (!unallocatedSize)
256226 return 0u ;
257227
258228 // then try to wait at least once and allocate
229+ auto & eventHandler = range->second .eventHandler ;
230+ core::vector<IGPUDescriptorSet::SDropDescriptorSet> nulls (m_totalDeferredFrees);
231+ auto outNulls = nulls.data ();
259232 do
260233 {
261- eventHandler.wait (maxWaitPoint, unallocatedSize);
262-
263- unallocatedSize = try_multi_allocate (
264- binding,
265- unallocatedSize,
266- &descriptors[count - unallocatedSize],
267- &outDescriptorWrites[count - unallocatedSize],
268- &outAddresses[count - unallocatedSize]
269- );
234+ eventHandler.wait (maxWaitPoint, unallocatedSize, outNulls);
235+
236+ // always call with the same parameters, otherwise this turns into a mess with the non invalid_address gaps
237+ unallocatedSize = try_multi_allocate (binding,count,outAddresses);
270238 if (!unallocatedSize)
271- return 0u ;
239+ break ;
272240 } while (Clock::now ()<maxWaitPoint);
241+ m_logicalDevice->nullifyDescriptors ({nulls.data (),outNulls});
273242
274243 return unallocatedSize;
275244 }
276245
277- inline size_type multi_allocate (
278- uint32_t binding,
279- size_type count,
280- video::IGPUDescriptorSet::SDescriptorInfo* descriptors,
281- video::IGPUDescriptorSet::SWriteDescriptorSet* outDescriptorWrites,
282- value_type* outAddresses
283- ) noexcept
246+ // default timeout overload
247+ inline size_type multi_allocate (const uint32_t binding, const size_type count, value_type* outAddresses) noexcept
284248 {
285- auto range = m_allocatableRanges.find (binding);
286- // Check if this binding has an allocator
287- if (range == m_allocatableRanges.end ())
288- return count;
289-
290- return multi_allocate (TimelineEventHandlerBase::default_wait (), binding, count, descriptors, outDescriptorWrites, outAddresses);
249+ // check that the binding is allocatable is done inside anyway
250+ return multi_allocate (TimelineEventHandlerBase::default_wait (), binding, count, outAddresses);
291251 }
292252
293- inline void multi_deallocate (uint32_t binding, size_type count, const size_type* addr)
253+ // Very explicit low level call you'd need to sync and drop descriptors by yourself
254+ // Returns: the one-past the last `outNullify` write pointer, this allows you to work out how many descriptors were freed
255+ inline void multi_deallocate (IGPUDescriptorSet::SDropDescriptorSet* outNullify, uint32_t binding, size_type count, const size_type* addr)
294256 {
295257 auto debugGuard = stAccessVerifyDebugGuard ();
296258
297259 auto allocator = getBindingAllocator (binding);
298- if (!allocator)
299- return ;
260+ if (allocator)
300261 for (size_type i=0 ; i<count; i++)
301262 {
302263 if (addr[i]==AddressAllocator::invalid_address)
303264 continue ;
304265
305266 allocator->free_addr (addr[i],1 );
306- // TODO: should also write something to the descriptor sets
307- // basically if nullDescriptor device feature is enabled, you would
308- // indeed write to the DS, else you'd just drop the refcounted references
309- //
310- // this needs to be done as a IGPUDescriptorSet::nullify(const uint32_t binding,
311- // std::span<uint32_t> indices) function + a virtual nullify_impl
312- video::IGPUDescriptorSet::SDropDescriptorSet dropDescriptorSet;
313- dropDescriptorSet.dstSet = m_descriptorSet.get ();
314- dropDescriptorSet.binding = binding;
315- dropDescriptorSet.arrayElement = i;
316- dropDescriptorSet.count = 1 ;
317- m_logicalDevice->nullifyDescriptors ({ &dropDescriptorSet, 1 });
267+ outNullify->dstSet = m_descriptorSet.get ();
268+ outNullify->binding = binding;
269+ outNullify->arrayElement = i;
270+ outNullify->count = 1 ;
271+ outNullify++;
318272 }
273+ return outNullify;
319274 }
320275
276+ // 100% will defer
321277 inline void multi_deallocate (uint32_t binding, const ISemaphore::SWaitInfo& futureWait, DeferredFreeFunctor&& functor) noexcept
322278 {
323279 auto range = m_allocatableRanges.find (binding);
@@ -327,26 +283,36 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
327283
328284 auto & eventHandler = range->second .eventHandler ;
329285 auto debugGuard = stAccessVerifyDebugGuard ();
286+ m_totalDeferredFrees += functor.getWorstCaseCount ();
330287 eventHandler.latch (futureWait,std::move (functor));
331288 }
332289
290+ // defers based on the conservative estimation if `futureWait` needs to be waited on, if doesn't will call nullify descriiptors internally immediately
333291 inline void multi_deallocate (uint32_t binding, size_type count, const value_type* addr, const ISemaphore::SWaitInfo& futureWait) noexcept
334292 {
335293 if (futureWait.semaphore )
336294 multi_deallocate (binding, futureWait, DeferredFreeFunctor (this , binding, count, addr));
337295 else
338- multi_deallocate (binding, count, addr);
296+ {
297+ core::vector<IGPUDescriptorSet::SDropDescriptorSet> nulls (count);
298+ auto actualEnd = multi_deallocate (nulls.data (), binding, count, addr);
299+ m_logicalDevice->nullifyDescriptors ({nulls.data (),actualEnd});
300+ }
339301 }
302+
340303 // ! Returns free events still outstanding
341304 inline uint32_t cull_frees () noexcept
342305 {
343306 auto debugGuard = stAccessVerifyDebugGuard ();
344307 uint32_t frees = 0 ;
308+ core::vector<IGPUDescriptorSet::SDropDescriptorSet> nulls (m_totalDeferredFrees);
309+ auto outNulls = nulls.data ();
345310 for (uint32_t i = 0 ; i < m_allocatableRanges.size (); i++)
346311 {
347312 auto & it = m_allocatableRanges[i];
348- frees += it.eventHandler .poll ().eventsLeft ;
313+ frees += it.eventHandler .poll (outNulls ).eventsLeft ;
349314 }
315+ m_logicalDevice->nullifyDescriptors ({nulls.data (),outNulls});
350316 return frees;
351317 }
352318};
0 commit comments