diff --git a/framework/audio/common/CMakeLists.txt b/framework/audio/common/CMakeLists.txt index 14ff38cce5..ca9237da11 100644 --- a/framework/audio/common/CMakeLists.txt +++ b/framework/audio/common/CMakeLists.txt @@ -33,6 +33,13 @@ target_sources(muse_audio_common PRIVATE iaudiothreadsecurer.h audiothreadsecurer.cpp audiothreadsecurer.h + audioworkgroup.h + audioworkgroup.cpp + audiotaskscheduler.h + iaudiotaskscheduler.h + realtimethreadpool.h + concurrentqueue.h + lightweightsemaphore.h workmode.cpp workmode.h alignmentbuffer.h diff --git a/framework/audio/common/audiotaskscheduler.h b/framework/audio/common/audiotaskscheduler.h new file mode 100644 index 0000000000..dac2c2716f --- /dev/null +++ b/framework/audio/common/audiotaskscheduler.h @@ -0,0 +1,97 @@ +/* + * SPDX-License-Identifier: GPL-3.0-only + * MuseScore-CLA-applies + * + * MuseScore + * Music Composition & Notation + * + * Copyright (C) 2026 MuseScore Limited and others + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 3 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#pragma once + +#include "audioworkgroup.h" +#include "realtimethreadpool.h" +#include "iaudiodriver.h" +#include "iaudiotaskscheduler.h" +#include "audiosanitizer.h" +#include "global/async/asyncable.h" +#include "global/log.h" +#include + +namespace muse::audio { +class AudioTaskScheduler : public IAudioTaskScheduler, public muse::async::Asyncable +{ + constexpr static const char* threadpoolName = "audio_realtime_thread"; +public: + void submitRealtimeTasksAndWait(const std::vector& tasks) override + { + std::lock_guard lock(m_threadPoolMutex); + for (const auto& task : tasks) { + IF_ASSERT_FAILED(m_threadPool->enqueue(task)) { + task(); + } + } + m_threadPool->participateAndWait(); + } + + void setAudioDriver(const IAudioDriverPtr& audioDriver) + { + if (!audioDriver) { + return; + } + setWorkgroup(audioDriver->getAudioWorkGroup()); + audioDriver->currentWorkgroupChanged().onNotify( + this, [this, audioDriverWeak = std::weak_ptr(audioDriver)]() { + if (auto audioDriver = audioDriverWeak.lock()) { + setWorkgroup(audioDriver->getAudioWorkGroup()); + } + }); + } + +private: + + void setWorkgroup(const AudioWorkGroup& workGroup) + { + std::lock_guard lock(m_threadPoolMutex); + ensureThreadPoolSize(workGroup); + m_threadPool->setAudioWorkgroup(workGroup); + } + + int getIdealThreadCount(AudioWorkGroup workGroup) const + { + int bestThreadHint = std::thread::hardware_concurrency(); + if (workGroup.getProvider() != nullptr) { + bestThreadHint = workGroup.getMaxParallelThreadCount(); + } + constexpr int hardwareToRealtimeRatio = 2; // This is a heuristic value. The optimal value may vary depending on the workload and system. + return bestThreadHint > 0 ? 
static_cast(bestThreadHint / hardwareToRealtimeRatio) : 1; + } + + void ensureThreadPoolSize(const AudioWorkGroup& currentWorkGroup) + { + auto idealWorkerCount = getIdealThreadCount(currentWorkGroup); + std::lock_guard lock(m_threadPoolMutex); + if (m_threadPool->getNumberOfWorkers() != idealWorkerCount) { + m_threadPool = std::make_unique(threadpoolName, idealWorkerCount); + AudioSanitizer::setMixerThreads(m_threadPool->threadIdSet()); + } + } + + std::unique_ptr m_threadPool{ std::make_unique(threadpoolName, getIdealThreadCount({})) }; + + std::recursive_mutex m_threadPoolMutex; +}; +} diff --git a/framework/audio/common/audioworkgroup.cpp b/framework/audio/common/audioworkgroup.cpp new file mode 100644 index 0000000000..2ccd009822 --- /dev/null +++ b/framework/audio/common/audioworkgroup.cpp @@ -0,0 +1,244 @@ +/* + * SPDX-License-Identifier: GPL-3.0-only + * MuseScore-CLA-applies + * + * MuseScore + * Music Composition & Notation + * + * Copyright (C) 2026 MuseScore Limited and others + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 3 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include "audioworkgroup.h" + +#include +#include + +#ifdef __APPLE__ +#include +#include +#else +#include +#endif + +namespace muse::audio { +#ifdef __APPLE__ +class AudioWorkgroupTokenProvider +{ +public: + AudioWorkgroupTokenProvider(os_workgroup_t workgroup) + : m_workgroup(workgroup) + { + if (!m_workgroup) { + return; + } + + if (__builtin_available(macOS 11.0, *)) { + os_retain(m_workgroup); + auto status = os_workgroup_join(m_workgroup, &m_joinToken); + if (status == 0) { + return; + } + + os_release(m_workgroup); + m_workgroup = nullptr; + m_joinToken = {}; + return; + } + + m_workgroup = nullptr; + } + + bool isAttachedTo(os_workgroup_t wg) const { return m_workgroup == wg; } + bool isValid() const { return m_workgroup != nullptr; } + + AudioWorkgroupTokenProvider(const AudioWorkgroupTokenProvider& other) = delete; + + AudioWorkgroupTokenProvider& operator=(const AudioWorkgroupTokenProvider&) = delete; + + AudioWorkgroupTokenProvider(AudioWorkgroupTokenProvider&& other) noexcept + : m_workgroup(other.m_workgroup), m_joinToken(other.m_joinToken) + { + other.m_workgroup = nullptr; + other.m_joinToken = {}; + } + + ~AudioWorkgroupTokenProvider() + { + leave(); + } + +private: + void leave() noexcept + { + if (!m_workgroup) { + return; + } + + if (__builtin_available(macOS 11.0, *)) { + os_workgroup_leave(m_workgroup, &m_joinToken); + } + os_release(m_workgroup); + m_workgroup = nullptr; + m_joinToken = {}; + } + + os_workgroup_t m_workgroup; + os_workgroup_join_token_s m_joinToken; +}; + +class AudioWorkgroupProvider +{ +public: + explicit AudioWorkgroupProvider(os_workgroup_t wg) + : m_workgroup(wg) + { + os_retain(m_workgroup); + } + + ~AudioWorkgroupProvider() + { + if (m_workgroup) { + os_release(m_workgroup); + } + } + + AudioWorkgroupProvider(const AudioWorkgroupProvider& other) + : m_workgroup(other.m_workgroup) + { + os_retain(m_workgroup); + } + + AudioWorkgroupProvider& operator=(const AudioWorkgroupProvider& other) + { + if (this != 
&other) { + os_retain(other.m_workgroup); + if (m_workgroup) { + os_release(m_workgroup); + } + m_workgroup = other.m_workgroup; + } + return *this; + } + + AudioWorkgroupProvider(AudioWorkgroupProvider&& other) noexcept + : m_workgroup(other.m_workgroup) + { + other.m_workgroup = nullptr; + } + + bool join(AudioWorkgroupToken& tokenProvider) const + { + if (auto existingProvider = AudioWorkGroup::providerFor(tokenProvider); + existingProvider != nullptr && existingProvider->isAttachedTo(m_workgroup)) { + return true; + } + AudioWorkGroup::resetProviderFor(tokenProvider); + AudioWorkgroupTokenProvider provider(m_workgroup); + if (!provider.isValid()) { + return false; + } + + AudioWorkGroup::setProviderFor(tokenProvider, [provider = std::move(provider)]() { + return &provider; + }); + return true; + } + + size_t getMaxParallelThreadCount() const + { + if (__builtin_available(macOS 11.0, *)) { + return (size_t)os_workgroup_max_parallel_threads(m_workgroup, nullptr); + } + return 0; + } + +private: + + os_workgroup_t m_workgroup; +}; + +bool AudioWorkGroup::join(AudioWorkgroupToken& token) +{ + auto provider = getProvider(); + if (!provider) { + return false; + } + return provider->join(token); +} + +AudioWorkGroup makeAudioWorkgroup(void* opaqueHandle) +{ + if (opaqueHandle == nullptr) { + return {}; + } + + os_workgroup_t handle = reinterpret_cast(opaqueHandle); + + return AudioWorkGroup { std::make_unique(handle) }; +} + +size_t AudioWorkGroup::getMaxParallelThreadCount() const +{ + if (auto provider = getProvider(); provider) { + return provider->getMaxParallelThreadCount(); + } + return 0; +} + +#else + +class AudioWorkgroupProvider +{ +}; + +size_t AudioWorkGroup::getMaxParallelThreadCount() const +{ + return std::thread::hardware_concurrency(); +} + +bool AudioWorkGroup::join(AudioWorkgroupToken&) +{ + return false; +} + +AudioWorkGroup makeAudioWorkgroup(void*) +{ + return {}; +} + +#endif + +AudioWorkGroup::AudioWorkGroup() = default; + 
+AudioWorkGroup::AudioWorkGroup(std::unique_ptr provider) + : m_provider(std::move(provider)) {} + +AudioWorkGroup::~AudioWorkGroup() = default; + +AudioWorkGroup::AudioWorkGroup(const AudioWorkGroup& other) + : m_provider(other.m_provider ? std::make_unique(*other.m_provider) : nullptr) {} + +AudioWorkGroup::AudioWorkGroup(AudioWorkGroup&& other) noexcept = default; + +AudioWorkGroup& AudioWorkGroup::operator=(const AudioWorkGroup& other) +{ + if (this != &other) { + m_provider = other.m_provider ? std::make_unique(*other.m_provider) : nullptr; + } + return *this; +} + +AudioWorkGroup& AudioWorkGroup::operator=(AudioWorkGroup&& other) noexcept = default; +} // namespace muse::audio diff --git a/framework/audio/common/audioworkgroup.h b/framework/audio/common/audioworkgroup.h new file mode 100644 index 0000000000..6f19b002cf --- /dev/null +++ b/framework/audio/common/audioworkgroup.h @@ -0,0 +1,147 @@ +/* + * SPDX-License-Identifier: GPL-3.0-only + * MuseScore-CLA-applies + * + * MuseScore + * Music Composition & Notation + * + * Copyright (C) 2026 MuseScore Limited and others + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 3 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +#pragma once + +#include +#include +#include +#include +#include + +#include "global/functional/inplace_function_mv.h" + +namespace muse::audio { +class AudioWorkgroupTokenProvider; + +using ErasedAudioWorkgroupTokenProvider = muse::functional::MoveOnlyInplaceFunction; + +class AudioWorkgroupToken +{ +public: + AudioWorkgroupToken() = default; + AudioWorkgroupToken(const AudioWorkgroupToken&) = delete; + AudioWorkgroupToken& operator=(const AudioWorkgroupToken&) = delete; + AudioWorkgroupToken(AudioWorkgroupToken&& other) noexcept + { + moveFrom(std::move(other)); + } + + AudioWorkgroupToken& operator=(AudioWorkgroupToken&& other) noexcept + { + if (this != &other) { + reset(); + moveFrom(std::move(other)); + } + + return *this; + } + + ~AudioWorkgroupToken() + { + reset(); + } + +private: + friend class AudioWorkGroup; + + explicit AudioWorkgroupToken(ErasedAudioWorkgroupTokenProvider provider) + { + setProvider(std::move(provider)); + } + + const AudioWorkgroupTokenProvider* provider() const + { + assertCurrentThread(); + return m_provider ? 
m_provider() : nullptr; + } + + void setProvider(ErasedAudioWorkgroupTokenProvider workgroup) + { + reset(); + m_provider = std::move(workgroup); + if (m_provider) { + m_threadId = std::this_thread::get_id(); + } + } + + void reset() + { + assertCurrentThread(); + m_provider = nullptr; + m_threadId = {}; + } + + void moveFrom(AudioWorkgroupToken&& other) noexcept + { + other.assertCurrentThread(); + m_provider = std::move(other.m_provider); + m_threadId = other.m_threadId; + other.m_threadId = {}; + } + + void assertCurrentThread() const + { + assert((!m_provider || m_threadId == std::this_thread::get_id()) + && "AudioWorkgroupToken must be used on the thread that joined the audio workgroup"); + } + + ErasedAudioWorkgroupTokenProvider m_provider; + std::thread::id m_threadId; +}; + +class AudioWorkgroupProvider; + +class AudioWorkGroup +{ +public: + AudioWorkGroup(); + AudioWorkGroup(const AudioWorkGroup&); + AudioWorkGroup(AudioWorkGroup&&) noexcept; + AudioWorkGroup& operator=(const AudioWorkGroup&); + AudioWorkGroup& operator=(AudioWorkGroup&&) noexcept; + ~AudioWorkGroup(); + + bool join(AudioWorkgroupToken& token); + + const AudioWorkgroupProvider* getProvider() const { return m_provider.get(); } + + size_t getMaxParallelThreadCount() const; + +private: + friend class AudioWorkgroupProvider; + friend AudioWorkGroup makeAudioWorkgroup(void* opaqueHandle); + + explicit AudioWorkGroup(std::unique_ptr provider); + + static const AudioWorkgroupTokenProvider* providerFor(const AudioWorkgroupToken& token) { return token.provider(); } + static void setProviderFor(AudioWorkgroupToken& token, ErasedAudioWorkgroupTokenProvider provider) + { + token.setProvider(std::move(provider)); + } + + static void resetProviderFor(AudioWorkgroupToken& token) { token.reset(); } + + std::unique_ptr m_provider; +}; + +AudioWorkGroup makeAudioWorkgroup(void* opaqueHandle); +} // namespace muse::audio diff --git a/framework/audio/common/concurrentqueue.h 
b/framework/audio/common/concurrentqueue.h new file mode 100644 index 0000000000..8b64d63cc3 --- /dev/null +++ b/framework/audio/common/concurrentqueue.h @@ -0,0 +1,20 @@ +#pragma once + +#if defined(__MACH__) +#include + +#pragma push_macro("integer_t") +#define integer_t ::integer_t +#endif + +#include "../thirdparty/moodycamel/blockingconcurrentqueue.h" + +#if defined(__MACH__) +#pragma pop_macro("integer_t") +#endif + +namespace muse { +template +using BlockingConcurrentQueue = moodycamel::BlockingConcurrentQueue; +} diff --git a/framework/audio/common/iaudiotaskscheduler.h b/framework/audio/common/iaudiotaskscheduler.h new file mode 100644 index 0000000000..29686b6690 --- /dev/null +++ b/framework/audio/common/iaudiotaskscheduler.h @@ -0,0 +1,40 @@ +/* + * SPDX-License-Identifier: GPL-3.0-only + * MuseScore-CLA-applies + * + * MuseScore + * Music Composition & Notation + * + * Copyright (C) 2026 MuseScore Limited and others + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 3 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#pragma once + +#include "global/modularity/imoduleinterface.h" +#include "global/functional/inplace_function.h" +#include +#include + +namespace muse::audio { +class IAudioTaskScheduler : MODULE_GLOBAL_INTERFACE +{ + INTERFACE_ID(IAudioTaskScheduler) +public: + using Task = muse::functional::inplace_function; + virtual void submitRealtimeTasksAndWait(const std::vector& tasks) = 0; +}; + +using IAudioTaskSchedulerPtr = std::shared_ptr; +} diff --git a/framework/audio/common/lightweightsemaphore.h b/framework/audio/common/lightweightsemaphore.h new file mode 100644 index 0000000000..0d0557d1a6 --- /dev/null +++ b/framework/audio/common/lightweightsemaphore.h @@ -0,0 +1,7 @@ +#pragma once + +#include "../thirdparty/moodycamel/lightweightsemaphore.h" + +namespace muse { +using LightweightSemaphore = moodycamel::LightweightSemaphore; +} diff --git a/framework/audio/common/realtimethreadpool.h b/framework/audio/common/realtimethreadpool.h new file mode 100644 index 0000000000..6605d0e887 --- /dev/null +++ b/framework/audio/common/realtimethreadpool.h @@ -0,0 +1,170 @@ +#pragma once + +#include "audioworkgroup.h" +#include "concurrency/threadutils.h" +#include "runtime.h" +#include "concurrentqueue.h" +#include "global/functional/inplace_function.h" +#include "lightweightsemaphore.h" +#include "thirdparty/kors_logger/src/log_base.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace muse::audio { +class RealtimeThreadPool +{ + struct InflightSemaphoreRelease { + explicit InflightSemaphoreRelease(muse::LightweightSemaphore& semaphore) + : m_semaphore(semaphore) {} + + InflightSemaphoreRelease(const InflightSemaphoreRelease&) = delete; + InflightSemaphoreRelease& operator=(const InflightSemaphoreRelease&) = delete; + + ~InflightSemaphoreRelease() noexcept + { + m_semaphore.signal(); + } + + muse::LightweightSemaphore& m_semaphore; + }; + +public: + static constexpr int maxTaskCount = 10000; + using Task = 
muse::functional::inplace_function; + RealtimeThreadPool( + std::string name, + int num_of_workers = std::thread::hardware_concurrency()) + { + num_of_workers = std::max(0, num_of_workers); + m_workers.reserve(num_of_workers); + try { + for (size_t i = 0; i < static_cast(num_of_workers); ++i) { + auto worker = std::make_unique(); + Worker* workerPtr = worker.get(); + const size_t workerIndex = i; + worker->m_thread = std::make_unique( + [this, workerPtr, workerIndex, name] { + muse::runtime::setThreadName(name + "_" + std::to_string(workerIndex)); + AudioWorkgroupToken workgroupToken; + for (;;) { + Task task; + + m_queue.wait_dequeue(task); + { + std::lock_guard lock(workerPtr->m_workgroupMutex); + workerPtr->m_workgroup.join(workgroupToken); + } + + if (this->m_should_stop) { + return; + } + + InflightSemaphoreRelease release(m_inflightSemaphore); + task(); + } + }); + m_workers.push_back(std::move(worker)); + muse::setThreadPriority(*m_workers.back()->m_thread, ThreadPriority::High); + } + } catch (...) 
{ + terminate(); + throw; + } + } + + void setAudioWorkgroup(muse::audio::AudioWorkGroup audioworkgroup) + { + for (auto& worker : m_workers) { + std::lock_guard lock(worker->m_workgroupMutex); + worker->m_workgroup = audioworkgroup; + } + } + + bool enqueue(const Task& func) + { + m_inflightSemaphore.wait(); + if (!m_queue.enqueue(func)) { + m_inflightSemaphore.signal(); + return false; + } + return true; + } + + void participateAndWait() + { + Task task; + while (m_queue.try_dequeue(task)) { + InflightSemaphoreRelease release(m_inflightSemaphore); + task(); + } + waitUntilFinished(); + m_inflightSemaphore.signal(maxTaskCount); + } + + std::set threadIdSet() const + { + std::set result; + + for (const auto& worker : m_workers) { + result.insert(worker->m_thread->get_id()); + } + + return result; + } + + int getNumberOfWorkers() const + { + return static_cast(m_workers.size()); + } + + ~RealtimeThreadPool() + { + terminate(); + } + +private: + void waitUntilFinished() + { + auto actuallyAwaited = m_inflightSemaphore.waitMany(maxTaskCount); + for (size_t i = 0; + i < static_cast(maxTaskCount - actuallyAwaited); ++i) { + m_inflightSemaphore.wait(); + } + } + + void terminate() + { + m_should_stop = true; + for (size_t i = 0; i < m_workers.size(); ++i) { + IF_ASSERT_FAILED(m_queue.enqueue([] {})) { + // this is _extremely_ unlikely to fail. But if it does the threads would hang indefinitely. 
+ std::terminate(); + } + } + + for (auto& worker : m_workers) { + if (worker->m_thread && worker->m_thread->joinable()) { + worker->m_thread->join(); + } + } + } + + struct Worker { + std::unique_ptr m_thread; + AudioWorkGroup m_workgroup; + std::mutex m_workgroupMutex; + }; + + muse::BlockingConcurrentQueue m_queue; + std::vector > m_workers; + muse::LightweightSemaphore m_inflightSemaphore { maxTaskCount }; + std::atomic m_should_stop{ false }; +}; // namespace muse::audio +} // namespace muse::audio diff --git a/framework/audio/driver/CMakeLists.txt b/framework/audio/driver/CMakeLists.txt index 950c7086bc..6bc44af87e 100644 --- a/framework/audio/driver/CMakeLists.txt +++ b/framework/audio/driver/CMakeLists.txt @@ -85,10 +85,13 @@ elseif(OS_IS_MAC) target_sources(muse_audio_driver PRIVATE platform/osx/osxaudiodriver.mm platform/osx/osxaudiodriver.h + platform/osx/osxdirectaudiodriver.mm + platform/osx/osxdirectaudiodriver.h ) - + set_source_files_properties( platform/osx/osxaudiodriver.mm + platform/osx/osxdirectaudiodriver.mm PROPERTIES SKIP_UNITY_BUILD_INCLUSION ON SKIP_PRECOMPILE_HEADERS ON diff --git a/framework/audio/driver/platform/osx/osxdirectaudiodriver.h b/framework/audio/driver/platform/osx/osxdirectaudiodriver.h new file mode 100644 index 0000000000..215d3bedd8 --- /dev/null +++ b/framework/audio/driver/platform/osx/osxdirectaudiodriver.h @@ -0,0 +1,93 @@ +/* + * SPDX-License-Identifier: GPL-3.0-only + * MuseScore-CLA-applies + * + * MuseScore + * Music Composition & Notation + * + * Copyright (C) 2026 MuseScore Limited and others + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 3 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef MUSE_AUDIO_OSXDIRECTAUDIODRIVER_H +#define MUSE_AUDIO_OSXDIRECTAUDIODRIVER_H + +#include +#include +#include +#include + +#include + +#include "common/audioworkgroup.h" +#include "iaudiodriver.h" + +struct AudioTimeStamp; +struct AudioQueueBuffer; +struct OpaqueAudioQueue; + +namespace muse::audio { +class OSXDirectAudioDriver : public IAudioDriver +{ +public: + OSXDirectAudioDriver(); + ~OSXDirectAudioDriver(); + + void init() override; + + std::string name() const override; + + AudioDeviceID defaultDevice() const override; + + bool open(const Spec& spec, Spec* activeSpec) override; + void close() override; + bool isOpened() const override; + + const Spec& activeSpec() const override; + async::Channel activeSpecChanged() const override; + + AudioDeviceList availableOutputDevices() const override; + async::Notification availableOutputDevicesChanged() const override; + void updateDeviceMap(); + + std::vector availableOutputDeviceBufferSizes() const override; + std::vector availableOutputDeviceSampleRates() const override; + + AudioWorkGroup getAudioWorkGroup() const override; + async::Notification currentWorkgroupChanged() const override; + + struct Data; + +private: + static void logError(const std::string message, OSStatus error); + + void initDeviceMapListener(); + void removeDeviceMapListener(); + void doClose(); + + std::optional getAudioDeviceId(const AudioDeviceID& deviceId) const; + + UInt32 osxDeviceId() const; + + std::unique_ptr m_data = nullptr; + async::Channel m_activeSpecChanged; + std::map m_outputDevices = {}, m_inputDevices = {}; + mutable std::mutex m_devicesMutex; + async::Notification m_availableOutputDevicesChanged; + async::Notification m_currentWorkgroupChanged; + + AudioWorkGroup m_audioWorkGroup; + bool m_deviceMapListenerRegistered = false; +}; +} +#endif 
// MUSE_AUDIO_OSXDIRECTAUDIODRIVER_H diff --git a/framework/audio/driver/platform/osx/osxdirectaudiodriver.mm b/framework/audio/driver/platform/osx/osxdirectaudiodriver.mm new file mode 100644 index 0000000000..fa2423569d --- /dev/null +++ b/framework/audio/driver/platform/osx/osxdirectaudiodriver.mm @@ -0,0 +1,1004 @@ +/* + * SPDX-License-Identifier: GPL-3.0-only + * MuseScore-CLA-applies + * + * MuseScore + * Music Composition & Notation + * + * Copyright (C) 2026 MuseScore Limited and others + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 3 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include "osxdirectaudiodriver.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "common/audiotypes.h" +#include "common/audioworkgroup.h" +#include "translation.h" +#include "log.h" + +typedef AudioDeviceID OSXAudioDeviceID; + +using namespace muse; +using namespace muse::audio; + +struct ChannelBufferDetails { + int streamNumber; + unsigned int hardwareChannelNumber; + unsigned int dataOffsetSamples; + unsigned int dataStrideSamples; +}; + +struct OSXDirectAudioDriver::Data { + Spec format; + AudioDeviceIOProcID procId{}; + bool canBeDirectlyMapped = false; + + OSXAudioDeviceID deviceId{}; + + std::atomic stopPending{ false }; + std::atomic stopped{ false }; + + std::vector channelBufferOutputDetails; + std::vector outBuffer; + + Data& operator=(const Data& other) + { + format = other.format; + procId = other.procId; + canBeDirectlyMapped = other.canBeDirectlyMapped; + deviceId = other.deviceId; + stopPending = other.stopPending.load(); + stopped = other.stopped.load(); + channelBufferOutputDetails = other.channelBufferOutputDetails; + outBuffer = other.outBuffer; + return *this; + } + + void clear() + { + *this = Data(); + } +}; +namespace muse::audio { +AudioWorkGroup makeAudioWorkgroup(void* opaqueHandle); +AudioWorkGroup OSXDirectAudioDriver::getAudioWorkGroup() const +{ + return m_audioWorkGroup; +} + +async::Notification OSXDirectAudioDriver::currentWorkgroupChanged() const +{ + return m_currentWorkgroupChanged; +} +} // namespace muse::audio + +OSXDirectAudioDriver::OSXDirectAudioDriver() + : m_data(std::make_unique()) +{ + initDeviceMapListener(); + updateDeviceMap(); +} + +OSXDirectAudioDriver::~OSXDirectAudioDriver() +{ + removeDeviceMapListener(); + doClose(); +} + +void OSXDirectAudioDriver::init() +{ +} + +std::string OSXDirectAudioDriver::name() const +{ + return "OSX"; +} + +muse::audio::AudioDeviceID OSXDirectAudioDriver::defaultDevice() 
const +{ + return DEFAULT_DEVICE_ID; +} + +static uint32_t outputBufferSamplesPerChannel(const AudioBuffer& buffer) +{ + if (buffer.mNumberChannels == 0) { + return 0; + } + + return buffer.mDataByteSize / (buffer.mNumberChannels * sizeof(float)); +} + +static uint32_t callbackSamplesPerChannel(const AudioBufferList* outputData, + const std::vector& details) +{ + std::optional result; + + for (const ChannelBufferDetails& detail : details) { + if (detail.streamNumber < 0 || static_cast(detail.streamNumber) >= outputData->mNumberBuffers) { + continue; + } + + const AudioBuffer& outputBuffer = outputData->mBuffers[detail.streamNumber]; + const uint32_t frames = outputBufferSamplesPerChannel(outputBuffer); + if (frames == 0) { + continue; + } + + result = result.has_value() ? std::min(result.value(), frames) : frames; + } + + return result.value_or(0); +} + +static std::optional getSameRequestedSampleCount(const AudioBufferList* outOutputData, + const std::vector& details) +{ + std::optional sampleCount; + + for (const ChannelBufferDetails& detail : details) { + if (detail.streamNumber < 0 || static_cast(detail.streamNumber) >= outOutputData->mNumberBuffers) { + continue; + } + + const AudioBuffer& outputBuffer = outOutputData->mBuffers[detail.streamNumber]; + const uint32_t frames = outputBufferSamplesPerChannel(outputBuffer); + if (frames == 0) { + continue; + } + + if (!sampleCount.has_value()) { + sampleCount = frames; + } else if (sampleCount.value() != frames) { + return std::nullopt; + } + } + + return sampleCount; +} + +//TODO: make use of timing parameters +static int coreAudioIOProc(AudioObjectID /* inDevice*/, + const AudioTimeStamp* /* inNow */, + const AudioBufferList* /* inInputData */, + const AudioTimeStamp* /* inInputTime */, + AudioBufferList* outOutputData, + const AudioTimeStamp* /* inOutputTime */, + void* __nullable inClientData) +{ + auto* data = reinterpret_cast(inClientData); + if (data->stopPending) { + AudioDeviceStop(data->deviceId, 
data->procId); + data->stopPending = false; + data->stopped = true; + return noErr; + } + if (data->channelBufferOutputDetails.empty()) { + return noErr; + } + + uint32_t samplesPerChannel = callbackSamplesPerChannel(outOutputData, data->channelBufferOutputDetails); + if (samplesPerChannel == 0) { + return noErr; + } + + const uint32_t callbackDataStride = data->format.output.audioChannelCount; + int dataSize = static_cast(samplesPerChannel * callbackDataStride * sizeof(float)); + if (data->canBeDirectlyMapped && getSameRequestedSampleCount(outOutputData, + data->channelBufferOutputDetails).value_or(0) == samplesPerChannel) { + data->format.callback( + (uint8_t*)outOutputData + ->mBuffers[data->channelBufferOutputDetails[0].streamNumber] + .mData, + dataSize); + return 0; + } + + if (data->outBuffer.size() < static_cast(samplesPerChannel) * callbackDataStride) { + samplesPerChannel = static_cast(data->outBuffer.size() / callbackDataStride); + dataSize = static_cast(samplesPerChannel * callbackDataStride * sizeof(float)); + } + + data->format.callback((uint8_t*)data->outBuffer.data(), dataSize); + + for (int ch = 0; ch < (int)data->channelBufferOutputDetails.size(); ++ch) { + const auto& details = data->channelBufferOutputDetails[ch]; + auto& outputBuffer = outOutputData->mBuffers[details.streamNumber]; + int stride = details.dataStrideSamples; + if (stride == 0) { + continue; + } + + const uint64_t requiredSamples = details.dataOffsetSamples + + static_cast(samplesPerChannel - 1) * stride + + 1; + if (outputBuffer.mDataByteSize < (requiredSamples * sizeof(float))) { + // this is very unexpected. 
Should always be the same as samplesPerChannel + continue; + } + + float* src = data->outBuffer.data() + ch; + float* dest = static_cast(outputBuffer.mData) + + details.dataOffsetSamples; + for (int j = samplesPerChannel; --j >= 0;) { + *dest = *src; + dest += stride; + src += callbackDataStride; + } + } + return 0; +} + +std::optional > getPreferredStereoHardwareChannels(OSXAudioDeviceID deviceId) +{ + AudioObjectPropertyAddress addr{ + kAudioDevicePropertyPreferredChannelsForStereo, + kAudioDevicePropertyScopeOutput, kAudioObjectPropertyElementMaster }; + UInt32 stereo[2] = {}; + UInt32 size = sizeof(stereo); + + if (AudioObjectGetPropertyData(deviceId, &addr, 0, nullptr, &size, stereo) != noErr) { + return std::nullopt; + } + return std::array { stereo[0], stereo[1] }; +} + +bool filterAndReorderStereoHardwareChannels(const OSXAudioDeviceID& deviceId, + std::vector& details) +{ + auto stereoChannels = getPreferredStereoHardwareChannels(deviceId); + if (!stereoChannels.has_value()) { + return false; + } + details.erase( + std::remove_if(details.begin(), details.end(), + [&stereoChannels](const ChannelBufferDetails& details) { + return details.hardwareChannelNumber + != stereoChannels->at(0) + && details.hardwareChannelNumber + != stereoChannels->at(1); + }), + details.end()); + std::sort(details.begin(), details.end(), + [&stereoChannels](const ChannelBufferDetails& a, + const ChannelBufferDetails& b) { + auto aIndex = (a.hardwareChannelNumber + == stereoChannels->at(0)) + ? 0 + : 1; + auto bIndex = (b.hardwareChannelNumber + == stereoChannels->at(0)) + ? 
0 + : 1; + return aIndex < bIndex; + }); + return true; +} + +std::vector getChannelBufferDetails(const OSXAudioDeviceID& deviceId, + const std::function& onError) +{ + std::vector result; + + UInt32 size{}; + AudioObjectPropertyAddress address = { kAudioDevicePropertyStreamConfiguration, + kAudioDevicePropertyScopeOutput, + kAudioObjectPropertyElementMaster }; + if (auto status = AudioObjectGetPropertyDataSize(deviceId, &address, 0, nullptr, &size); status != noErr) { + onError("Failed to get device " + std::to_string(deviceId) + " stream configuration" + ", err: " + std::to_string(status), status); + return result; + } + + std::vector buf(size); + + auto status = AudioObjectGetPropertyData(deviceId, &address, 0, nullptr, + &size, buf.data()); + if (status != noErr) { + onError("Failed to get device " + std::to_string(deviceId) + " stream configuration" + ", err: " + std::to_string(status), status); + return result; + } + + AudioBufferList* bufList = reinterpret_cast(buf.data()); + + const int numStreams = static_cast(bufList->mNumberBuffers); + unsigned int hardwareChannelNumber = 1; + + for (int i = 0; i < numStreams; ++i) { + auto& b = bufList->mBuffers[i]; + for (unsigned int j = 0; j < b.mNumberChannels; ++j) { + ChannelBufferDetails details { + .streamNumber = i, + .hardwareChannelNumber = hardwareChannelNumber++, + .dataOffsetSamples = j, + .dataStrideSamples + =b.mNumberChannels }; + result.push_back(details); + } + } + + return result; +} + +static bool isSupportedCallbackFormat(const AudioStreamBasicDescription& format) +{ + const bool isNativeEndian = (format.mFormatFlags & kAudioFormatFlagIsBigEndian) == kAudioFormatFlagsNativeEndian; + const bool isNativeFloatPacked = (format.mFormatFlags & kAudioFormatFlagsNativeFloatPacked) == kAudioFormatFlagsNativeFloatPacked; + + return format.mFormatID == kAudioFormatLinearPCM + && isNativeEndian + && isNativeFloatPacked + && format.mBitsPerChannel == sizeof(float) * 8 + && format.mFramesPerPacket == 1; +} + 
+static bool validateOutputStreamFormats(OSXAudioDeviceID deviceId, + const std::vector& details, + const std::function& logError) +{ + AudioObjectPropertyAddress streamsAddress { + kAudioDevicePropertyStreams, + kAudioDevicePropertyScopeOutput, + kAudioObjectPropertyElementMaster + }; + + UInt32 streamsSize = 0; + OSStatus status = AudioObjectGetPropertyDataSize(deviceId, &streamsAddress, 0, nullptr, &streamsSize); + if (status != noErr) { + logError("Failed to get device " + std::to_string(deviceId) + " output streams size, err: ", status); + return false; + } + + std::vector streamIds(streamsSize / sizeof(AudioStreamID)); + status = AudioObjectGetPropertyData(deviceId, &streamsAddress, 0, nullptr, &streamsSize, streamIds.data()); + if (status != noErr) { + logError("Failed to get device " + std::to_string(deviceId) + " output streams, err: ", status); + return false; + } + + std::vector checkedStreams; + for (const ChannelBufferDetails& detail : details) { + if (std::find(checkedStreams.cbegin(), checkedStreams.cend(), detail.streamNumber) != checkedStreams.cend()) { + continue; + } + + if (detail.streamNumber < 0 || static_cast(detail.streamNumber) >= streamIds.size()) { + LOGE() << "CoreAudio output stream index " << detail.streamNumber + << " is outside stream list for device " << deviceId; + return false; + } + + AudioStreamBasicDescription format {}; + UInt32 formatSize = sizeof(format); + AudioObjectPropertyAddress formatAddress { + kAudioStreamPropertyVirtualFormat, + kAudioObjectPropertyScopeGlobal, + kAudioObjectPropertyElementMaster + }; + + status = AudioObjectGetPropertyData(streamIds[detail.streamNumber], &formatAddress, 0, nullptr, &formatSize, &format); + if (status != noErr) { + logError("Failed to get device " + std::to_string(deviceId) + " output stream format, err: ", status); + return false; + } + + if (!isSupportedCallbackFormat(format)) { + LOGE() << "Unsupported CoreAudio output stream format for device " << deviceId + << ", stream " << 
detail.streamNumber + << ": expected native packed 32-bit float PCM"; + return false; + } + + checkedStreams.push_back(detail.streamNumber); + } + + return true; +} + +inline static bool canBeDirectlyMapped(const std::vector& details, + uint32_t callbackDataStride) +{ + std::optional firstStreamNumber; + std::optional expectedOffset; + for (int channel = 0; channel < (int)details.size(); ++channel) { + const auto& detail = details[channel]; + if (detail.dataStrideSamples != callbackDataStride) { + return false; + } + if (!firstStreamNumber.has_value()) { + firstStreamNumber = detail.streamNumber; + } + if (firstStreamNumber.value() != detail.streamNumber) { + return false; + } + if (!expectedOffset.has_value()) { + expectedOffset = detail.dataOffsetSamples; + } + if (expectedOffset.value() != detail.dataOffsetSamples) { + return false; + } + *expectedOffset += 1; + } + return true; +} + +static std::vector getFittingChannelStreamsFromDevice(OSXAudioDeviceID deviceId, int channelCount, + const std::function& logError) +{ + auto outputStreamInfos = getChannelBufferDetails(deviceId, logError); + if (outputStreamInfos.size() + < (unsigned int)channelCount) { + logError("Not enough channels are available with current device", noErr); + return {}; + } + if (channelCount == 2) { + filterAndReorderStereoHardwareChannels(deviceId, outputStreamInfos); + } + if (outputStreamInfos.size() < (unsigned int)channelCount) { + logError("Not enough channels are available with current device after filtering for stereo preference", noErr); + return {}; + } + if (outputStreamInfos.size() + > (unsigned int)channelCount) { + // if there are more hardware channels than requested, we just use the first ones. 
This is a fallback if filterStereoHardwareChannels was not successful + outputStreamInfos.resize(channelCount); + } + return outputStreamInfos; +} + +static std::optional closestAvailableSampleRate(OSXAudioDeviceID deviceId, Float64 requested, + const std::function& onError) +{ + AudioObjectPropertyAddress address { + kAudioDevicePropertyAvailableNominalSampleRates, + kAudioObjectPropertyScopeGlobal, + kAudioObjectPropertyElementMaster + }; + + UInt32 size = 0; + if (auto status = AudioObjectGetPropertyDataSize(deviceId, &address, 0, nullptr, &size); status != noErr || size == 0) { + onError("Failed to get device " + std::to_string(deviceId) + " available sample rates" + ", err: " + std::to_string(status), + status); + return std::nullopt; + } + + std::vector ranges(size / sizeof(AudioValueRange)); + if (auto status = AudioObjectGetPropertyData(deviceId, &address, 0, nullptr, &size, ranges.data()); status != noErr) { + onError("Failed to get device " + std::to_string(deviceId) + " available sample rates" + ", err: " + std::to_string(status), + status); + return std::nullopt; + } + + std::optional best; + Float64 bestDistance = std::numeric_limits::max(); + + for (const AudioValueRange& range : ranges) { + Float64 candidate = requested; + + if (requested < range.mMinimum) { + candidate = range.mMinimum; + } else if (requested > range.mMaximum) { + candidate = range.mMaximum; + } + + Float64 distance = std::abs(candidate - requested); + if (distance < bestDistance) { + best = candidate; + bestDistance = distance; + } + } + + return best; +} + +static std::optional prepareDeviceWithOutputSpec(OSXAudioDeviceID deviceId, const OutputSpec& spec, + const std::function& logError) +{ + AudioObjectPropertyAddress requestedSampleRate{ + kAudioDevicePropertyNominalSampleRate, kAudioObjectPropertyScopeGlobal, + kAudioObjectPropertyElementMaster }; + + Float64 currentSampleRate = 0.0; + UInt32 sampleRateSize = sizeof(currentSampleRate); + + OSStatus result + 
=AudioObjectGetPropertyData(deviceId, &requestedSampleRate, 0, nullptr, + &sampleRateSize, ¤tSampleRate); + + if (result != noErr) { + logError("Failed to get current sample rate for device " + std::to_string(deviceId) + ", err: ", result); + return std::nullopt; + } + + Float64 sampleRate = closestAvailableSampleRate(deviceId, spec.sampleRate, logError) + .value_or(currentSampleRate); + + if (std::abs(currentSampleRate - sampleRate) > 0.5) { + result = AudioObjectSetPropertyData( + deviceId, + &requestedSampleRate, + 0, + nullptr, + sizeof(sampleRate), + &sampleRate); + + if (result != noErr) { + sampleRate = currentSampleRate; + } + } else { + sampleRate = currentSampleRate; + } + + AudioValueRange bufferSizeRange = { 0, 0 }; + UInt32 bufferSizeRangeSize = sizeof(AudioValueRange); + AudioObjectPropertyAddress bufferSizeRangeAddress = { + .mSelector = kAudioDevicePropertyBufferFrameSizeRange, + .mScope = kAudioObjectPropertyScopeGlobal, + .mElement = kAudioObjectPropertyElementMaster + }; + + result = AudioObjectGetPropertyData(deviceId, &bufferSizeRangeAddress, 0, 0, &bufferSizeRangeSize, &bufferSizeRange); + if (result != noErr) { + logError("Failed to create Audio Queue Output, err: ", result); + return std::nullopt; + } + + samples_t minBufferSize = static_cast(bufferSizeRange.mMinimum); + samples_t maxBufferSize = static_cast(bufferSizeRange.mMaximum); + UInt32 bufferSizeOut = std::min(maxBufferSize, std::max(minBufferSize, spec.samplesPerChannel)); + + AudioObjectPropertyAddress preferredBufferSizeAddress = { + .mSelector = kAudioDevicePropertyBufferFrameSize, + .mScope = kAudioObjectPropertyScopeGlobal, + .mElement = kAudioObjectPropertyElementMaster + }; + + result = AudioObjectSetPropertyData(deviceId, &preferredBufferSizeAddress, 0, 0, sizeof(bufferSizeOut), (void*)&bufferSizeOut); + if (result != noErr) { + logError("Failed to create Audio Queue Output, err: ", result); + return std::nullopt; + } + + UInt32 actualBufferSizeOut = bufferSizeOut; + UInt32 
actualBufferSizeOutSize = sizeof(actualBufferSizeOut); + result = AudioObjectGetPropertyData(deviceId, &preferredBufferSizeAddress, 0, 0, &actualBufferSizeOutSize, &actualBufferSizeOut); + if (result != noErr) { + logError("Failed to get Audio Device bufferFrameSize, err: ", result); + return std::nullopt; + } + bufferSizeOut = actualBufferSizeOut; + + return OutputSpec { + .sampleRate = (uint64_t)sampleRate, .samplesPerChannel = bufferSizeOut, .audioChannelCount = spec.audioChannelCount + }; +} + +AudioWorkGroup createAudioWorkgroup(OSXAudioDeviceID deviceId) +{ + AudioObjectPropertyAddress pa; + pa.mSelector = kAudioDevicePropertyIOThreadOSWorkgroup; + pa.mScope = kAudioObjectPropertyScopeWildcard; + pa.mElement = kAudioObjectPropertyElementMaster; + os_workgroup_t workgroup; + uint32_t workgroupSize = sizeof(workgroup); + if (AudioObjectGetPropertyData(deviceId, &pa, 0, nullptr, &workgroupSize, + &workgroup) != noErr) { + return {}; + } + + return makeAudioWorkgroup(workgroup); +} + +bool OSXDirectAudioDriver::open(const Spec& spec, Spec* activeSpec) +{ + if (isOpened()) { + return false; + } + + m_data->clear(); + + auto deviceId = getAudioDeviceId(spec.deviceId); + if (!deviceId) { + logError("Failed to find device " + spec.deviceId, noErr); + return false; + } + auto bestOutputStreamInfos = getFittingChannelStreamsFromDevice( + *deviceId, spec.output.audioChannelCount, &logError); + if (bestOutputStreamInfos.empty()) { + return false; + } + + IF_ASSERT_FAILED_X(validateOutputStreamFormats(*deviceId, bestOutputStreamInfos, &logError), + "CoreAudio output stream format must be native packed 32-bit float PCM") { + return false; + } + + auto actualOutputSpec = prepareDeviceWithOutputSpec(*deviceId, spec.output, &logError); + if (!actualOutputSpec) { + return false; + } + + m_data->canBeDirectlyMapped = canBeDirectlyMapped(bestOutputStreamInfos, + spec.output.audioChannelCount); + m_data->format = spec; + m_data->format.output = actualOutputSpec.value(); + 
m_data->format.deviceId = QString::number(*deviceId).toStdString(); + m_data->channelBufferOutputDetails = std::move(bestOutputStreamInfos); + m_data->outBuffer.resize(m_data->format.output.samplesPerChannel + * m_data->format.output.audioChannelCount); + m_data->deviceId = *deviceId; + + auto result = AudioDeviceCreateIOProcID(*deviceId, coreAudioIOProc, m_data.get(), + &m_data->procId); + if (result != noErr) { + m_data->clear(); + logError("Failed to create Audio Device IO Proc, err: ", result); + return false; + } + + result = AudioDeviceStart(*deviceId, m_data->procId); + if (result != noErr) { + AudioDeviceDestroyIOProcID(*deviceId, m_data->procId); + m_data->clear(); + logError("Failed to start Audio Device, err: ", result); + return false; + } + + m_audioWorkGroup = createAudioWorkgroup(*deviceId); + m_currentWorkgroupChanged.notify(); + + if (activeSpec) { + *activeSpec = m_data->format; + } + + m_activeSpecChanged.send(m_data->format); + + LOGI() << "Connected to " << m_data->format.deviceId + << " with bufferSize " << m_data->format.output.samplesPerChannel + << ", sampleRate " << m_data->format.output.sampleRate; + + return true; +} + +void OSXDirectAudioDriver::close() +{ + doClose(); +} + +void OSXDirectAudioDriver::doClose() +{ + if (!isOpened()) { + return; + } + + m_data->stopPending = true; + // we spin while we wait for the callback to stop the device. 
That way we can be sure that data will no longer be used by the callback + for (int i = 0; i < 100 && !m_data->stopped; ++i) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + + if (!m_data->stopped) { + [[maybe_unused]] auto _leaked = m_data.release(); // we let it leak since it might still be accessed by the callback + m_data = std::make_unique(); + } else { + AudioDeviceDestroyIOProcID(m_data->deviceId, m_data->procId); + m_data->clear(); + } + m_audioWorkGroup = {}; + m_currentWorkgroupChanged.notify(); +} + +bool OSXDirectAudioDriver::isOpened() const +{ + return m_data->procId != nullptr; +} + +const OSXDirectAudioDriver::Spec& OSXDirectAudioDriver::activeSpec() const +{ + return m_data->format; +} + +async::Channel OSXDirectAudioDriver::activeSpecChanged() const +{ + return m_activeSpecChanged; +} + +AudioDeviceList OSXDirectAudioDriver::availableOutputDevices() const +{ + std::lock_guard lock(m_devicesMutex); + + AudioDeviceList deviceList; + deviceList.push_back({ DEFAULT_DEVICE_ID, muse::trc("audio", "System default") }); + + for (auto& device : m_outputDevices) { + AudioDevice deviceInfo; + deviceInfo.id = QString::number(device.first).toStdString(); + deviceInfo.name = device.second; + + deviceList.push_back(deviceInfo); + } + + return deviceList; +} + +async::Notification OSXDirectAudioDriver::availableOutputDevicesChanged() const +{ + return m_availableOutputDevicesChanged; +} + +void OSXDirectAudioDriver::updateDeviceMap() +{ + std::lock_guard lock(m_devicesMutex); + + UInt32 propertySize; + OSStatus result; + std::vector audioObjects = {}; + m_outputDevices.clear(); + m_inputDevices.clear(); + + AudioObjectPropertyAddress devicesPropertyAddress = { + .mSelector = kAudioHardwarePropertyDevices, + .mScope = kAudioObjectPropertyScopeGlobal, + .mElement = kAudioObjectPropertyElementMaster, + }; + + AudioObjectPropertyAddress namePropertyAddress = { + .mSelector = kAudioDevicePropertyDeviceNameCFString, + .mScope = 
kAudioObjectPropertyScopeGlobal, + .mElement = kAudioObjectPropertyElementMaster, + }; + + auto getStreamsCount + = [](const AudioObjectID& id, const AudioObjectPropertyScope& scope, const std::string& deviceName) -> unsigned int { + AudioObjectPropertyAddress propertyAddress = { + .mSelector = kAudioDevicePropertyStreamConfiguration, + .mScope = scope, + .mElement = kAudioObjectPropertyElementWildcard + }; + UInt32 propertySize = 0; + OSStatus result = AudioObjectGetPropertyDataSize(id, &propertyAddress, 0, NULL, &propertySize); + if (result != noErr) { + logError("Failed to get device's (" + deviceName + ") streams size, err: ", result); + return 0; + } + + auto freeBufferList = [](AudioBufferList* list) { free(list); }; + std::unique_ptr bufferList(reinterpret_cast(malloc(propertySize)), + freeBufferList); + result = AudioObjectGetPropertyData(id, &propertyAddress, 0, NULL, &propertySize, bufferList.get()); + if (result != noErr) { + logError("Failed to get device's (" + deviceName + ") streams, err: ", result); + return 0; + } + + return bufferList->mNumberBuffers; + }; + + result = AudioObjectGetPropertyDataSize(kAudioObjectSystemObject, &devicesPropertyAddress, 0, NULL, &propertySize); + if (result != noErr) { + logError("Failed to get devices count, err: ", result); + return; + } + + audioObjects.resize(propertySize / sizeof(OSXAudioDeviceID)); + result = AudioObjectGetPropertyData(kAudioObjectSystemObject, &devicesPropertyAddress, 0, NULL, &propertySize, audioObjects.data()); + if (result != noErr) { + logError("Failed to get devices list, err: ", result); + return; + } + + for (auto&& deviceId : audioObjects) { + CFStringRef nameRef; + propertySize = sizeof(nameRef); + + result = AudioObjectGetPropertyData(deviceId, &namePropertyAddress, 0, NULL, &propertySize, &nameRef); + if (result != noErr) { + logError("Failed to get device's name, err: ", result); + continue; + } + + NSString* nsString = (NSString*)nameRef; + std::string deviceName = [nsString 
UTF8String]; + + if (getStreamsCount(deviceId, kAudioObjectPropertyScopeOutput, deviceName) > 0) { + m_outputDevices[deviceId] = deviceName; + } + + if (getStreamsCount(deviceId, kAudioObjectPropertyScopeInput, deviceName) > 0) { + m_inputDevices[deviceId] = deviceName; + } + + CFRelease(nameRef); + } + m_availableOutputDevicesChanged.notify(); +} + +std::vector OSXDirectAudioDriver::availableOutputDeviceBufferSizes() const +{ + OSXAudioDeviceID osxDeviceId = this->osxDeviceId(); + AudioObjectPropertyAddress bufferFrameSizePropertyAddress = { + .mSelector = kAudioDevicePropertyBufferFrameSizeRange, + .mScope = kAudioObjectPropertyScopeGlobal, + .mElement = kAudioObjectPropertyElementMaster + }; + + AudioValueRange range = { 0, 0 }; + UInt32 dataSize = sizeof(AudioValueRange); + OSStatus rangeResult = AudioObjectGetPropertyData(osxDeviceId, &bufferFrameSizePropertyAddress, 0, NULL, &dataSize, &range); + if (rangeResult != noErr) { + logError("Failed to get device " + m_data->format.deviceId + " bufferFrameSize, err: ", rangeResult); + return {}; + } + + samples_t minimum = std::max(static_cast(range.mMinimum), MINIMUM_BUFFER_SIZE); + samples_t maximum = std::min(static_cast(range.mMaximum), MAXIMUM_BUFFER_SIZE); + + std::vector result; + for (samples_t bufferSize = maximum; bufferSize >= minimum;) { + result.push_back(bufferSize); + bufferSize /= 2; + } + + std::sort(result.begin(), result.end()); + + return result; +} + +//TODO: replace hardcoded values with truth +std::vector OSXDirectAudioDriver::availableOutputDeviceSampleRates() const +{ + return { + 44100, + 48000, + 88200, + 96000, + }; +} + +static std::optional getDefaultDeviceId(const std::function& logError) +{ + OSXAudioDeviceID osxDeviceId = kAudioObjectUnknown; + UInt32 deviceIdSize = sizeof(osxDeviceId); + + AudioObjectPropertyAddress deviceNamePropertyAddress = { + .mSelector = kAudioHardwarePropertyDefaultOutputDevice, + .mScope = kAudioDevicePropertyScopeOutput, + .mElement = 
kAudioObjectPropertyElementMaster + }; + + OSStatus result = AudioObjectGetPropertyData(kAudioObjectSystemObject, &deviceNamePropertyAddress, 0, 0, &deviceIdSize, &osxDeviceId); + if (result != noErr) { + logError("Failed to get default device ID, err: ", result); + return std::nullopt; + } + + return osxDeviceId; +} + +UInt32 OSXDirectAudioDriver::osxDeviceId() const +{ + AudioDeviceID deviceId = m_data->format.deviceId; + if (deviceId == DEFAULT_DEVICE_ID) { + auto defaultDeviceId = getDefaultDeviceId(&logError); + if (!defaultDeviceId) { + logError("Failed to get default device ID, err: ", noErr); + return kAudioObjectUnknown; + } + return *defaultDeviceId; + } + + return QString::fromStdString(deviceId).toInt(); +} + +void OSXDirectAudioDriver::logError(const std::string message, OSStatus error) +{ + if (error == noErr) { + return; + } + + char errorString[5]; + + UInt32 errorBigEndian = CFSwapInt32HostToBig(error); + errorString[0] = errorBigEndian & 0xFF; + errorString[1] = (errorBigEndian >> 8) & 0xFF; + errorString[2] = (errorBigEndian >> 16) & 0xFF; + errorString[3] = (errorBigEndian >> 24) & 0xFF; + errorString[4] = '\0'; + if (isprint(errorString[0]) && isprint(errorString[1]) && isprint(errorString[2]) && isprint(errorString[3])) { + LOGE() << message << errorString << "(" << error << ")"; + } else { + LOGE() << message << error; + } +} + +static OSStatus onDeviceListChanged(AudioObjectID inObjectID, UInt32 inNumberAddresses, const AudioObjectPropertyAddress* inAddresses, + void* inClientData) +{ + UNUSED(inObjectID); + UNUSED(inNumberAddresses); + UNUSED(inAddresses); + auto driver = reinterpret_cast(inClientData); + driver->updateDeviceMap(); + + return noErr; +} + +void OSXDirectAudioDriver::initDeviceMapListener() +{ + AudioObjectPropertyAddress propertyAddress; + propertyAddress.mSelector = kAudioHardwarePropertyDevices; + propertyAddress.mScope = kAudioObjectPropertyScopeGlobal; + propertyAddress.mElement = kAudioObjectPropertyElementMaster; + + 
auto result = AudioObjectAddPropertyListener(kAudioObjectSystemObject, &propertyAddress, &onDeviceListChanged, this); + if (result != noErr) { + logError("Failed to add devices list listener, err: ", result); + return; + } + + m_deviceMapListenerRegistered = true; +} + +void OSXDirectAudioDriver::removeDeviceMapListener() +{ + if (!m_deviceMapListenerRegistered) { + return; + } + + AudioObjectPropertyAddress propertyAddress; + propertyAddress.mSelector = kAudioHardwarePropertyDevices; + propertyAddress.mScope = kAudioObjectPropertyScopeGlobal; + propertyAddress.mElement = kAudioObjectPropertyElementMaster; + + auto result = AudioObjectRemovePropertyListener(kAudioObjectSystemObject, &propertyAddress, &onDeviceListChanged, this); + if (result != noErr) { + logError("Failed to remove devices list listener, err: ", result); + return; + } + + m_deviceMapListenerRegistered = false; +} + +std::optional muse::audio::OSXDirectAudioDriver::getAudioDeviceId( + const AudioDeviceID& deviceId) const +{ + if (deviceId.empty() || deviceId == DEFAULT_DEVICE_ID) { + return getDefaultDeviceId(&logError); //default device used + } + + std::lock_guard lock(m_devicesMutex); + + uint deviceIdInt = QString::fromStdString(deviceId).toInt(); + auto index = std::find_if(m_outputDevices.begin(), m_outputDevices.end(), [&deviceIdInt](auto& d) { + return d.first == deviceIdInt; + }); + + if (index == m_outputDevices.end()) { + return std::nullopt; + } + return index->first; +} diff --git a/framework/audio/engine/internal/mixer.cpp b/framework/audio/engine/internal/mixer.cpp index 29a2695fea..a7c4758b5b 100644 --- a/framework/audio/engine/internal/mixer.cpp +++ b/framework/audio/engine/internal/mixer.cpp @@ -26,12 +26,6 @@ #include "audio/common/audiosanitizer.h" #include "audio/common/audioerrors.h" -#include "muse_framework_config.h" - -#ifdef MUSE_THREADS_SUPPORT -#include "concurrency/taskscheduler.h" -#endif - #include "log.h" using namespace muse; @@ -44,22 +38,11 @@ constexpr size_t 
MIN_TRACK_COUNT_FOR_MULTITHREADING = 2; Mixer::~Mixer() { ONLY_AUDIO_MAIN_OR_ENGINE_THREAD; - delete m_taskScheduler; } void Mixer::init() { ONLY_AUDIO_ENGINE_THREAD; - -#ifdef MUSE_THREADS_SUPPORT - m_taskScheduler = new TaskScheduler(); - - if (!m_taskScheduler->setThreadsPriority(ThreadPriority::High)) { - LOGE() << "Unable to change audio threads priority"; - } - - AudioSanitizer::setMixerThreads(m_taskScheduler->threadIdSet()); -#endif } Ret Mixer::addTrack(TrackChainPtr trackChain, const AuxSendsParams& auxSends) @@ -72,6 +55,7 @@ Ret Mixer::addTrack(TrackChainPtr trackChain, const AuxSendsParams& auxSends) trackData.buffer.resize(outBufferSize); m_tracks.emplace_back(std::move(trackData)); + m_trackTasks.reserve(m_tracks.size()); setAuxSends(trackData.trackId, auxSends); @@ -179,7 +163,8 @@ void Mixer::process(float* outBuffer, samples_t samplesPerChannel) processAuxChannels(outBuffer, samplesPerChannel); } -void Mixer::processTrackChannels(size_t outBufferSize, size_t samplesPerChannel) +void Mixer::processTrackChannels(size_t outBufferSize, + size_t samplesPerChannel) { auto processChannel = [outBufferSize, samplesPerChannel](TrackData& trackData) { IF_ASSERT_FAILED(trackData.chain) { @@ -199,8 +184,7 @@ void Mixer::processTrackChannels(size_t outBufferSize, size_t samplesPerChannel) #ifdef MUSE_THREADS_SUPPORT if (useMultithreading()) { - std::vector > futures; - + m_trackTasks.clear(); for (auto& t : m_tracks) { t.processed = false; @@ -208,13 +192,11 @@ void Mixer::processTrackChannels(size_t outBufferSize, size_t samplesPerChannel) continue; } - std::future future = m_taskScheduler->submit(processChannel, std::ref(t)); - futures.emplace_back(std::move(future)); - } - - for (auto& f : futures) { - f.wait(); + m_trackTasks.emplace_back([tPtr = &t, processChannel] { + processChannel(*tPtr); + }); } + audioTaskScheduler()->submitRealtimeTasksAndWait(m_trackTasks); } else #endif { diff --git a/framework/audio/engine/internal/mixer.h 
b/framework/audio/engine/internal/mixer.h index 535662df49..a5f606be51 100644 --- a/framework/audio/engine/internal/mixer.h +++ b/framework/audio/engine/internal/mixer.h @@ -24,6 +24,7 @@ #include +#include "common/iaudiotaskscheduler.h" #include "global/modularity/ioc.h" #include "global/async/asyncable.h" @@ -35,6 +36,7 @@ #include "nodes/signalnode.h" #include "nodes/trackchain.h" +#include "muse_framework_config.h" namespace muse { class TaskScheduler; } @@ -50,6 +52,9 @@ namespace muse::audio::engine { class Mixer : public AudioNode, public async::Asyncable { GlobalInject audioFactory; +#ifdef MUSE_THREADS_SUPPORT + GlobalInject audioTaskScheduler; +#endif public: ~Mixer() override; @@ -84,8 +89,6 @@ class Mixer : public AudioNode, public async::Asyncable bool useMultithreading() const; - TaskScheduler* m_taskScheduler = nullptr; - struct TrackData { TrackId trackId; TrackChainPtr chain; @@ -97,6 +100,8 @@ class Mixer : public AudioNode, public async::Asyncable std::vector m_auxTracks; std::map m_auxSends; + std::vector m_trackTasks; + size_t m_nonMutedTrackCount = 0; std::unordered_set m_tracksToProcessWhenIdle; }; diff --git a/framework/audio/iaudiodriver.h b/framework/audio/iaudiodriver.h index 8076a9de07..9d81dc0875 100644 --- a/framework/audio/iaudiodriver.h +++ b/framework/audio/iaudiodriver.h @@ -27,6 +27,7 @@ #include #include +#include "common/audioworkgroup.h" #include "global/async/notification.h" #include "audio/common/audiotypes.h" @@ -52,6 +53,8 @@ class IAudioDriver virtual std::string name() const = 0; virtual AudioDeviceID defaultDevice() const = 0; + virtual AudioWorkGroup getAudioWorkGroup() const { return {}; } + virtual async::Notification currentWorkgroupChanged() const { return {}; } virtual bool open(const Spec& spec, Spec* activeSpec) = 0; virtual void close() = 0; diff --git a/framework/audio/main/audiomodule.cpp b/framework/audio/main/audiomodule.cpp index 155e5f2123..ec781e295d 100644 --- a/framework/audio/main/audiomodule.cpp +++ 
b/framework/audio/main/audiomodule.cpp @@ -21,6 +21,7 @@ */ #include "audiomodule.h" +#include "common/iaudiotaskscheduler.h" #include "ui/iuiactionsregister.h" #include "global/modularity/ioc.h" @@ -87,6 +88,7 @@ void AudioModule::registerExports() globalIoc()->registerExport(mname, std::make_shared()); globalIoc()->registerExport(mname, m_rpcChannel); globalIoc()->registerExport(mname, m_audioDriverController); + globalIoc()->registerExport(mname, m_audioDriverController->getAudioTaskScheduler()); globalIoc()->registerExport(mname, m_soundFontController); globalIoc()->registerExport(mname, m_startAudioController); } diff --git a/framework/audio/main/internal/audiodrivercontroller.cpp b/framework/audio/main/internal/audiodrivercontroller.cpp index 6a3996eaf2..f68b80bef4 100644 --- a/framework/audio/main/internal/audiodrivercontroller.cpp +++ b/framework/audio/main/internal/audiodrivercontroller.cpp @@ -22,6 +22,8 @@ #include "audiodrivercontroller.h" +#include "common/audiotaskscheduler.h" +#include "common/iaudiotaskscheduler.h" #include "global/async/async.h" #include "muse_framework_config.h" @@ -47,6 +49,7 @@ #ifdef Q_OS_MACOS #include "audio/driver/platform/osx/osxaudiodriver.h" +#include "audio/driver/platform/osx/osxdirectaudiodriver.h" #endif #ifdef Q_OS_WASM @@ -59,6 +62,11 @@ using namespace muse; using namespace muse::audio; using namespace muse::audio::rpc; +AudioDriverController::AudioDriverController() + : m_audioTaskScheduler(std::make_shared()) +{ +} + IAudioDriverPtr AudioDriverController::createDriver(const std::string& name) const { #ifdef MUSE_MODULE_AUDIO_JACK @@ -105,7 +113,9 @@ IAudioDriverPtr AudioDriverController::createDriver(const std::string& name) con #endif #ifdef Q_OS_MACOS - UNUSED(name); + if (name == "CoreAudioDirect") { + return std::shared_ptr(new OSXDirectAudioDriver()); + } return std::shared_ptr(new OSXAudioDriver()); #endif @@ -147,6 +157,7 @@ std::vector AudioDriverController::availableAudioDrivers() const #ifdef Q_OS_MACOS 
names.push_back("CoreAudio"); + names.push_back("CoreAudioDirect"); return names; #endif @@ -188,6 +199,11 @@ void AudioDriverController::setNewDriver(IAudioDriverPtr newDriver) }); }); } + + auto audioWorkgroupSource = std::dynamic_pointer_cast(m_audioTaskScheduler); + if (audioWorkgroupSource) { + audioWorkgroupSource->setAudioDriver(m_audioDriver); + } } IAudioDriver::Spec AudioDriverController::defaultSpec() const @@ -463,3 +479,8 @@ async::Notification AudioDriverController::outputDeviceSampleRateChanged() const { return m_outputDeviceSampleRateChanged; } + +IAudioTaskSchedulerPtr muse::audio::AudioDriverController::getAudioTaskScheduler() const +{ + return m_audioTaskScheduler; +} diff --git a/framework/audio/main/internal/audiodrivercontroller.h b/framework/audio/main/internal/audiodrivercontroller.h index f6628bd9c3..57a7596817 100644 --- a/framework/audio/main/internal/audiodrivercontroller.h +++ b/framework/audio/main/internal/audiodrivercontroller.h @@ -22,6 +22,7 @@ #pragma once +#include "common/iaudiotaskscheduler.h" #include "global/async/asyncable.h" #include "audio/iaudiodrivercontroller.h" @@ -37,6 +38,7 @@ class AudioDriverController : public IAudioDriverController, public async::Async GlobalInject rpcChannel; public: + AudioDriverController(); std::vector availableAudioDrivers() const override; @@ -67,6 +69,8 @@ class AudioDriverController : public IAudioDriverController, public async::Async void changeSampleRate(sample_rate_t sampleRate) override; async::Notification outputDeviceSampleRateChanged() const override; + IAudioTaskSchedulerPtr getAudioTaskScheduler() const; + private: IAudioDriverPtr createDriver(const std::string& name) const; void setNewDriver(IAudioDriverPtr newDriver); @@ -77,6 +81,7 @@ class AudioDriverController : public IAudioDriverController, public async::Async void updateOutputSpec(); IAudioDriver::Callback m_callback; + IAudioTaskSchedulerPtr m_audioTaskScheduler; IAudioDriverPtr m_audioDriver; async::Notification 
m_currentAudioDriverChanged; async::Notification m_availableOutputDevicesChanged; diff --git a/framework/audio/thirdparty/moodycamel/blockingconcurrentqueue.h b/framework/audio/thirdparty/moodycamel/blockingconcurrentqueue.h new file mode 100644 index 0000000000..ec4d8cfbb8 --- /dev/null +++ b/framework/audio/thirdparty/moodycamel/blockingconcurrentqueue.h @@ -0,0 +1,581 @@ +// Provides an efficient blocking version of moodycamel::ConcurrentQueue. +// ©2015-2020 Cameron Desrochers. Distributed under the terms of the simplified +// BSD license, available at the top of concurrentqueue.h. +// Also dual-licensed under the Boost Software License (see LICENSE.md) +// Uses Jeff Preshing's semaphore implementation (under the terms of its +// separate zlib license, see lightweightsemaphore.h). + +#pragma once + +#include "concurrentqueue.h" +#include "lightweightsemaphore.h" + +#include +#include +#include +#include +#include + +namespace moodycamel { +// This is a blocking version of the queue. It has an almost identical interface to +// the normal non-blocking version, with the addition of various wait_dequeue() methods +// and the removal of producer-specific dequeue methods. 
+template +class BlockingConcurrentQueue +{ +private: + typedef ::moodycamel::ConcurrentQueue ConcurrentQueue; + typedef ::moodycamel::LightweightSemaphore LightweightSemaphore; + +public: + typedef typename ConcurrentQueue::producer_token_t producer_token_t; + typedef typename ConcurrentQueue::consumer_token_t consumer_token_t; + + typedef typename ConcurrentQueue::index_t index_t; + typedef typename ConcurrentQueue::size_t size_t; + typedef typename std::make_signed::type ssize_t; + + static const size_t BLOCK_SIZE = ConcurrentQueue::BLOCK_SIZE; + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = ConcurrentQueue::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD; + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::EXPLICIT_INITIAL_INDEX_SIZE; + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::IMPLICIT_INITIAL_INDEX_SIZE; + static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = ConcurrentQueue::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE + = ConcurrentQueue::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE; + static const size_t MAX_SUBQUEUE_SIZE = ConcurrentQueue::MAX_SUBQUEUE_SIZE; + +public: + // Creates a queue with at least `capacity` element slots; note that the + // actual number of elements that can be inserted without additional memory + // allocation depends on the number of producers and the block size (e.g. if + // the block size is equal to `capacity`, only a single block will be allocated + // up-front, which means only a single producer will be able to enqueue elements + // without an extra allocation -- blocks aren't shared between producers). + // This method is not thread safe -- it is up to the user to ensure that the + // queue is fully constructed before it starts being used by other threads (this + // includes making the memory effects of construction visible, possibly with a + // memory barrier). 
+ explicit BlockingConcurrentQueue(size_t capacity = 6* BLOCK_SIZE) + : inner(capacity), + sema(create(0, (int)Traits::MAX_SEMA_SPINS), + &BlockingConcurrentQueue::template destroy) + { + assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner + && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); + if (!sema) { + MOODYCAMEL_THROW(std::bad_alloc()); + } + } + + BlockingConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) + : inner(minCapacity, maxExplicitProducers, maxImplicitProducers), + sema(create(0, (int)Traits::MAX_SEMA_SPINS), + &BlockingConcurrentQueue::template destroy) + { + assert(reinterpret_cast((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner + && "BlockingConcurrentQueue must have ConcurrentQueue as its first member"); + if (!sema) { + MOODYCAMEL_THROW(std::bad_alloc()); + } + } + + // Disable copying and copy assignment + BlockingConcurrentQueue(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + BlockingConcurrentQueue& operator=(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + + // Moving is supported, but note that it is *not* a thread-safe operation. + // Nobody can use the queue while it's being moved, and the memory effects + // of that move must be propagated to other threads before they can use it. + // Note: When a queue is moved, its tokens are still valid but can only be + // used with the destination queue (i.e. semantically they are moved along + // with the queue itself). + BlockingConcurrentQueue(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + : inner(std::move(other.inner)), sema(std::move(other.sema)) + { } + + inline BlockingConcurrentQueue& operator=(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + { + return swap_internal(other); + } + + // Swaps this queue's state with the other's. Not thread-safe. 
+ // Swapping two queues does not invalidate their tokens, however + // the tokens that were created for one queue must be used with + // only the swapped queue (i.e. the tokens are tied to the + // queue's movable state, not the object itself). + inline void swap(BlockingConcurrentQueue& other) MOODYCAMEL_NOEXCEPT + { + swap_internal(other); + } + +private: + BlockingConcurrentQueue& swap_internal(BlockingConcurrentQueue& other) + { + if (this == &other) { + return *this; + } + + inner.swap(other.inner); + sema.swap(other.sema); + return *this; + } + +public: + // Enqueues a single item (by copying it). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T const& item) + { + if ((details::likely)(inner.enqueue(item))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by moving it, if possible). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T&& item) + { + if ((details::likely)(inner.enqueue(std::move(item)))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. 
+ inline bool enqueue(producer_token_t const& token, T const& item) + { + if ((details::likely)(inner.enqueue(token, item))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T&& item) + { + if ((details::likely)(inner.enqueue(token, std::move(item)))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues several items. + // Allocates memory if required. Only fails if memory allocation fails (or + // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved instead of copied. + // Thread-safe. + template + inline bool enqueue_bulk(It itemFirst, size_t count) + { + if ((details::likely)(inner.enqueue_bulk(std::forward(itemFirst), count))) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + + // Enqueues several items using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails + // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + inline bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + if ((details::likely)(inner.enqueue_bulk(token, std::forward(itemFirst), count))) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + + // Enqueues a single item (by copying it). + // Does not allocate memory. 
Fails if not enough room to enqueue (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0). + // Thread-safe. + inline bool try_enqueue(T const& item) + { + if (inner.try_enqueue(item)) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by moving it, if possible). + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Thread-safe. + inline bool try_enqueue(T&& item) + { + if (inner.try_enqueue(std::move(item))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T const& item) + { + if (inner.try_enqueue(token, item)) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T&& item) + { + if (inner.try_enqueue(token, std::move(item))) { + sema->signal(); + return true; + } + return false; + } + + // Enqueues several items. + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. 
+ template + inline bool try_enqueue_bulk(It itemFirst, size_t count) + { + if (inner.try_enqueue_bulk(std::forward(itemFirst), count)) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + + // Enqueues several items using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + inline bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + if (inner.try_enqueue_bulk(token, std::forward(itemFirst), count)) { + sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count); + return true; + } + return false; + } + + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue(U& item) + { + if (sema->tryWait()) { + while (!inner.try_dequeue(item)) { + continue; + } + return true; + } + return false; + } + + // Attempts to dequeue from the queue using an explicit consumer token. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue(consumer_token_t& token, U& item) + { + if (sema->tryWait()) { + while (!inner.try_dequeue(token, item)) { + continue; + } + return true; + } + return false; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. 
+ template + inline size_t try_dequeue_bulk(It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(token, itemFirst, max - count); + } + return count; + } + + // Blocks the current thread until there's something to dequeue, then + // dequeues it. + // Never allocates. Thread-safe. + template + inline void wait_dequeue(U& item) + { + while (!sema->wait()) { + continue; + } + while (!inner.try_dequeue(item)) { + continue; + } + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout (specified in microseconds) expires. Returns false + // without setting `item` if the timeout expires, otherwise assigns + // to `item` and returns true. + // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue_timed(U& item, std::int64_t timeout_usecs) + { + if (!sema->wait(timeout_usecs)) { + return false; + } + while (!inner.try_dequeue(item)) { + continue; + } + return true; + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout expires. 
Returns false without setting `item` if the + // timeout expires, otherwise assigns to `item` and returns true. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue_timed(U& item, std::chrono::duration const& timeout) + { + return wait_dequeue_timed(item, std::chrono::duration_cast(timeout).count()); + } + + // Blocks the current thread until there's something to dequeue, then + // dequeues it using an explicit consumer token. + // Never allocates. Thread-safe. + template + inline void wait_dequeue(consumer_token_t& token, U& item) + { + while (!sema->wait()) { + continue; + } + while (!inner.try_dequeue(token, item)) { + continue; + } + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout (specified in microseconds) expires. Returns false + // without setting `item` if the timeout expires, otherwise assigns + // to `item` and returns true. + // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue_timed(consumer_token_t& token, U& item, std::int64_t timeout_usecs) + { + if (!sema->wait(timeout_usecs)) { + return false; + } + while (!inner.try_dequeue(token, item)) { + continue; + } + return true; + } + + // Blocks the current thread until either there's something to dequeue + // or the timeout expires. Returns false without setting `item` if the + // timeout expires, otherwise assigns to `item` and returns true. + // Never allocates. Thread-safe. + template + inline bool wait_dequeue_timed(consumer_token_t& token, U& item, std::chrono::duration const& timeout) + { + return wait_dequeue_timed(token, item, std::chrono::duration_cast(timeout).count()); + } + + // Attempts to dequeue several elements from the queue. 
+ // Returns the number of items actually dequeued, which will + // always be at least one (this method blocks until the queue + // is non-empty) and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk(It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued, which can + // be 0 if the timeout expires while waiting for elements, + // and at most max. + // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue_bulk. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk_timed(It itemFirst, size_t max, std::int64_t timeout_usecs) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, timeout_usecs); + while (count != max) { + count += inner.template try_dequeue_bulk(itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued, which can + // be 0 if the timeout expires while waiting for elements, + // and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk_timed(It itemFirst, size_t max, std::chrono::duration const& timeout) + { + return wait_dequeue_bulk_timed(itemFirst, max, std::chrono::duration_cast(timeout).count()); + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued, which will + // always be at least one (this method blocks until the queue + // is non-empty) and at most max. + // Never allocates. Thread-safe. 
+ template + inline size_t wait_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max); + while (count != max) { + count += inner.template try_dequeue_bulk(token, itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued, which can + // be 0 if the timeout expires while waiting for elements, + // and at most max. + // Using a negative timeout indicates an indefinite timeout, + // and is thus functionally equivalent to calling wait_dequeue_bulk. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, It itemFirst, size_t max, std::int64_t timeout_usecs) + { + size_t count = 0; + max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, timeout_usecs); + while (count != max) { + count += inner.template try_dequeue_bulk(token, itemFirst, max - count); + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued, which can + // be 0 if the timeout expires while waiting for elements, + // and at most max. + // Never allocates. Thread-safe. + template + inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, It itemFirst, size_t max, + std::chrono::duration const& timeout) + { + return wait_dequeue_bulk_timed(token, itemFirst, max, std::chrono::duration_cast(timeout).count()); + } + + // Returns an estimate of the total number of elements currently in the queue. This + // estimate is only accurate if the queue has completely stabilized before it is called + // (i.e. all enqueue and dequeue operations have completed and their memory effects are + // visible on the calling thread, and no further operations start while this method is + // being called). 
+ // Thread-safe. + inline size_t size_approx() const + { + return (size_t)sema->availableApprox(); + } + + // Returns true if the underlying atomic variables used by + // the queue are lock-free (they should be on most platforms). + // Thread-safe. + static constexpr bool is_lock_free() + { + return ConcurrentQueue::is_lock_free(); + } + +private: + template + static inline U* create(A1&& a1, A2&& a2) + { + void* p = (Traits::malloc)(sizeof(U)); + return p != nullptr ? new (p) U(std::forward(a1), std::forward(a2)) : nullptr; + } + + template + static inline void destroy(U* p) + { + if (p != nullptr) { + p->~U(); + } + (Traits::free)(p); + } + +private: + ConcurrentQueue inner; + std::unique_ptr sema; +}; + +template +inline void swap(BlockingConcurrentQueue& a, BlockingConcurrentQueue& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} +} // end namespace moodycamel diff --git a/framework/audio/thirdparty/moodycamel/concurrentqueue.h b/framework/audio/thirdparty/moodycamel/concurrentqueue.h new file mode 100644 index 0000000000..d758324e0c --- /dev/null +++ b/framework/audio/thirdparty/moodycamel/concurrentqueue.h @@ -0,0 +1,3953 @@ +// Provides a C++11 implementation of a multi-producer, multi-consumer lock-free queue. +// An overview, including benchmark results, is provided here: +// http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++ +// The full design is also described in excruciating detail at: +// http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue + +// Simplified BSD license: +// Copyright (c) 2013-2020, Cameron Desrochers. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// - Redistributions of source code must retain the above copyright notice, this list of +// conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, this list of +// conditions and the following disclaimer in the documentation and/or other materials +// provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +// OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +// TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// Also dual-licensed under the Boost Software License (see LICENSE.md) + +#pragma once + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) +// Disable -Wconversion warnings (spuriously triggered when Traits::size_t and +// Traits::index_t are set to < 32 bits, causing integer promotion, causing warnings +// upon assigning any computed values) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" + +#ifdef MCDBGQ_USE_RELACY +#pragma GCC diagnostic ignored "-Wint-to-pointer-cast" +#endif +#endif + +#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) +// VS2019 with /W4 warns about constant conditional expressions but unless /std=c++17 or higher +// does not support `if constexpr`, so we have no choice but to simply disable the warning +#pragma warning(push) +#pragma warning(disable: 4127) // conditional expression is constant +#endif + +#if defined(__APPLE__) +#include "TargetConditionals.h" +#endif + +#ifdef MCDBGQ_USE_RELACY +#include "relacy/relacy_std.hpp" +#include "relacy_shims.h" +// We only use malloc/free anyway, and the delete macro messes up `= delete` method declarations. +// We'll override the default trait malloc ourselves without a macro. +#undef new +#undef delete +#undef malloc +#undef free +#else +#include // Requires C++11. Sorry VS2010. 
+#include +#endif +#include // for max_align_t +#include +#include +#include +#include +#include +#include +#include // for CHAR_BIT +#include +#include // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading +#include // used for thread exit synchronization + +// Platform-specific definitions of a numeric thread ID type and an invalid value +namespace moodycamel { +namespace details { +template struct thread_id_converter { + typedef thread_id_t thread_id_numeric_size_t; + typedef thread_id_t thread_id_hash_t; + static thread_id_hash_t prehash(thread_id_t const& x) { return x; } +}; +} +} +#if defined(MCDBGQ_USE_RELACY) +namespace moodycamel { +namespace details { +typedef std::uint32_t thread_id_t; +static const thread_id_t invalid_thread_id = 0xFFFFFFFFU; +static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU; +static inline thread_id_t thread_id() { return rl::thread_index(); } +} +} +#elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__) +// No sense pulling in windows.h in a header, we'll manually declare the function +// we use and rely on backwards-compatibility for this not to break +extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void); +namespace moodycamel { +namespace details { +static_assert(sizeof(unsigned long) == sizeof(std::uint32_t), "Expected size of unsigned long to be 32 bits on Windows"); +typedef std::uint32_t thread_id_t; +static const thread_id_t invalid_thread_id = 0; // See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx +static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU; // Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4. 
+static inline thread_id_t thread_id() { return static_cast(::GetCurrentThreadId()); } +} +} +#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) \ + || defined(MOODYCAMEL_NO_THREAD_LOCAL) +namespace moodycamel { +namespace details { +static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes"); + +typedef std::thread::id thread_id_t; +static const thread_id_t invalid_thread_id; // Default ctor creates invalid ID + +// Note we don't define an invalid_thread_id2 since std::thread::id doesn't have one; it's +// only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined anyway, which it won't +// be. +static inline thread_id_t thread_id() { return std::this_thread::get_id(); } + +template struct thread_id_size { }; +template<> struct thread_id_size<4> { + typedef std::uint32_t numeric_t; +}; +template<> struct thread_id_size<8> { + typedef std::uint64_t numeric_t; +}; + +template<> struct thread_id_converter { + typedef thread_id_size::numeric_t thread_id_numeric_size_t; +#ifndef __APPLE__ + typedef std::size_t thread_id_hash_t; +#else + typedef thread_id_numeric_size_t thread_id_hash_t; +#endif + + static thread_id_hash_t prehash(thread_id_t const& x) + { +#ifndef __APPLE__ + return std::hash()(x); +#else + return *reinterpret_cast(&x); +#endif + } +}; +} +} +#else +// Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475 +// In order to get a numeric thread ID in a platform-independent way, we use a thread-local +// static variable's address as a thread identifier :-) +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +#define MOODYCAMEL_THREADLOCAL __thread +#elif defined(_MSC_VER) +#define MOODYCAMEL_THREADLOCAL __declspec(thread) +#else +// Assume C++11 compliant compiler +#define MOODYCAMEL_THREADLOCAL thread_local +#endif +namespace moodycamel { +namespace details { +typedef std::uintptr_t thread_id_t; 
+static const thread_id_t invalid_thread_id = 0; // Address can't be nullptr +static const thread_id_t invalid_thread_id2 = 1; // Member accesses off a null pointer are also generally invalid. Plus it's not aligned. +inline thread_id_t thread_id() { static MOODYCAMEL_THREADLOCAL int x; return reinterpret_cast(&x); } +} +} +#endif + +// Constexpr if +#ifndef MOODYCAMEL_CONSTEXPR_IF +#if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || __cplusplus > 201402L +#define MOODYCAMEL_CONSTEXPR_IF if constexpr +#define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]] +#else +#define MOODYCAMEL_CONSTEXPR_IF if +#define MOODYCAMEL_MAYBE_UNUSED +#endif +#endif + +// Exceptions +#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED +#if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__)) +#define MOODYCAMEL_EXCEPTIONS_ENABLED +#endif +#endif +#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED +#define MOODYCAMEL_TRY try +#define MOODYCAMEL_CATCH(...) catch (__VA_ARGS__) +#define MOODYCAMEL_RETHROW throw +#define MOODYCAMEL_THROW(expr) throw (expr) +#else +#define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF(true) +#define MOODYCAMEL_CATCH(...) else MOODYCAMEL_CONSTEXPR_IF(false) +#define MOODYCAMEL_RETHROW +#define MOODYCAMEL_THROW(expr) +#endif + +#ifndef MOODYCAMEL_NOEXCEPT +#if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED) +#define MOODYCAMEL_NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true +#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800 +// VS2012's std::is_nothrow_[move_]constructible is broken and returns true when it shouldn't :-( +// We have to assume *all* non-trivial constructors may throw on VS2012! +#define MOODYCAMEL_NOEXCEPT _NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, \ + expr) (std::is_rvalue_reference::value \ + && std::is_move_constructible::value ? 
std::is_trivially_move_constructible::value \ + : std::is_trivially_copy_constructible::value) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, \ + expr) ((std::is_rvalue_reference::value \ + && std::is_move_assignable::value ? std::is_trivially_move_assignable::value \ + || std::is_nothrow_move_assignable::value \ + : std::is_trivially_copy_assignable::value \ + || std::is_nothrow_copy_assignable::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, \ + expr)) +#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900 +#define MOODYCAMEL_NOEXCEPT _NOEXCEPT +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, \ + expr) (std::is_rvalue_reference::value \ + && std::is_move_constructible::value ? std::is_trivially_move_constructible::value \ + || std::is_nothrow_move_constructible::value \ + : std::is_trivially_copy_constructible::value \ + || std::is_nothrow_copy_constructible::value) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, \ + expr) ((std::is_rvalue_reference::value \ + && std::is_move_assignable::value ? std::is_trivially_move_assignable::value \ + || std::is_nothrow_move_assignable::value \ + : std::is_trivially_copy_assignable::value \ + || std::is_nothrow_copy_assignable::value) && MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, \ + expr)) +#else +#define MOODYCAMEL_NOEXCEPT noexcept +#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr) +#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr) +#endif +#endif + +#ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#ifdef MCDBGQ_USE_RELACY +#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#else +// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445 +// g++ <=4.7 doesn't support thread_local either. 
+// Finally, iOS/ARM doesn't have support for it either, and g++/ARM allows it to compile but it's unconfirmed to actually work +#if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) \ + && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) \ + && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) +// Assume `thread_local` is fully supported in all other C++11 compilers/platforms +#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // tentatively enabled for now; years ago several users reported having problems with it +#endif +#endif +#endif + +// VS2012 doesn't support deleted functions. +// In this case, we declare the function normally but don't define it. A link error will be generated if the function is called. +#ifndef MOODYCAMEL_DELETE_FUNCTION +#if defined(_MSC_VER) && _MSC_VER < 1800 +#define MOODYCAMEL_DELETE_FUNCTION +#else +#define MOODYCAMEL_DELETE_FUNCTION = delete +#endif +#endif + +namespace moodycamel { +namespace details { +#ifndef MOODYCAMEL_ALIGNAS +// VS2013 doesn't support alignas or alignof, and align() requires a constant literal +#if defined(_MSC_VER) && _MSC_VER <= 1800 +#define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment)) +#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj) +#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) typename details::Vs2013Aligned::value, T>::type +template struct Vs2013Aligned { }; // default, unsupported alignment +template struct Vs2013Aligned<1, T> { + typedef __declspec(align (1)) T type; +}; +template struct Vs2013Aligned<2, T> { + typedef __declspec(align (2)) T type; +}; +template struct Vs2013Aligned<4, T> { + typedef __declspec(align (4)) T type; +}; +template struct Vs2013Aligned<8, T> { + typedef __declspec(align (8)) T type; +}; +template struct Vs2013Aligned<16, T> { + typedef __declspec(align (16)) T type; +}; +template struct 
Vs2013Aligned<32, T> { + typedef __declspec(align (32)) T type; +}; +template struct Vs2013Aligned<64, T> { + typedef __declspec(align (64)) T type; +}; +template struct Vs2013Aligned<128, T> { + typedef __declspec(align (128)) T type; +}; +template struct Vs2013Aligned<256, T> { + typedef __declspec(align (256)) T type; +}; +#else +template struct identity { + typedef T type; +}; +#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment) +#define MOODYCAMEL_ALIGNOF(obj) alignof(obj) +#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) alignas(alignof(obj)) typename details::identity::type +#endif +#endif +} +} + +// TSAN can falsely report races in lock-free code. To enable TSAN to be used from projects that use this one, +// we can apply per-function compile-time suppression. +// See https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer +#define MOODYCAMEL_NO_TSAN +#if defined(__has_feature) + #if __has_feature(thread_sanitizer) + #undef MOODYCAMEL_NO_TSAN + #define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread"))) + #endif // TSAN +#endif // TSAN + +// Compiler-specific likely/unlikely hints +namespace moodycamel { +namespace details { +#if defined(__GNUC__) +static inline bool(likely)(bool x) { + return __builtin_expect((x), true); +} +static inline bool(unlikely)(bool x) { + return __builtin_expect((x), false); +} +#else +static inline bool(likely)(bool x) { + return x; +} +static inline bool(unlikely)(bool x) { + return x; +} +#endif +} +} + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG +#include "internal/concurrentqueue_internal_debug.h" +#endif + +namespace moodycamel { +namespace details { +template +struct const_numeric_max { + static_assert(std::is_integral::value, "const_numeric_max can only be used with integers"); + static const T value = std::numeric_limits::is_signed + ? 
(static_cast(1) << (sizeof(T) * CHAR_BIT - 1)) - static_cast(1) + : static_cast(-1); +}; + +#if defined(__GLIBCXX__) +typedef ::max_align_t std_max_align_t; // libstdc++ forgot to add it to std:: for a while +#else +typedef std::max_align_t std_max_align_t; // Others (e.g. MSVC) insist it can *only* be accessed via std:: +#endif + +// Some platforms have incorrectly set max_align_t to a type with <8 bytes alignment even while supporting +// 8-byte aligned scalar values (*cough* 32-bit iOS). Work around this with our own union. See issue #64. +typedef union { + std_max_align_t x; + long long y; + void* z; +} max_align_t; +} + +// Default traits for the ConcurrentQueue. To change some of the +// traits without re-implementing all of them, inherit from this +// struct and shadow the declarations you wish to be different; +// since the traits are used as a template type parameter, the +// shadowed declarations will be used where defined, and the defaults +// otherwise. +struct ConcurrentQueueDefaultTraits +{ + // General-purpose size type. std::size_t is strongly recommended. + typedef std::size_t size_t; + + // The type used for the enqueue and dequeue indices. Must be at least as + // large as size_t. Should be significantly larger than the number of elements + // you expect to hold at once, especially if you have a high turnover rate; + // for example, on 32-bit x86, if you expect to have over a hundred million + // elements or pump several million elements through your queue in a very + // short space of time, using a 32-bit type *may* trigger a race condition. + // A 64-bit int type is recommended in that case, and in practice will + // prevent a race condition no matter the usage of the queue. Note that + // whether the queue is lock-free with a 64-bit type depends on whether + // std::atomic of that type is lock-free, which is platform-specific.
+ typedef std::size_t index_t; + + // Internally, all elements are enqueued and dequeued from multi-element + // blocks; this is the smallest controllable unit. If you expect few elements + // but many producers, a smaller block size should be favoured. For few producers + // and/or many elements, a larger block size is preferred. A sane default + // is provided. Must be a power of 2. + static const size_t BLOCK_SIZE = 32; + + // For explicit producers (i.e. when using a producer token), the block is + // checked for being empty by iterating through a list of flags, one per element. + // For large block sizes, this is too inefficient, and switching to an atomic + // counter-based approach is faster. The switch is made for block sizes strictly + // larger than this threshold. + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32; + + // How many full blocks can be expected for a single explicit producer? This should + // reflect that number's maximum for optimal performance. Must be a power of 2. + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32; + + // How many full blocks can be expected for a single implicit producer? This should + // reflect that number's maximum for optimal performance. Must be a power of 2. + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32; + + // The initial size of the hash table mapping thread IDs to implicit producers. + // Note that the hash is resized every time it becomes half full. + // Must be a power of two, and either 0 or at least 1. If 0, implicit production + // (using the enqueue methods without an explicit producer token) is disabled. + static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32; + + // Controls the number of items that an explicit consumer (i.e. one with a token) + // must consume before it causes all consumers to rotate and move on to the next + // internal queue. 
+ static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = 256; + + // The maximum number of elements (inclusive) that can be enqueued to a sub-queue. + // Enqueue operations that would cause this limit to be surpassed will fail. Note + // that this limit is enforced at the block level (for performance reasons), i.e. + // it's rounded up to the nearest block size. + static const size_t MAX_SUBQUEUE_SIZE = details::const_numeric_max::value; + + // The number of times to spin before sleeping when waiting on a semaphore. + // Recommended values are on the order of 1000-10000 unless the number of + // consumer threads exceeds the number of idle cores (in which case try 0-100). + // Only affects instances of the BlockingConcurrentQueue. + static const int MAX_SEMA_SPINS = 10000; + + // Whether to recycle dynamically-allocated blocks into an internal free list or + // not. If false, only pre-allocated blocks (controlled by the constructor + // arguments) will be recycled, and all others will be `free`d back to the heap. + // Note that blocks consumed by explicit producers are only freed on destruction + // of the queue (not following destruction of the token) regardless of this trait. + static const bool RECYCLE_ALLOCATED_BLOCKS = false; + +#ifndef MCDBGQ_USE_RELACY + // Memory allocation can be customized if needed. + // malloc should return nullptr on failure, and handle alignment like std::malloc. +#if defined(malloc) || defined(free) + // Gah, this is 2015, stop defining macros that break standard code already! 
+ // Work around malloc/free being special macros: + static inline void* WORKAROUND_malloc(size_t size) { return malloc(size); } + static inline void WORKAROUND_free(void* ptr) { return free(ptr); } + static inline void* (malloc)(size_t size) { + return WORKAROUND_malloc(size); + } + static inline void(free)(void* ptr) { + return WORKAROUND_free(ptr); + } +#else + static inline void* malloc(size_t size) { return std::malloc(size); } + static inline void free(void* ptr) { return std::free(ptr); } +#endif +#else + // Debug versions when running under the Relacy race detector (ignore + // these in user code) + static inline void* malloc(size_t size) { return rl::rl_malloc(size, $); } + static inline void free(void* ptr) { return rl::rl_free(ptr, $); } +#endif +}; + +// When producing or consuming many elements, the most efficient way is to: +// 1) Use one of the bulk-operation methods of the queue with a token +// 2) Failing that, use the bulk-operation methods without a token +// 3) Failing that, create a token and use that with the single-item methods +// 4) Failing that, use the single-parameter methods of the queue +// Having said that, don't create tokens willy-nilly -- ideally there should be +// a maximum of one token per thread (of each kind). 
+struct ProducerToken; +struct ConsumerToken; + +template class ConcurrentQueue; +template class BlockingConcurrentQueue; +class ConcurrentQueueTests; + +namespace details { +struct ConcurrentQueueProducerTypelessBase +{ + ConcurrentQueueProducerTypelessBase* next; + std::atomic inactive; + ProducerToken* token; + + ConcurrentQueueProducerTypelessBase() + : next(nullptr), inactive(false), token(nullptr) + { + } +}; + +template struct _hash_32_or_64 { + static inline std::uint32_t hash(std::uint32_t h) + { + // MurmurHash3 finalizer -- see https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp + // Since the thread ID is already unique, all we really want to do is propagate that + // uniqueness evenly across all the bits, so that we can use a subset of the bits while + // reducing collisions significantly + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + return h ^ (h >> 16); + } +}; +template<> struct _hash_32_or_64<1> { + static inline std::uint64_t hash(std::uint64_t h) + { + // 64-bit counterpart of the bit-mixing finalizer above + h ^= h >> 33; + h *= 0xff51afd7ed558ccd; + h ^= h >> 33; + h *= 0xc4ceb9fe1a85ec53; + return h ^ (h >> 33); + } +}; +template struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> { }; + +static inline size_t hash_thread_id(thread_id_t id) +{ + static_assert(sizeof(thread_id_t) <= 8, "Expected a platform where thread IDs are at most 64-bit values"); + return static_cast(hash_32_or_64::thread_id_hash_t)>::hash( + thread_id_converter::prehash(id))); +} + +template +static inline bool circular_less_than(T a, T b) +{ + static_assert(std::is_integral::value && !std::numeric_limits::is_signed, + "circular_less_than is intended to be used only with unsigned integer types"); + return static_cast(a - b) > static_cast(static_cast(1) << (static_cast(sizeof(T) * CHAR_BIT - 1))); + // Note: extra parens around rhs of operator<< is MSVC bug: https://developercommunity2.visualstudio.com/t/C4554-triggers-when-both-lhs-and-rhs-is/10034931 + // silencing the bug
requires #pragma warning(disable: 4554) around the calling code and has no effect when done here. +} + +template +static inline char* align_for(char* ptr) +{ + const std::size_t alignment = std::alignment_of::value; + return ptr + (alignment - (reinterpret_cast(ptr) % alignment)) % alignment; +} + +template +static inline T ceil_to_pow_2(T x) +{ + static_assert(std::is_integral::value && !std::numeric_limits::is_signed, + "ceil_to_pow_2 is intended to be used only with unsigned integer types"); + + // Adapted from http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + --x; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + for (std::size_t i = 1; i < sizeof(T); i <<= 1) { + x |= x >> (i << 3); + } + ++x; + return x; +} + +template +static inline void swap_relaxed(std::atomic& left, std::atomic& right) +{ + T temp = std::move(left.load(std::memory_order_relaxed)); + left.store(std::move(right.load(std::memory_order_relaxed)), std::memory_order_relaxed); + right.store(std::move(temp), std::memory_order_relaxed); +} + +template +static inline T const& nomove(T const& x) +{ + return x; +} + +template +struct nomove_if +{ + template + static inline T const& eval(T const& x) + { + return x; + } +}; + +template<> +struct nomove_if +{ + template + static inline auto eval(U&& x) + -> decltype(std::forward(x)) + { + return std::forward(x); + } +}; + +template +static inline auto deref_noexcept(It& it) MOODYCAMEL_NOEXCEPT->decltype(*it) +{ + return *it; +} + +#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) +template struct is_trivially_destructible : std::is_trivially_destructible { }; +#else +template struct is_trivially_destructible : std::has_trivial_destructor { }; +#endif + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED +#ifdef MCDBGQ_USE_RELACY +typedef RelacyThreadExitListener ThreadExitListener; +typedef RelacyThreadExitNotifier ThreadExitNotifier; +#else +class ThreadExitNotifier; + +struct 
ThreadExitListener +{ + typedef void (*callback_t)(void*); + callback_t callback; + void* userData; + + ThreadExitListener* next; // reserved for use by the ThreadExitNotifier + ThreadExitNotifier* chain; // reserved for use by the ThreadExitNotifier +}; + +class ThreadExitNotifier +{ +public: + static void subscribe(ThreadExitListener* listener) + { + auto& tlsInst = instance(); + std::lock_guard guard(mutex()); + listener->next = tlsInst.tail; + listener->chain = &tlsInst; + tlsInst.tail = listener; + } + + static void unsubscribe(ThreadExitListener* listener) + { + std::lock_guard guard(mutex()); + if (!listener->chain) { + return; // race with ~ThreadExitNotifier + } + auto& tlsInst = *listener->chain; + listener->chain = nullptr; + ThreadExitListener** prev = &tlsInst.tail; + for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) { + if (ptr == listener) { + *prev = ptr->next; + break; + } + prev = &ptr->next; + } + } + +private: + ThreadExitNotifier() + : tail(nullptr) { } + ThreadExitNotifier(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; + ThreadExitNotifier& operator=(ThreadExitNotifier const&) MOODYCAMEL_DELETE_FUNCTION; + + ~ThreadExitNotifier() + { + // This thread is about to exit, let everyone know! + assert(this == &instance() + && + "If this assert fails, you likely have a buggy compiler! 
Change the preprocessor conditions such that MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined."); + std::lock_guard guard(mutex()); + for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) { + ptr->chain = nullptr; + ptr->callback(ptr->userData); + } + } + + // Thread-local + static inline ThreadExitNotifier& instance() + { + static thread_local ThreadExitNotifier notifier; + return notifier; + } + + static inline std::mutex& mutex() + { + // Must be static because the ThreadExitNotifier could be destroyed while unsubscribe is called + static std::mutex mutex; + return mutex; + } + +private: + ThreadExitListener* tail; +}; +#endif +#endif + +template struct static_is_lock_free_num { + enum { + value = 0 + }; +}; +template<> struct static_is_lock_free_num { + enum { + value = ATOMIC_CHAR_LOCK_FREE + }; +}; +template<> struct static_is_lock_free_num { + enum { + value = ATOMIC_SHORT_LOCK_FREE + }; +}; +template<> struct static_is_lock_free_num { + enum { + value = ATOMIC_INT_LOCK_FREE + }; +}; +template<> struct static_is_lock_free_num { + enum { + value = ATOMIC_LONG_LOCK_FREE + }; +}; +template<> struct static_is_lock_free_num { + enum { + value = ATOMIC_LLONG_LOCK_FREE + }; +}; +template struct static_is_lock_free : static_is_lock_free_num::type> { }; +template<> struct static_is_lock_free { + enum { + value = ATOMIC_BOOL_LOCK_FREE + }; +}; +template struct static_is_lock_free { + enum { + value = ATOMIC_POINTER_LOCK_FREE + }; +}; +} + +struct ProducerToken +{ + template + explicit ProducerToken(ConcurrentQueue& queue); + + template + explicit ProducerToken(BlockingConcurrentQueue& queue); + + ProducerToken(ProducerToken&& other) MOODYCAMEL_NOEXCEPT + : producer(other.producer) + { + other.producer = nullptr; + if (producer != nullptr) { + producer->token = this; + } + } + + inline ProducerToken& operator=(ProducerToken&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + void swap(ProducerToken& other) MOODYCAMEL_NOEXCEPT + { + 
std::swap(producer, other.producer); + if (producer != nullptr) { + producer->token = this; + } + if (other.producer != nullptr) { + other.producer->token = &other; + } + } + + // A token is always valid unless: + // 1) Memory allocation failed during construction + // 2) It was moved via the move constructor + // (Note: assignment does a swap, leaving both potentially valid) + // 3) The associated queue was destroyed + // Note that if valid() returns true, that only indicates + // that the token is valid for use with a specific queue, + // but not which one; that's up to the user to track. + inline bool valid() const { return producer != nullptr; } + + ~ProducerToken() + { + if (producer != nullptr) { + producer->token = nullptr; + producer->inactive.store(true, std::memory_order_release); + } + } + + // Disable copying and assignment + ProducerToken(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; + ProducerToken& operator=(ProducerToken const&) MOODYCAMEL_DELETE_FUNCTION; + +private: + template friend class ConcurrentQueue; + friend class ConcurrentQueueTests; + +protected: + details::ConcurrentQueueProducerTypelessBase* producer; +}; + +struct ConsumerToken +{ + template + explicit ConsumerToken(ConcurrentQueue& q); + + template + explicit ConsumerToken(BlockingConcurrentQueue& q); + + ConsumerToken(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT + : initialOffset(other.initialOffset), lastKnownGlobalOffset(other.lastKnownGlobalOffset), + itemsConsumedFromCurrent(other.itemsConsumedFromCurrent), currentProducer(other.currentProducer), + desiredProducer(other.desiredProducer) + { + } + + inline ConsumerToken& operator=(ConsumerToken&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + void swap(ConsumerToken& other) MOODYCAMEL_NOEXCEPT + { + std::swap(initialOffset, other.initialOffset); + std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset); + std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent); + 
std::swap(currentProducer, other.currentProducer); + std::swap(desiredProducer, other.desiredProducer); + } + + // Disable copying and assignment + ConsumerToken(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; + ConsumerToken& operator=(ConsumerToken const&) MOODYCAMEL_DELETE_FUNCTION; + +private: + template friend class ConcurrentQueue; + friend class ConcurrentQueueTests; + +private: // but shared with ConcurrentQueue + std::uint32_t initialOffset; + std::uint32_t lastKnownGlobalOffset; + std::uint32_t itemsConsumedFromCurrent; + details::ConcurrentQueueProducerTypelessBase* currentProducer; + details::ConcurrentQueueProducerTypelessBase* desiredProducer; +}; + +// Need to forward-declare this swap because it's in a namespace. +// See http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces +template +inline void swap(typename ConcurrentQueue::ImplicitProducerKVP& a, + typename ConcurrentQueue::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT; + +template +class ConcurrentQueue +{ +public: + typedef ::moodycamel::ProducerToken producer_token_t; + typedef ::moodycamel::ConsumerToken consumer_token_t; + + typedef typename Traits::index_t index_t; + typedef typename Traits::size_t size_t; + + static const size_t BLOCK_SIZE = static_cast(Traits::BLOCK_SIZE); + static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = static_cast(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD); + static const size_t EXPLICIT_INITIAL_INDEX_SIZE = static_cast(Traits::EXPLICIT_INITIAL_INDEX_SIZE); + static const size_t IMPLICIT_INITIAL_INDEX_SIZE = static_cast(Traits::IMPLICIT_INITIAL_INDEX_SIZE); + static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = static_cast(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE); + static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE + = static_cast(Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE); +#ifdef _MSC_VER +#pragma warning(push) +#pragma 
warning(disable: 4307) // + integral constant overflow (that's what the ternary expression is for!) +#pragma warning(disable: 4309) // static_cast: Truncation of constant value +#endif + static const size_t MAX_SUBQUEUE_SIZE = (details::const_numeric_max::value - static_cast(Traits::MAX_SUBQUEUE_SIZE) + < BLOCK_SIZE) ? details::const_numeric_max::value + : ((static_cast(Traits::MAX_SUBQUEUE_SIZE) + (BLOCK_SIZE - 1)) / BLOCK_SIZE + * BLOCK_SIZE); +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + static_assert(!std::numeric_limits::is_signed && std::is_integral::value, + "Traits::size_t must be an unsigned integral type"); + static_assert(!std::numeric_limits::is_signed && std::is_integral::value, + "Traits::index_t must be an unsigned integral type"); + static_assert(sizeof(index_t) >= sizeof(size_t), "Traits::index_t must be at least as wide as Traits::size_t"); + static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)), "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)"); + static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) + && !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD & (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)), + "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a power of 2 (and greater than 1)"); + static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) && !(EXPLICIT_INITIAL_INDEX_SIZE & (EXPLICIT_INITIAL_INDEX_SIZE - 1)), + "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); + static_assert((IMPLICIT_INITIAL_INDEX_SIZE > 1) && !(IMPLICIT_INITIAL_INDEX_SIZE & (IMPLICIT_INITIAL_INDEX_SIZE - 1)), + "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and greater than 1)"); + static_assert((INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) + || !(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE & (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1)), + "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2"); + static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0 || INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1, + 
"Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least 1 (or 0 to disable implicit enqueueing)"); + +public: + // Creates a queue with at least `capacity` element slots; note that the + // actual number of elements that can be inserted without additional memory + // allocation depends on the number of producers and the block size (e.g. if + // the block size is equal to `capacity`, only a single block will be allocated + // up-front, which means only a single producer will be able to enqueue elements + // without an extra allocation -- blocks aren't shared between producers). + // This method is not thread safe -- it is up to the user to ensure that the + // queue is fully constructed before it starts being used by other threads (this + // includes making the memory effects of construction visible, possibly with a + // memory barrier). + explicit ConcurrentQueue(size_t capacity = 32* BLOCK_SIZE) + : producerListTail(nullptr), + producerCount(0), + initialBlockPoolIndex(0), + nextExplicitConsumerId(0), + globalExplicitConsumerOffset(0) + { + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + populate_initial_block_list(capacity / BLOCK_SIZE + ((capacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1)); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + // Track all the producers using a fully-resolved typed list for + // each kind; this makes it possible to debug them starting from + // the root queue object (otherwise wacky casts are needed that + // don't compile in the debugger's expression evaluator). + explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + } + + // Computes the correct amount of pre-allocated blocks for you based + // on the minimum number of elements you want available at any given + // time, and the maximum concurrent number of each type of producer. 
+ ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers) + : producerListTail(nullptr), + producerCount(0), + initialBlockPoolIndex(0), + nextExplicitConsumerId(0), + globalExplicitConsumerOffset(0) + { + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + size_t blocks = (((minCapacity + BLOCK_SIZE - 1) / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1) + 2 + * (maxExplicitProducers + maxImplicitProducers); + populate_initial_block_list(blocks); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + explicitProducers.store(nullptr, std::memory_order_relaxed); + implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + } + + // Note: The queue should not be accessed concurrently while it's + // being deleted. It's up to the user to synchronize this. + // This method is not thread safe. + ~ConcurrentQueue() + { + // Destroy producers + auto ptr = producerListTail.load(std::memory_order_relaxed); + while (ptr != nullptr) { + auto next = ptr->next_prod(); + if (ptr->token != nullptr) { + ptr->token->producer = nullptr; + } + destroy(ptr); + ptr = next; + } + + // Destroy implicit producer hash tables + MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) { + auto hash = implicitProducerHash.load(std::memory_order_relaxed); + while (hash != nullptr) { + auto prev = hash->prev; + if (prev != nullptr) { // The last hash is part of this object and was not allocated dynamically + for (size_t i = 0; i != hash->capacity; ++i) { + hash->entries[i].~ImplicitProducerKVP(); + } + hash->~ImplicitProducerHash(); + (Traits::free)(hash); + } + hash = prev; + } + } + + // Destroy global free list + auto block = freeList.head_unsafe(); + while (block != nullptr) { + auto next = block->freeListNext.load(std::memory_order_relaxed); + if (block->dynamicallyAllocated) { + destroy(block); + } + block = next; + } + + // Destroy initial free list + 
destroy_array(initialBlockPool, initialBlockPoolSize); + } + + // Disable copying and copy assignment + ConcurrentQueue(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + ConcurrentQueue& operator=(ConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION; + + // Moving is supported, but note that it is *not* a thread-safe operation. + // Nobody can use the queue while it's being moved, and the memory effects + // of that move must be propagated to other threads before they can use it. + // Note: When a queue is moved, its tokens are still valid but can only be + // used with the destination queue (i.e. semantically they are moved along + // with the queue itself). + ConcurrentQueue(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + : producerListTail(other.producerListTail.load(std::memory_order_relaxed)), + producerCount(other.producerCount.load(std::memory_order_relaxed)), + initialBlockPoolIndex(other.initialBlockPoolIndex.load(std::memory_order_relaxed)), + initialBlockPool(other.initialBlockPool), + initialBlockPoolSize(other.initialBlockPoolSize), + freeList(std::move(other.freeList)), + nextExplicitConsumerId(other.nextExplicitConsumerId.load(std::memory_order_relaxed)), + globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(std::memory_order_relaxed)) + { + // Move the other one into this, and leave the other one as an empty queue + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + populate_initial_implicit_producer_hash(); + swap_implicit_producer_hashes(other); + + other.producerListTail.store(nullptr, std::memory_order_relaxed); + other.producerCount.store(0, std::memory_order_relaxed); + other.nextExplicitConsumerId.store(0, std::memory_order_relaxed); + other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + explicitProducers.store(other.explicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.explicitProducers.store(nullptr, 
std::memory_order_relaxed); + implicitProducers.store(other.implicitProducers.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.implicitProducers.store(nullptr, std::memory_order_relaxed); +#endif + + other.initialBlockPoolIndex.store(0, std::memory_order_relaxed); + other.initialBlockPoolSize = 0; + other.initialBlockPool = nullptr; + + reown_producers(); + } + + inline ConcurrentQueue& operator=(ConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT + { + return swap_internal(other); + } + + // Swaps this queue's state with the other's. Not thread-safe. + // Swapping two queues does not invalidate their tokens, however + // the tokens that were created for one queue must be used with + // only the swapped queue (i.e. the tokens are tied to the + // queue's movable state, not the object itself). + inline void swap(ConcurrentQueue& other) MOODYCAMEL_NOEXCEPT + { + swap_internal(other); + } + +private: + ConcurrentQueue& swap_internal(ConcurrentQueue& other) + { + if (this == &other) { + return *this; + } + + details::swap_relaxed(producerListTail, other.producerListTail); + details::swap_relaxed(producerCount, other.producerCount); + details::swap_relaxed(initialBlockPoolIndex, other.initialBlockPoolIndex); + std::swap(initialBlockPool, other.initialBlockPool); + std::swap(initialBlockPoolSize, other.initialBlockPoolSize); + freeList.swap(other.freeList); + details::swap_relaxed(nextExplicitConsumerId, other.nextExplicitConsumerId); + details::swap_relaxed(globalExplicitConsumerOffset, other.globalExplicitConsumerOffset); + + swap_implicit_producer_hashes(other); + + reown_producers(); + other.reown_producers(); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + details::swap_relaxed(explicitProducers, other.explicitProducers); + details::swap_relaxed(implicitProducers, other.implicitProducers); +#endif + + return *this; + } + +public: + // Enqueues a single item (by copying it). + // Allocates memory if required. 
Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T const& item) + { + MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else { + return inner_enqueue(item); + } + } + + // Enqueues a single item (by moving it, if possible). + // Allocates memory if required. Only fails if memory allocation fails (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, + // or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(T&& item) + { + MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else { + return inner_enqueue(std::move(item)); + } + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T const& item) + { + return inner_enqueue(token, item); + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails (or + // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Thread-safe. + inline bool enqueue(producer_token_t const& token, T&& item) + { + return inner_enqueue(token, std::move(item)); + } + + // Enqueues several items. + // Allocates memory if required. Only fails if memory allocation fails (or + // implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). 
+ // Note: Use std::make_move_iterator if the elements should be moved instead of copied. + // Thread-safe. + template + bool enqueue_bulk(It itemFirst, size_t count) + { + MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else { + return inner_enqueue_bulk(itemFirst, count); + } + } + + // Enqueues several items using an explicit producer token. + // Allocates memory if required. Only fails if memory allocation fails + // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + return inner_enqueue_bulk(token, itemFirst, count); + } + + // Enqueues a single item (by copying it). + // Does not allocate memory. Fails if not enough room to enqueue (or implicit + // production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE + // is 0). + // Thread-safe. + inline bool try_enqueue(T const& item) + { + MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else { + return inner_enqueue(item); + } + } + + // Enqueues a single item (by moving it, if possible). + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Thread-safe. + inline bool try_enqueue(T&& item) + { + MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else { + return inner_enqueue(std::move(item)); + } + } + + // Enqueues a single item (by copying it) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. 
+ inline bool try_enqueue(producer_token_t const& token, T const& item) + { + return inner_enqueue(token, item); + } + + // Enqueues a single item (by moving it, if possible) using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Thread-safe. + inline bool try_enqueue(producer_token_t const& token, T&& item) + { + return inner_enqueue(token, std::move(item)); + } + + // Enqueues several items. + // Does not allocate memory (except for one-time implicit producer). + // Fails if not enough room to enqueue (or implicit production is + // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0). + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool try_enqueue_bulk(It itemFirst, size_t count) + { + MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) return false; + else { + return inner_enqueue_bulk(itemFirst, count); + } + } + + // Enqueues several items using an explicit producer token. + // Does not allocate memory. Fails if not enough room to enqueue. + // Note: Use std::make_move_iterator if the elements should be moved + // instead of copied. + // Thread-safe. + template + bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + return inner_enqueue_bulk(token, itemFirst, count); + } + + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + bool try_dequeue(U& item) + { + // Instead of simply trying each producer in turn (which could cause needless contention on the first + // producer), we score them heuristically. 
+ size_t nonEmptyCount = 0; + ProducerBase* best = nullptr; + size_t bestSize = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); nonEmptyCount < 3 && ptr != nullptr; ptr = ptr->next_prod()) { + auto size = ptr->size_approx(); + if (size > 0) { + if (size > bestSize) { + bestSize = size; + best = ptr; + } + ++nonEmptyCount; + } + } + + // If there was at least one non-empty queue but it appears empty at the time + // we try to dequeue from it, we need to make sure every queue's been tried + if (nonEmptyCount > 0) { + if ((details::likely)(best->dequeue(item))) { + return true; + } + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr != best && ptr->dequeue(item)) { + return true; + } + } + } + return false; + } + + // Attempts to dequeue from the queue. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // This differs from the try_dequeue(item) method in that this one does + // not attempt to reduce contention by interleaving the order that producer + // streams are dequeued from. So, using this method can reduce overall throughput + // under contention, but will give more predictable results in single-threaded + // consumer scenarios. This is mostly only useful for internal unit tests. + // Never allocates. Thread-safe. + template + bool try_dequeue_non_interleaved(U& item) + { + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr->dequeue(item)) { + return true; + } + } + return false; + } + + // Attempts to dequeue from the queue using an explicit consumer token. + // Returns false if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. 
+ template + bool try_dequeue(consumer_token_t& token, U& item) + { + // The idea is roughly as follows: + // Every 256 items from one producer, make everyone rotate (increase the global offset) -> this means the highest efficiency consumer dictates the rotation speed of everyone else, more or less + // If you see that the global offset has changed, you must reset your consumption counter and move to your designated place + // If there's no items where you're supposed to be, keep moving until you find a producer with some items + // If the global offset has not changed but you've run out of items to consume, move over from your current position until you find an producer with something in it + + if (token.desiredProducer == nullptr + || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { + if (!update_current_producer_after_rotation(token)) { + return false; + } + } + + // If there was at least one non-empty queue but it appears empty at the time + // we try to dequeue from it, we need to make sure every queue's been tried + if (static_cast(token.currentProducer)->dequeue(item)) { + if (++token.itemsConsumedFromCurrent == EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { + globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); + } + return true; + } + + auto tail = producerListTail.load(std::memory_order_acquire); + auto ptr = static_cast(token.currentProducer)->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + while (ptr != static_cast(token.currentProducer)) { + if (ptr->dequeue(item)) { + token.currentProducer = ptr; + token.itemsConsumedFromCurrent = 1; + return true; + } + ptr = ptr->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + } + return false; + } + + // Attempts to dequeue several elements from the queue. + // Returns the number of items actually dequeued. 
+ // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + size_t try_dequeue_bulk(It itemFirst, size_t max) + { + size_t count = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + count += ptr->dequeue_bulk(itemFirst, max - count); + if (count == max) { + break; + } + } + return count; + } + + // Attempts to dequeue several elements from the queue using an explicit consumer token. + // Returns the number of items actually dequeued. + // Returns 0 if all producer streams appeared empty at the time they + // were checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max) + { + if (token.desiredProducer == nullptr + || token.lastKnownGlobalOffset != globalExplicitConsumerOffset.load(std::memory_order_relaxed)) { + if (!update_current_producer_after_rotation(token)) { + return 0; + } + } + + size_t count = static_cast(token.currentProducer)->dequeue_bulk(itemFirst, max); + if (count == max) { + if ((token.itemsConsumedFromCurrent += static_cast(max)) >= EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) { + globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed); + } + return max; + } + token.itemsConsumedFromCurrent += static_cast(count); + max -= count; + + auto tail = producerListTail.load(std::memory_order_acquire); + auto ptr = static_cast(token.currentProducer)->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + while (ptr != static_cast(token.currentProducer)) { + auto dequeued = ptr->dequeue_bulk(itemFirst, max); + count += dequeued; + if (dequeued != 0) { + token.currentProducer = ptr; + token.itemsConsumedFromCurrent = static_cast(dequeued); + } + if (dequeued == max) { + break; + } + max -= dequeued; + ptr = 
ptr->next_prod(); + if (ptr == nullptr) { + ptr = tail; + } + } + return count; + } + + // Attempts to dequeue from a specific producer's inner queue. + // If you happen to know which producer you want to dequeue from, this + // is significantly faster than using the general-case try_dequeue methods. + // Returns false if the producer's queue appeared empty at the time it + // was checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline bool try_dequeue_from_producer(producer_token_t const& producer, U& item) + { + return static_cast(producer.producer)->dequeue(item); + } + + // Attempts to dequeue several elements from a specific producer's inner queue. + // Returns the number of items actually dequeued. + // If you happen to know which producer you want to dequeue from, this + // is significantly faster than using the general-case try_dequeue methods. + // Returns 0 if the producer's queue appeared empty at the time it + // was checked (so, the queue is likely but not guaranteed to be empty). + // Never allocates. Thread-safe. + template + inline size_t try_dequeue_bulk_from_producer(producer_token_t const& producer, It itemFirst, size_t max) + { + return static_cast(producer.producer)->dequeue_bulk(itemFirst, max); + } + + // Returns an estimate of the total number of elements currently in the queue. This + // estimate is only accurate if the queue has completely stabilized before it is called + // (i.e. all enqueue and dequeue operations have completed and their memory effects are + // visible on the calling thread, and no further operations start while this method is + // being called). + // Thread-safe. 
+ size_t size_approx() const + { + size_t size = 0; + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + size += ptr->size_approx(); + } + return size; + } + + // Returns true if the underlying atomic variables used by + // the queue are lock-free (they should be on most platforms). + // Thread-safe. + static constexpr bool is_lock_free() + { + return + details::static_is_lock_free::value == 2 + && details::static_is_lock_free::value == 2 + && details::static_is_lock_free::value == 2 + && details::static_is_lock_free::value == 2 + && details::static_is_lock_free::value == 2 + && details::static_is_lock_free::thread_id_numeric_size_t>::value + == 2; + } + +private: + friend struct ProducerToken; + friend struct ConsumerToken; + struct ExplicitProducer; + friend struct ExplicitProducer; + struct ImplicitProducer; + friend struct ImplicitProducer; + friend class ConcurrentQueueTests; + + enum AllocationMode { + CanAlloc, CannotAlloc + }; + + /////////////////////////////// + // Queue methods + /////////////////////////////// + + template + inline bool inner_enqueue(producer_token_t const& token, U&& element) + { + return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue( + std::forward(element)); + } + + template + inline bool inner_enqueue(U&& element) + { + auto producer = get_or_add_implicit_producer(); + return producer + == nullptr ? false : producer->ConcurrentQueue::ImplicitProducer::template enqueue(std::forward(element)); + } + + template + inline bool inner_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count) + { + return static_cast(token.producer)->ConcurrentQueue::ExplicitProducer::template enqueue_bulk(itemFirst, + count); + } + + template + inline bool inner_enqueue_bulk(It itemFirst, size_t count) + { + auto producer = get_or_add_implicit_producer(); + return producer == nullptr ? 
false : producer->ConcurrentQueue::ImplicitProducer::template enqueue_bulk(itemFirst, count); + } + + inline bool update_current_producer_after_rotation(consumer_token_t& token) + { + // Ah, there's been a rotation, figure out where we should be! + auto tail = producerListTail.load(std::memory_order_acquire); + if (token.desiredProducer == nullptr && tail == nullptr) { + return false; + } + auto prodCount = producerCount.load(std::memory_order_relaxed); + auto globalOffset = globalExplicitConsumerOffset.load(std::memory_order_relaxed); + if ((details::unlikely)(token.desiredProducer == nullptr)) { + // Aha, first time we're dequeueing anything. + // Figure out our local position + // Note: offset is from start, not end, but we're traversing from end -- subtract from count first + std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount); + token.desiredProducer = tail; + for (std::uint32_t i = 0; i != offset; ++i) { + token.desiredProducer = static_cast(token.desiredProducer)->next_prod(); + if (token.desiredProducer == nullptr) { + token.desiredProducer = tail; + } + } + } + + std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset; + if (delta >= prodCount) { + delta = delta % prodCount; + } + for (std::uint32_t i = 0; i != delta; ++i) { + token.desiredProducer = static_cast(token.desiredProducer)->next_prod(); + if (token.desiredProducer == nullptr) { + token.desiredProducer = tail; + } + } + + token.lastKnownGlobalOffset = globalOffset; + token.currentProducer = token.desiredProducer; + token.itemsConsumedFromCurrent = 0; + return true; + } + + /////////////////////////// + // Free list + /////////////////////////// + + template + struct FreeListNode + { + FreeListNode() + : freeListRefs(0), freeListNext(nullptr) { } + + std::atomic freeListRefs; + std::atomic freeListNext; + }; + + // A simple CAS-based lock-free free list. 
Not the fastest thing in the world under heavy contention, but + // simple and correct (assuming nodes are never freed until after the free list is destroyed), and fairly + // speedy under low contention. + template // N must inherit FreeListNode or have the same fields (and initialization of them) + struct FreeList + { + FreeList() + : freeListHead(nullptr) { } + FreeList(FreeList&& other) + : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) + { + other.freeListHead.store(nullptr, std::memory_order_relaxed); + } + + void swap(FreeList& other) { details::swap_relaxed(freeListHead, other.freeListHead); } + + FreeList(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; + FreeList& operator=(FreeList const&) MOODYCAMEL_DELETE_FUNCTION; + + inline void add(N* node) + { +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugLock lock(mutex); +#endif + // We know that the should-be-on-freelist bit is 0 at this point, so it's safe to + // set it using a fetch_add + if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST, std::memory_order_acq_rel) == 0) { + // Oh look! We were the last ones referencing this node, and we know + // we want to add it to the free list, so let's do it! 
+ add_knowing_refcount_is_zero(node); + } + } + + inline N* try_get() + { +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugLock lock(mutex); +#endif + auto head = freeListHead.load(std::memory_order_acquire); + while (head != nullptr) { + auto prevHead = head; + auto refs = head->freeListRefs.load(std::memory_order_relaxed); + if ((refs & REFS_MASK) == 0 || !head->freeListRefs.compare_exchange_strong(refs, refs + 1, std::memory_order_acquire, + std::memory_order_relaxed)) { + head = freeListHead.load(std::memory_order_acquire); + continue; + } + + // Good, reference count has been incremented (it wasn't at zero), which means we can read the + // next and not worry about it changing between now and the time we do the CAS + auto next = head->freeListNext.load(std::memory_order_relaxed); + if (freeListHead.compare_exchange_strong(head, next, std::memory_order_acquire, std::memory_order_relaxed)) { + // Yay, got the node. This means it was on the list, which means shouldBeOnFreeList must be false no + // matter the refcount (because nobody else knows it's been taken off yet, it can't have been put back on). + assert((head->freeListRefs.load(std::memory_order_relaxed) & SHOULD_BE_ON_FREELIST) == 0); + + // Decrease refcount twice, once for our ref, and once for the list's ref + head->freeListRefs.fetch_sub(2, std::memory_order_release); + return head; + } + + // OK, the head must have changed on us, but we still need to decrease the refcount we increased. + // Note that we don't need to release any memory effects, but we do need to ensure that the reference + // count decrement happens-after the CAS on the head. + refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel); + if (refs == SHOULD_BE_ON_FREELIST + 1) { + add_knowing_refcount_is_zero(prevHead); + } + } + + return nullptr; + } + + // Useful for traversing the list when there's no contention (e.g. 
to destroy remaining nodes) + N* head_unsafe() const { return freeListHead.load(std::memory_order_relaxed); } + + private: + inline void add_knowing_refcount_is_zero(N* node) + { + // Since the refcount is zero, and nobody can increase it once it's zero (except us, and we run + // only one copy of this method per node at a time, i.e. the single thread case), then we know + // we can safely change the next pointer of the node; however, once the refcount is back above + // zero, then other threads could increase it (happens under heavy contention, when the refcount + // goes to zero in between a load and a refcount increment of a node in try_get, then back up to + // something non-zero, then the refcount increment is done by the other thread) -- so, if the CAS + // to add the node to the actual list fails, decrease the refcount and leave the add operation to + // the next thread who puts the refcount back at zero (which could be us, hence the loop). + auto head = freeListHead.load(std::memory_order_relaxed); + while (true) { + node->freeListNext.store(head, std::memory_order_relaxed); + node->freeListRefs.store(1, std::memory_order_release); + if (!freeListHead.compare_exchange_strong(head, node, std::memory_order_release, std::memory_order_relaxed)) { + // Hmm, the add failed, but we can only try again when the refcount goes back to zero + if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1, std::memory_order_release) == 1) { + continue; + } + } + return; + } + } + + private: + // Implemented like a stack, but where node order doesn't matter (nodes are inserted out of order under contention) + std::atomic freeListHead; + + static const std::uint32_t REFS_MASK = 0x7FFFFFFF; + static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000; + +#ifdef MCDBGQ_NOLOCKFREE_FREELIST + debug::DebugMutex mutex; +#endif + }; + + /////////////////////////// + // Block + /////////////////////////// + + enum InnerQueueContext { + implicit_context = 0, explicit_context = 1 + 
}; + + struct Block + { + Block() + : next(nullptr), elementsCompletelyDequeued(0), freeListRefs(0), freeListNext(nullptr), dynamicallyAllocated(true) + { +#ifdef MCDBGQ_TRACKMEM + owner = nullptr; +#endif + } + + template + inline bool is_empty() const + { + MOODYCAMEL_CONSTEXPR_IF(context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Check flags + for (size_t i = 0; i < BLOCK_SIZE; ++i) { + if (!emptyFlags[i].load(std::memory_order_relaxed)) { + return false; + } + } + + // Aha, empty; make sure we have all other memory effects that happened before the empty flags were set + std::atomic_thread_fence(std::memory_order_acquire); + return true; + } else { + // Check counter + if (elementsCompletelyDequeued.load(std::memory_order_relaxed) == BLOCK_SIZE) { + std::atomic_thread_fence(std::memory_order_acquire); + return true; + } + assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <= BLOCK_SIZE); + return false; + } + } + + // Returns true if the block is now empty (does not apply in explicit context) + template + inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i) + { + MOODYCAMEL_CONSTEXPR_IF(context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set flag + assert(!emptyFlags[BLOCK_SIZE - 1 + - static_cast(i & static_cast(BLOCK_SIZE - 1))].load(std::memory_order_relaxed)); + emptyFlags[BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1))].store(true, + std::memory_order_release); + return false; + } else { + // Increment counter + auto prevVal = elementsCompletelyDequeued.fetch_add(1, std::memory_order_release); + assert(prevVal < BLOCK_SIZE); + return prevVal == BLOCK_SIZE - 1; + } + } + + // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping and count > 0). + // Returns true if the block is now empty (does not apply in explicit context). 
+ template + inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i, size_t count) + { + MOODYCAMEL_CONSTEXPR_IF(context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set flags + std::atomic_thread_fence(std::memory_order_release); + i = BLOCK_SIZE - 1 - static_cast(i & static_cast(BLOCK_SIZE - 1)) - count + 1; + for (size_t j = 0; j != count; ++j) { + assert(!emptyFlags[i + j].load(std::memory_order_relaxed)); + emptyFlags[i + j].store(true, std::memory_order_relaxed); + } + return false; + } else { + // Increment counter + auto prevVal = elementsCompletelyDequeued.fetch_add(count, std::memory_order_release); + assert(prevVal + count <= BLOCK_SIZE); + return prevVal + count == BLOCK_SIZE; + } + } + + template + inline void set_all_empty() + { + MOODYCAMEL_CONSTEXPR_IF(context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Set all flags + for (size_t i = 0; i != BLOCK_SIZE; ++i) { + emptyFlags[i].store(true, std::memory_order_relaxed); + } + } else { + // Reset counter + elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed); + } + } + + template + inline void reset_empty() + { + MOODYCAMEL_CONSTEXPR_IF(context == explicit_context && BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) { + // Reset flags + for (size_t i = 0; i != BLOCK_SIZE; ++i) { + emptyFlags[i].store(false, std::memory_order_relaxed); + } + } else { + // Reset counter + elementsCompletelyDequeued.store(0, std::memory_order_relaxed); + } + } + + inline T* operator[](index_t idx) MOODYCAMEL_NOEXCEPT + { + return static_cast(static_cast(elements)) + static_cast(idx & static_cast(BLOCK_SIZE - 1)); + } + + inline T const* operator[](index_t idx) const MOODYCAMEL_NOEXCEPT + { + return static_cast(static_cast(elements)) + + static_cast(idx & static_cast(BLOCK_SIZE - 1)); + } + + private: + static_assert(std::alignment_of::value <= sizeof(T), + "The queue does not support types with an alignment greater 
than their size at this time"); + MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) elements; + public: + Block* next; + std::atomic elementsCompletelyDequeued; + std::atomic emptyFlags[BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? BLOCK_SIZE : 1]; + public: + std::atomic freeListRefs; + std::atomic freeListNext; + bool dynamicallyAllocated; // Perhaps a better name for this would be 'isNotPartOfInitialBlockPool' + +#ifdef MCDBGQ_TRACKMEM + void* owner; +#endif + }; + static_assert(std::alignment_of::value >= std::alignment_of::value, + "Internal error: Blocks must be at least as aligned as the type they are wrapping"); + +#ifdef MCDBGQ_TRACKMEM +public: + struct MemStats; +private: +#endif + + /////////////////////////// + // Producer base + /////////////////////////// + + struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase + { + ProducerBase(ConcurrentQueue* parent_, bool isExplicit_) + : tailIndex(0), + headIndex(0), + dequeueOptimisticCount(0), + dequeueOvercommit(0), + tailBlock(nullptr), + isExplicit(isExplicit_), + parent(parent_) + { + } + + virtual ~ProducerBase() { } + + template + inline bool dequeue(U& element) + { + if (isExplicit) { + return static_cast(this)->dequeue(element); + } else { + return static_cast(this)->dequeue(element); + } + } + + template + inline size_t dequeue_bulk(It& itemFirst, size_t max) + { + if (isExplicit) { + return static_cast(this)->dequeue_bulk(itemFirst, max); + } else { + return static_cast(this)->dequeue_bulk(itemFirst, max); + } + } + + inline ProducerBase* next_prod() const { return static_cast(next); } + + inline size_t size_approx() const + { + auto tail = tailIndex.load(std::memory_order_relaxed); + auto head = headIndex.load(std::memory_order_relaxed); + return details::circular_less_than(head, tail) ? 
static_cast(tail - head) : 0; + } + + inline index_t getTail() const { return tailIndex.load(std::memory_order_relaxed); } + protected: + std::atomic tailIndex; // Where to enqueue to next + std::atomic headIndex; // Where to dequeue from next + + std::atomic dequeueOptimisticCount; + std::atomic dequeueOvercommit; + + Block* tailBlock; + + public: + bool isExplicit; + ConcurrentQueue* parent; + + protected: +#ifdef MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + /////////////////////////// + // Explicit queue + /////////////////////////// + + struct ExplicitProducer : public ProducerBase + { + explicit ExplicitProducer(ConcurrentQueue* parent_) + : ProducerBase(parent_, true), + blockIndex(nullptr), + pr_blockIndexSlotsUsed(0), + pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1), + pr_blockIndexFront(0), + pr_blockIndexEntries(nullptr), + pr_blockIndexRaw(nullptr) + { + size_t poolBasedIndexSize = details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1; + if (poolBasedIndexSize > pr_blockIndexSize) { + pr_blockIndexSize = poolBasedIndexSize; + } + + new_block_index(0); // This creates an index with double the number of current entries, i.e. EXPLICIT_INITIAL_INDEX_SIZE + } + + ~ExplicitProducer() + { + // Destruct any elements not yet dequeued. + // Since we're in the destructor, we can assume all elements + // are either completely dequeued or completely not (no halfways). 
+ if (this->tailBlock != nullptr) { // Note this means there must be a block index too + // First find the block that's partially dequeued, if any + Block* halfDequeuedBlock = nullptr; + if ((this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) != 0) { + // The head's not on a block boundary, meaning a block somewhere is partially dequeued + // (or the head block is the tail block and was fully dequeued, but the head/tail are still not on a boundary) + size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & (pr_blockIndexSize - 1); + while (details::circular_less_than(pr_blockIndexEntries[i].base + BLOCK_SIZE, + this->headIndex.load(std::memory_order_relaxed))) { + i = (i + 1) & (pr_blockIndexSize - 1); + } + assert(details::circular_less_than(pr_blockIndexEntries[i].base, + this->headIndex.load(std::memory_order_relaxed))); + halfDequeuedBlock = pr_blockIndexEntries[i].block; + } + + // Start at the head block (note the first line in the loop gives us the head from the tail on the first iteration) + auto block = this->tailBlock; + do{ + block = block->next; + if (block->ConcurrentQueue::Block::template is_empty()) { + continue; + } + + size_t i = 0; // Offset into block + if (block == halfDequeuedBlock) { + i = static_cast(this->headIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)); + } + + // Walk through all the items in the block; if this is the tail block, we need to stop when we reach the tail index + auto lastValidIndex = (this->tailIndex.load(std::memory_order_relaxed) & static_cast(BLOCK_SIZE - 1)) + == 0 ? 
BLOCK_SIZE : static_cast(this->tailIndex.load(std::memory_order_relaxed) + & static_cast(BLOCK_SIZE - 1)); + while (i != BLOCK_SIZE && (block != this->tailBlock || i != lastValidIndex)) { + (*block)[i++]->~T(); + } + } while (block != this->tailBlock); + } + + // Destroy all blocks that we own + if (this->tailBlock != nullptr) { + auto block = this->tailBlock; + do{ + auto nextBlock = block->next; + this->parent->add_block_to_free_list(block); + block = nextBlock; + } while (block != this->tailBlock); + } + + // Destroy the block indices + auto header = static_cast(pr_blockIndexRaw); + while (header != nullptr) { + auto prev = static_cast(header->prev); + header->~BlockIndexHeader(); + (Traits::free)(header); + header = prev; + } + } + + template + inline bool enqueue(U&& element) + { + index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); + index_t newTailIndex = 1 + currentTailIndex; + if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + // We reached the end of a block, start a new one + auto startBlock = this->tailBlock; + auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; + if (this->tailBlock != nullptr && this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) { + // We can re-use the block ahead of us, it's empty! + this->tailBlock = this->tailBlock->next; + this->tailBlock->ConcurrentQueue::Block::template reset_empty(); + + // We'll put the block on the block index (guaranteed to be room since we're conceptually removing the + // last block from it first -- except instead of removing then adding, we can just overwrite). + // Note that there must be a valid block index here, since even if allocation failed in the ctor, + // it would have been re-attempted when adding the first block to the queue; since there is such + // a block, a block index must have been successfully allocated. + } else { + // Whatever head value we see here is >= the last value we saw here (relatively), + // and <= its current value. 
Since we have the most recent tail, the head must be + // <= to it. + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + if (!details::circular_less_than(head, currentTailIndex + BLOCK_SIZE) + || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value + && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { + // We can't enqueue in another block because there's not enough leeway -- the + // tail could surpass the head by the time the block fills up! (Or we'll exceed + // the size limit, if the second part of the condition was true.) + return false; + } + // We're going to need a new block; check that the block index has room + if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize) { + // Hmm, the circular block index is already full -- we'll need + // to allocate a new index. Note pr_blockIndexRaw can only be nullptr if + // the initial allocation failed in the constructor. + + MOODYCAMEL_CONSTEXPR_IF(allocMode == CannotAlloc) { + return false; + } else if (!new_block_index(pr_blockIndexSlotsUsed)) { + return false; + } + } + + // Insert a new block in the circular linked list + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + return false; + } +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + if (this->tailBlock == nullptr) { + newBlock->next = newBlock; + } else { + newBlock->next = this->tailBlock->next; + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + ++pr_blockIndexSlotsUsed; + } + + MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { + // The constructor may throw. 
We want the element not to appear in the queue in + // that case (without corrupting the queue): + MOODYCAMEL_TRY { + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + } + MOODYCAMEL_CATCH(...) { + // Revert change to the current block, but leave the new block available + // for next time + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? this->tailBlock : startBlock; + MOODYCAMEL_RETHROW; + } + } else { + (void)startBlock; + (void)originalBlockIndexSlotsUsed; + } + + // Add block to block index + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + blockIndex.load(std::memory_order_relaxed)->front.store(pr_blockIndexFront, std::memory_order_release); + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + + MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + } + + // Enqueue + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + bool dequeue(U& element) + { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + if (details::circular_less_than(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { + // Might be something to dequeue, let's give it a try + + // Note that this if is purely for performance purposes in the common case when the queue is + // empty and the values are eventually consistent -- we may enter here spuriously. 
+ + // Note that whatever the values of overcommit and tail are, they are not going to change (unless we + // change them) and must be the same value at this point (inside the if) as when the if condition was + // evaluated. + + // We insert an acquire fence here to synchronize-with the release upon incrementing dequeueOvercommit below. + // This ensures that whatever the value we got loaded into overcommit, the load of dequeueOptisticCount in + // the fetch_add below will result in a value at least as recent as that (and therefore at least as large). + // Note that I believe a compiler (signal) fence here would be sufficient due to the nature of fetch_add (all + // read-modify-write operations are guaranteed to work on the latest value in the modification order), but + // unfortunately that can't be shown to be correct using only the C++11 standard. + // See http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case + std::atomic_thread_fence(std::memory_order_acquire); + + // Increment optimistic counter, then check if it went over the boundary + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); + + // Note that since dequeueOvercommit must be <= dequeueOptimisticCount (because dequeueOvercommit is only ever + // incremented after dequeueOptimisticCount -- this is enforced in the `else` block below), and since we now + // have a version of dequeueOptimisticCount that is at least as recent as overcommit (due to the release upon + // incrementing dequeueOvercommit and the acquire above that synchronizes with it), overcommit <= myDequeueCount. + // However, we can't assert this since both dequeueOptimisticCount and dequeueOvercommit may (independently) + // overflow; in such a case, though, the logic still holds since the difference between the two is maintained. 
+ + // Note that we reload tail here in case it changed; it will be the same value as before or greater, since + // this load is sequenced after (happens after) the earlier load above. This is supported by read-read + // coherency (as defined in the standard), explained here: http://en.cppreference.com/w/cpp/atomic/memory_order + tail = this->tailIndex.load(std::memory_order_acquire); + if ((details::likely)(details::circular_less_than(myDequeueCount - overcommit, tail))) { + // Guaranteed to be at least one element to dequeue! + + // Get the index. Note that since there's guaranteed to be at least one element, this + // will never exceed tail. We need to do an acquire-release fence here since it's possible + // that whatever condition got us to this point was for an earlier enqueued element (that + // we already see the memory effects for), but that by the time we increment somebody else + // has incremented it, and we need to see the memory effects for *that* element, which is + // in such a case is necessarily visible on the thread that incremented it in the first + // place with the more current condition (they must have acquired a tail that is at least + // as recent). + auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); + + // Determine which block the element is in + + auto localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); + + // We need to be careful here about subtracting and dividing because of index wrap-around. 
+ // When an index wraps, we need to preserve the sign of the offset when dividing it by the + // block size (in order to get a correct signed block count offset in all cases): + auto headBase = localBlockIndex->entries[localBlockIndexHead].base; + auto blockBaseIndex = index & ~static_cast(BLOCK_SIZE - 1); + auto offset = static_cast(static_cast::type>(blockBaseIndex - headBase) + / static_cast::type>(BLOCK_SIZE)); + auto block = localBlockIndex->entries[(localBlockIndexHead + offset) & (localBlockIndex->size - 1)].block; + + // Dequeue + auto& el = *((*block)[index]); + if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&, element = std::move(el))) { + // Make sure the element is still fully dequeued and destroyed even if the assignment + // throws + struct Guard { + Block* block; + index_t index; + + ~Guard() + { + (*block)[index]->~T(); + block->ConcurrentQueue::Block::template set_empty(index); + } + } guard = { block, index }; + + element = std::move(el); // NOLINT + } else { + element = std::move(el); // NOLINT + el.~T(); // NOLINT + block->ConcurrentQueue::Block::template set_empty(index); + } + + return true; + } else { + // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent + this->dequeueOvercommit.fetch_add(1, std::memory_order_release); // Release so that the fetch_add on dequeueOptimisticCount is guaranteed to happen before this write + } + } + + return false; + } + + template + bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count) + { + // First, we need to make sure we have enough room to enqueue all of the elements; + // this means pre-allocating blocks and putting them in the block index (but only if + // all the allocations succeeded). 
+ index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); + auto startBlock = this->tailBlock; + auto originalBlockIndexFront = pr_blockIndexFront; + auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed; + + Block* firstAllocatedBlock = nullptr; + + // Figure out how many blocks we'll need to allocate, and do so + size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) + - ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); + index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + if (blockBaseDiff > 0) { + // Allocate as many blocks as possible from ahead + while (blockBaseDiff > 0 && this->tailBlock != nullptr && this->tailBlock->next != firstAllocatedBlock + && this->tailBlock->next->ConcurrentQueue::Block::template is_empty()) { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + this->tailBlock = this->tailBlock->next; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? 
this->tailBlock : firstAllocatedBlock; + + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + } + + // Now allocate as many blocks as necessary from the block pool + while (blockBaseDiff > 0) { + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + bool full = !details::circular_less_than(head, + currentTailIndex + BLOCK_SIZE) + || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value + && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + if (pr_blockIndexRaw == nullptr || pr_blockIndexSlotsUsed == pr_blockIndexSize || full) { + MOODYCAMEL_CONSTEXPR_IF(allocMode == CannotAlloc) { + // Failed to allocate, undo changes (but keep injected blocks) + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } else if (full || !new_block_index(originalBlockIndexSlotsUsed)) { + // Failed to allocate, undo changes (but keep injected blocks) + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? 
firstAllocatedBlock : startBlock; + return false; + } + + // pr_blockIndexFront is updated inside new_block_index, so we need to + // update our fallback value too (since we keep the new index even if we + // later fail) + originalBlockIndexFront = originalBlockIndexSlotsUsed; + } + + // Insert a new block in the circular linked list + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + return false; + } + +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template set_all_empty(); + if (this->tailBlock == nullptr) { + newBlock->next = newBlock; + } else { + newBlock->next = this->tailBlock->next; + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? this->tailBlock : firstAllocatedBlock; + + ++pr_blockIndexSlotsUsed; + + auto& entry = blockIndex.load(std::memory_order_relaxed)->entries[pr_blockIndexFront]; + entry.base = currentTailIndex; + entry.block = this->tailBlock; + pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1); + } + + // Excellent, all allocations succeeded. 
Reset each block's emptiness before we fill them up, and + // publish the new block index front + auto block = firstAllocatedBlock; + while (true) { + block->ConcurrentQueue::Block::template reset_empty(); + if (block == this->tailBlock) { + break; + } + block = block->next; + } + + MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), + new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), + std::memory_order_release); + } + } + + // Enqueue, one block at a time + index_t newTailIndex = startTailIndex + static_cast(count); + currentTailIndex = startTailIndex; + auto endBlock = this->tailBlock; + this->tailBlock = startBlock; + assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0); + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) { + this->tailBlock = firstAllocatedBlock; + } + while (true) { + index_t stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(newTailIndex, stopIndex)) { + stopIndex = newTailIndex; + } + MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), + new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); + } + } else { + MOODYCAMEL_TRY { + while (currentTailIndex != stopIndex) { + // Must use copy constructor even if move constructor is available + // because we may have to revert if there's an exception. 
+ // Sorry about the horrible templated next line, but it was the only way + // to disable moving *at compile time*, which is important because a type + // may only define a (noexcept) move constructor, and so calls to the + // cctor will not compile, even if they are in an if branch that will never + // be executed + new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if (nullptr)) T( + details:: + deref_noexcept( + itemFirst)))> + ::eval(*itemFirst)); + ++currentTailIndex; + ++itemFirst; + } + } + MOODYCAMEL_CATCH(...) { + // Oh dear, an exception's been thrown -- destroy the elements that + // were enqueued so far and revert the entire bulk operation (we'll keep + // any allocated blocks in our linked list for later, though). + auto constructedStopIndex = currentTailIndex; + auto lastBlockEnqueued = this->tailBlock; + + pr_blockIndexFront = originalBlockIndexFront; + pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed; + this->tailBlock = startBlock == nullptr ? firstAllocatedBlock : startBlock; + + if (!details::is_trivially_destructible::value) { + auto block = startBlock; + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + block = firstAllocatedBlock; + } + currentTailIndex = startTailIndex; + while (true) { + stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(constructedStopIndex, stopIndex)) { + stopIndex = constructedStopIndex; + } + while (currentTailIndex != stopIndex) { + (*block)[currentTailIndex++]->~T(); + } + if (block == lastBlockEnqueued) { + break; + } + block = block->next; + } + } + MOODYCAMEL_RETHROW; + } + } + + if (this->tailBlock == endBlock) { + assert(currentTailIndex == newTailIndex); + break; + } + this->tailBlock = this->tailBlock->next; + } + + MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), + new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + if (firstAllocatedBlock != nullptr) { + 
blockIndex.load(std::memory_order_relaxed)->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1), + std::memory_order_release); + } + } + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + size_t dequeue_bulk(It& itemFirst, size_t max) + { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + auto desiredCount = static_cast(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); + if (details::circular_less_than(0, desiredCount)) { + desiredCount = desiredCount < max ? desiredCount : max; + std::atomic_thread_fence(std::memory_order_acquire); + + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); + + tail = this->tailIndex.load(std::memory_order_acquire); + auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); + if (details::circular_less_than(0, actualCount)) { + actualCount = desiredCount < actualCount ? desiredCount : actualCount; + if (actualCount < desiredCount) { + this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); + } + + // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this + // will never exceed tail. 
+ auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); + + // Determine which block the first element is in + auto localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto localBlockIndexHead = localBlockIndex->front.load(std::memory_order_acquire); + + auto headBase = localBlockIndex->entries[localBlockIndexHead].base; + auto firstBlockBaseIndex = firstIndex & ~static_cast(BLOCK_SIZE - 1); + auto offset = static_cast(static_cast::type>(firstBlockBaseIndex + - headBase) + / static_cast::type>(BLOCK_SIZE)); + auto indexIndex = (localBlockIndexHead + offset) & (localBlockIndex->size - 1); + + // Iterate the blocks and dequeue + auto index = firstIndex; + do{ + auto firstIndexInBlock = index; + index_t endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), + endIndex) ? firstIndex + + static_cast(actualCount) : endIndex; + auto block = localBlockIndex->entries[indexIndex].block; + if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst++ = std::move(el); + el.~T(); + ++index; + } + } else { + MOODYCAMEL_TRY { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst = std::move(el); + ++itemFirst; + el.~T(); + ++index; + } + } + MOODYCAMEL_CATCH(...) 
{ + // It's too late to revert the dequeue, but we can make sure that all + // the dequeued objects are properly destroyed and the block index + // (and empty count) are properly updated before we propagate the exception + do{ + block = localBlockIndex->entries[indexIndex].block; + while (index != endIndex) { + (*block)[index++]->~T(); + } + block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, + static_cast(endIndex + - + firstIndexInBlock)); + indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); + + firstIndexInBlock = index; + endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), + endIndex) ? firstIndex + + static_cast(actualCount) : endIndex; + } while (index != firstIndex + actualCount); + + MOODYCAMEL_RETHROW; + } + } + block->ConcurrentQueue::Block::template set_many_empty(firstIndexInBlock, + static_cast(endIndex + - firstIndexInBlock)); + indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1); + } while (index != firstIndex + actualCount); + + return actualCount; + } else { + // Wasn't anything to dequeue after all; make the effective dequeue count eventually consistent + this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); + } + } + + return 0; + } + + private: + struct BlockIndexEntry + { + index_t base; + Block* block; + }; + + struct BlockIndexHeader + { + size_t size; + std::atomic front; // Current slot (not next, like pr_blockIndexFront) + BlockIndexEntry* entries; + void* prev; + }; + + bool new_block_index(size_t numberOfFilledSlotsToExpose) + { + auto prevBlockSizeMask = pr_blockIndexSize - 1; + + // Create the new block + pr_blockIndexSize <<= 1; + auto newRawPtr = static_cast((Traits::malloc)(sizeof(BlockIndexHeader) + std::alignment_of::value - 1 + + sizeof(BlockIndexEntry) * pr_blockIndexSize)); + if (newRawPtr == nullptr) { + pr_blockIndexSize >>= 1; // Reset to allow graceful retry + 
return false; + } + + auto newBlockIndexEntries = reinterpret_cast(details::align_for(newRawPtr + + sizeof(BlockIndexHeader))); + + // Copy in all the old indices, if any + size_t j = 0; + if (pr_blockIndexSlotsUsed != 0) { + auto i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & prevBlockSizeMask; + do{ + newBlockIndexEntries[j++] = pr_blockIndexEntries[i]; + i = (i + 1) & prevBlockSizeMask; + } while (i != pr_blockIndexFront); + } + + // Update everything + auto header = new (newRawPtr) BlockIndexHeader; + header->size = pr_blockIndexSize; + header->front.store(numberOfFilledSlotsToExpose - 1, std::memory_order_relaxed); + header->entries = newBlockIndexEntries; + header->prev = pr_blockIndexRaw; // we link the new block to the old one so we can free it later + + pr_blockIndexFront = j; + pr_blockIndexEntries = newBlockIndexEntries; + pr_blockIndexRaw = newRawPtr; + blockIndex.store(header, std::memory_order_release); + + return true; + } + + private: + std::atomic blockIndex; + + // To be used by producer only -- consumer must use the ones in referenced by blockIndex + size_t pr_blockIndexSlotsUsed; + size_t pr_blockIndexSize; + size_t pr_blockIndexFront; // Next slot (not current) + BlockIndexEntry* pr_blockIndexEntries; + void* pr_blockIndexRaw; + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + public: + ExplicitProducer* nextExplicitProducer; + private: +#endif + +#ifdef MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + ////////////////////////////////// + // Implicit queue + ////////////////////////////////// + + struct ImplicitProducer : public ProducerBase + { + ImplicitProducer(ConcurrentQueue* parent_) + : ProducerBase(parent_, false), + nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE), + blockIndex(nullptr) + { + new_block_index(); + } + + ~ImplicitProducer() + { + // Note that since we're in the destructor we can assume that all enqueue/dequeue operations + // completed already; this means that all undequeued elements are placed contiguously 
across + // contiguous blocks, and that only the first and last remaining blocks can be only partially + // empty (all other remaining blocks must be completely full). + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + // Unregister ourselves for thread termination notification + if (!this->inactive.load(std::memory_order_relaxed)) { + details::ThreadExitNotifier::unsubscribe(&threadExitListener); + } +#endif + + // Destroy all remaining elements! + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto index = this->headIndex.load(std::memory_order_relaxed); + Block* block = nullptr; + assert(index == tail || details::circular_less_than(index, tail)); + bool forceFreeLastBlock = index != tail; // If we enter the loop, then the last (tail) block will not be freed + while (index != tail) { + if ((index & static_cast(BLOCK_SIZE - 1)) == 0 || block == nullptr) { + if (block != nullptr) { + // Free the old block + this->parent->add_block_to_free_list(block); + } + + block = get_block_index_entry_for_index(index)->value.load(std::memory_order_relaxed); + } + + ((*block)[index])->~T(); + ++index; + } + // Even if the queue is empty, there's still one block that's not on the free list + // (unless the head index reached the end of it, in which case the tail will be poised + // to create a new block). 
+ if (this->tailBlock != nullptr && (forceFreeLastBlock || (tail & static_cast(BLOCK_SIZE - 1)) != 0)) { + this->parent->add_block_to_free_list(this->tailBlock); + } + + // Destroy block index + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); + if (localBlockIndex != nullptr) { + for (size_t i = 0; i != localBlockIndex->capacity; ++i) { + localBlockIndex->index[i]->~BlockIndexEntry(); + } + do{ + auto prev = localBlockIndex->prev; + localBlockIndex->~BlockIndexHeader(); + (Traits::free)(localBlockIndex); + localBlockIndex = prev; + } while (localBlockIndex != nullptr); + } + } + + template + inline bool enqueue(U&& element) + { + index_t currentTailIndex = this->tailIndex.load(std::memory_order_relaxed); + index_t newTailIndex = 1 + currentTailIndex; + if ((currentTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + // We reached the end of a block, start a new one + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + if (!details::circular_less_than(head, + currentTailIndex + BLOCK_SIZE) || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) { + return false; + } +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Find out where we'll be inserting this block in the block index + BlockIndexEntry* idxEntry; + if (!insert_block_index_entry(idxEntry, currentTailIndex)) { + return false; + } + + // Get ahold of a new block + auto newBlock = this->parent->ConcurrentQueue::template requisition_block(); + if (newBlock == nullptr) { + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + return false; + } +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + + MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) 
T(std::forward(element)))) { + // May throw, try to insert now before we publish the fact that we have this new block + MOODYCAMEL_TRY { + new ((*newBlock)[currentTailIndex]) T(std::forward(element)); + } + MOODYCAMEL_CATCH(...) { + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + this->parent->add_block_to_free_list(newBlock); + MOODYCAMEL_RETHROW; + } + } + + // Insert the new block into the index + idxEntry->value.store(newBlock, std::memory_order_relaxed); + + this->tailBlock = newBlock; + + MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(T, U, new (static_cast(nullptr)) T(std::forward(element)))) { + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + } + + // Enqueue + new ((*this->tailBlock)[currentTailIndex]) T(std::forward(element)); + + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + + template + bool dequeue(U& element) + { + // See ExplicitProducer::dequeue for rationale and explanation + index_t tail = this->tailIndex.load(std::memory_order_relaxed); + index_t overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + if (details::circular_less_than(this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit, tail)) { + std::atomic_thread_fence(std::memory_order_acquire); + + index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(1, std::memory_order_relaxed); + tail = this->tailIndex.load(std::memory_order_acquire); + if ((details::likely)(details::circular_less_than(myDequeueCount - overcommit, tail))) { + index_t index = this->headIndex.fetch_add(1, std::memory_order_acq_rel); + + // Determine which block the element is in + auto entry = get_block_index_entry_for_index(index); + + // Dequeue + auto block = entry->value.load(std::memory_order_relaxed); + auto& el = *((*block)[index]); + + if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&, element = std::move(el))) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX 
+ // Note: Acquiring the mutex with every dequeue instead of only when a block + // is released is very sub-optimal, but it is, after all, purely debug code. + debug::DebugLock lock(producer->mutex); +#endif + struct Guard { + Block* block; + index_t index; + BlockIndexEntry* entry; + ConcurrentQueue* parent; + + ~Guard() + { + (*block)[index]->~T(); + if (block->ConcurrentQueue::Block::template set_empty(index)) { + entry->value.store(nullptr, std::memory_order_relaxed); + parent->add_block_to_free_list(block); + } + } + } guard = { block, index, entry, this->parent }; + + element = std::move(el); // NOLINT + } else { + element = std::move(el); // NOLINT + el.~T(); // NOLINT + + if (block->ConcurrentQueue::Block::template set_empty(index)) { + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Add the block back into the global free pool (and remove from block index) + entry->value.store(nullptr, std::memory_order_relaxed); + } + this->parent->add_block_to_free_list(block); // releases the above store + } + } + + return true; + } else { + this->dequeueOvercommit.fetch_add(1, std::memory_order_release); + } + } + + return false; + } + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4706) // assignment within conditional expression +#endif + template + bool enqueue_bulk(It itemFirst, size_t count) + { + // First, we need to make sure we have enough room to enqueue all of the elements; + // this means pre-allocating blocks and putting them in the block index (but only if + // all the allocations succeeded). + + // Note that the tailBlock we start off with may not be owned by us any more; + // this happens if it was filled up exactly to the top (setting tailIndex to + // the first index of the next block which is not yet allocated), then dequeued + // completely (putting it on the free list) before we enqueue again. 
+ + index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed); + auto startBlock = this->tailBlock; + Block* firstAllocatedBlock = nullptr; + auto endBlock = this->tailBlock; + + // Figure out how many blocks we'll need to allocate, and do so + size_t blockBaseDiff = ((startTailIndex + count - 1) & ~static_cast(BLOCK_SIZE - 1)) + - ((startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1)); + index_t currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + if (blockBaseDiff > 0) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + do{ + blockBaseDiff -= static_cast(BLOCK_SIZE); + currentTailIndex += static_cast(BLOCK_SIZE); + + // Find out where we'll be inserting this block in the block index + BlockIndexEntry* idxEntry = nullptr; // initialization here unnecessary but compiler can't always tell + Block* newBlock; + bool indexInserted = false; + auto head = this->headIndex.load(std::memory_order_relaxed); + assert(!details::circular_less_than(currentTailIndex, head)); + bool full = !details::circular_less_than(head, + currentTailIndex + BLOCK_SIZE) + || (MAX_SUBQUEUE_SIZE != details::const_numeric_max::value + && (MAX_SUBQUEUE_SIZE == 0 || MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head)); + + if (full || !(indexInserted = insert_block_index_entry(idxEntry, + currentTailIndex)) + || (newBlock = this->parent->ConcurrentQueue::template requisition_block()) == nullptr) { + // Index allocation or block allocation failed; revert any other allocations + // and index insertions done so far for this operation + if (indexInserted) { + rewind_block_index_tail(); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + } + currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { + currentTailIndex += static_cast(BLOCK_SIZE); + idxEntry = get_block_index_entry_for_index(currentTailIndex); + 
idxEntry->value.store(nullptr, std::memory_order_relaxed); + rewind_block_index_tail(); + } + this->parent->add_blocks_to_free_list(firstAllocatedBlock); + this->tailBlock = startBlock; + + return false; + } + +#ifdef MCDBGQ_TRACKMEM + newBlock->owner = this; +#endif + newBlock->ConcurrentQueue::Block::template reset_empty(); + newBlock->next = nullptr; + + // Insert the new block into the index + idxEntry->value.store(newBlock, std::memory_order_relaxed); + + // Store the chain of blocks so that we can undo if later allocations fail, + // and so that we can find the blocks when we do the actual enqueueing + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr) { + assert(this->tailBlock != nullptr); + this->tailBlock->next = newBlock; + } + this->tailBlock = newBlock; + endBlock = newBlock; + firstAllocatedBlock = firstAllocatedBlock == nullptr ? newBlock : firstAllocatedBlock; + } while (blockBaseDiff > 0); + } + + // Enqueue, one block at a time + index_t newTailIndex = startTailIndex + static_cast(count); + currentTailIndex = startTailIndex; + this->tailBlock = startBlock; + assert((startTailIndex & static_cast(BLOCK_SIZE - 1)) != 0 || firstAllocatedBlock != nullptr || count == 0); + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0 && firstAllocatedBlock != nullptr) { + this->tailBlock = firstAllocatedBlock; + } + while (true) { + index_t stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(newTailIndex, stopIndex)) { + stopIndex = newTailIndex; + } + MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR(T, decltype(*itemFirst), + new (static_cast(nullptr)) T(details::deref_noexcept(itemFirst)))) { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++); + } + } else { + MOODYCAMEL_TRY { + while (currentTailIndex != stopIndex) { + new ((*this->tailBlock)[currentTailIndex]) T(details::nomove_if (nullptr)) 
T( + details:: + deref_noexcept( + itemFirst)))> + ::eval(*itemFirst)); + ++currentTailIndex; + ++itemFirst; + } + } + MOODYCAMEL_CATCH(...) { + auto constructedStopIndex = currentTailIndex; + auto lastBlockEnqueued = this->tailBlock; + + if (!details::is_trivially_destructible::value) { + auto block = startBlock; + if ((startTailIndex & static_cast(BLOCK_SIZE - 1)) == 0) { + block = firstAllocatedBlock; + } + currentTailIndex = startTailIndex; + while (true) { + stopIndex = (currentTailIndex & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + if (details::circular_less_than(constructedStopIndex, stopIndex)) { + stopIndex = constructedStopIndex; + } + while (currentTailIndex != stopIndex) { + (*block)[currentTailIndex++]->~T(); + } + if (block == lastBlockEnqueued) { + break; + } + block = block->next; + } + } + + currentTailIndex = (startTailIndex - 1) & ~static_cast(BLOCK_SIZE - 1); + for (auto block = firstAllocatedBlock; block != nullptr; block = block->next) { + currentTailIndex += static_cast(BLOCK_SIZE); + auto idxEntry = get_block_index_entry_for_index(currentTailIndex); + idxEntry->value.store(nullptr, std::memory_order_relaxed); + rewind_block_index_tail(); + } + this->parent->add_blocks_to_free_list(firstAllocatedBlock); + this->tailBlock = startBlock; + MOODYCAMEL_RETHROW; + } + } + + if (this->tailBlock == endBlock) { + assert(currentTailIndex == newTailIndex); + break; + } + this->tailBlock = this->tailBlock->next; + } + this->tailIndex.store(newTailIndex, std::memory_order_release); + return true; + } + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + template + size_t dequeue_bulk(It& itemFirst, size_t max) + { + auto tail = this->tailIndex.load(std::memory_order_relaxed); + auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed); + auto desiredCount = static_cast(tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) - overcommit)); + if (details::circular_less_than(0, desiredCount)) { + desiredCount = 
desiredCount < max ? desiredCount : max; + std::atomic_thread_fence(std::memory_order_acquire); + + auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(desiredCount, std::memory_order_relaxed); + + tail = this->tailIndex.load(std::memory_order_acquire); + auto actualCount = static_cast(tail - (myDequeueCount - overcommit)); + if (details::circular_less_than(0, actualCount)) { + actualCount = desiredCount < actualCount ? desiredCount : actualCount; + if (actualCount < desiredCount) { + this->dequeueOvercommit.fetch_add(desiredCount - actualCount, std::memory_order_release); + } + + // Get the first index. Note that since there's guaranteed to be at least actualCount elements, this + // will never exceed tail. + auto firstIndex = this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel); + + // Iterate the blocks and dequeue + auto index = firstIndex; + BlockIndexHeader* localBlockIndex; + auto indexIndex = get_block_index_index_for_index(index, localBlockIndex); + do{ + auto blockStartIndex = index; + index_t endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), + endIndex) ? firstIndex + + static_cast(actualCount) : endIndex; + + auto entry = localBlockIndex->index[indexIndex]; + auto block = entry->value.load(std::memory_order_relaxed); + if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&, details::deref_noexcept(itemFirst) = std::move((*(*block)[index])))) { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst++ = std::move(el); + el.~T(); + ++index; + } + } else { + MOODYCAMEL_TRY { + while (index != endIndex) { + auto& el = *((*block)[index]); + *itemFirst = std::move(el); + ++itemFirst; + el.~T(); + ++index; + } + } + MOODYCAMEL_CATCH(...) 
{ + do{ + entry = localBlockIndex->index[indexIndex]; + block = entry->value.load(std::memory_order_relaxed); + while (index != endIndex) { + (*block)[index++]->~T(); + } + + if (block->ConcurrentQueue::Block::template set_many_empty(blockStartIndex, + static_cast( + endIndex + - blockStartIndex))) { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + entry->value.store(nullptr, std::memory_order_relaxed); + this->parent->add_block_to_free_list(block); + } + indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); + + blockStartIndex = index; + endIndex = (index & ~static_cast(BLOCK_SIZE - 1)) + static_cast(BLOCK_SIZE); + endIndex = details::circular_less_than(firstIndex + static_cast(actualCount), + endIndex) ? firstIndex + + static_cast(actualCount) : endIndex; + } while (index != firstIndex + actualCount); + + MOODYCAMEL_RETHROW; + } + } + if (block->ConcurrentQueue::Block::template set_many_empty(blockStartIndex, + static_cast(endIndex + - blockStartIndex))) + { + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + // Note that the set_many_empty above did a release, meaning that anybody who acquires the block + // we're about to free can use it safely since our writes (and reads!) will have happened-before then. 
+ entry->value.store(nullptr, std::memory_order_relaxed); + } + this->parent->add_block_to_free_list(block); // releases the above store + } + indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1); + } while (index != firstIndex + actualCount); + + return actualCount; + } else { + this->dequeueOvercommit.fetch_add(desiredCount, std::memory_order_release); + } + } + + return 0; + } + + private: + // The block size must be > 1, so any number with the low bit set is an invalid block base index + static const index_t INVALID_BLOCK_BASE = 1; + + struct BlockIndexEntry + { + std::atomic key; + std::atomic value; + }; + + struct BlockIndexHeader + { + size_t capacity; + std::atomic tail; + BlockIndexEntry* entries; + BlockIndexEntry** index; + BlockIndexHeader* prev; + }; + + template + inline bool insert_block_index_entry(BlockIndexEntry*& idxEntry, index_t blockStartIndex) + { + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); // We're the only writer thread, relaxed is OK + if (localBlockIndex == nullptr) { + return false; // this can happen if new_block_index failed in the constructor + } + size_t newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); + idxEntry = localBlockIndex->index[newTail]; + if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE + || idxEntry->value.load(std::memory_order_relaxed) == nullptr) { + idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); + localBlockIndex->tail.store(newTail, std::memory_order_release); + return true; + } + + // No room in the old block index, try to allocate another one! 
+ MOODYCAMEL_CONSTEXPR_IF(allocMode == CannotAlloc) { + return false; + } else if (!new_block_index()) { + return false; + } else { + localBlockIndex = blockIndex.load(std::memory_order_relaxed); + newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) & (localBlockIndex->capacity - 1); + idxEntry = localBlockIndex->index[newTail]; + assert(idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE); + idxEntry->key.store(blockStartIndex, std::memory_order_relaxed); + localBlockIndex->tail.store(newTail, std::memory_order_release); + return true; + } + } + + inline void rewind_block_index_tail() + { + auto localBlockIndex = blockIndex.load(std::memory_order_relaxed); + localBlockIndex->tail.store((localBlockIndex->tail.load(std::memory_order_relaxed) - 1) & (localBlockIndex->capacity - 1), + std::memory_order_relaxed); + } + + inline BlockIndexEntry* get_block_index_entry_for_index(index_t index) const + { + BlockIndexHeader* localBlockIndex; + auto idx = get_block_index_index_for_index(index, localBlockIndex); + return localBlockIndex->index[idx]; + } + + inline size_t get_block_index_index_for_index(index_t index, BlockIndexHeader*& localBlockIndex) const + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + debug::DebugLock lock(mutex); +#endif + index &= ~static_cast(BLOCK_SIZE - 1); + localBlockIndex = blockIndex.load(std::memory_order_acquire); + auto tail = localBlockIndex->tail.load(std::memory_order_acquire); + auto tailBase = localBlockIndex->index[tail]->key.load(std::memory_order_relaxed); + assert(tailBase != INVALID_BLOCK_BASE); + // Note: Must use division instead of shift because the index may wrap around, causing a negative + // offset, whose negativity we want to preserve + auto offset = static_cast(static_cast::type>(index - tailBase) + / static_cast::type>(BLOCK_SIZE)); + size_t idx = (tail + offset) & (localBlockIndex->capacity - 1); + assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) == index + && 
localBlockIndex->index[idx]->value.load(std::memory_order_relaxed) != nullptr); + return idx; + } + + bool new_block_index() + { + auto prev = blockIndex.load(std::memory_order_relaxed); + size_t prevCapacity = prev == nullptr ? 0 : prev->capacity; + auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity; + auto raw = static_cast((Traits::malloc)( + sizeof(BlockIndexHeader) + + std::alignment_of::value - 1 + sizeof(BlockIndexEntry) * entryCount + + std::alignment_of::value - 1 + sizeof(BlockIndexEntry*) + * nextBlockIndexCapacity)); + if (raw == nullptr) { + return false; + } + + auto header = new (raw) BlockIndexHeader; + auto entries = reinterpret_cast(details::align_for(raw + sizeof(BlockIndexHeader))); + auto index = reinterpret_cast(details::align_for(reinterpret_cast(entries) + + sizeof(BlockIndexEntry) * entryCount)); + if (prev != nullptr) { + auto prevTail = prev->tail.load(std::memory_order_relaxed); + auto prevPos = prevTail; + size_t i = 0; + do{ + prevPos = (prevPos + 1) & (prev->capacity - 1); + index[i++] = prev->index[prevPos]; + } while (prevPos != prevTail); + assert(i == prevCapacity); + } + for (size_t i = 0; i != entryCount; ++i) { + new (entries + i) BlockIndexEntry; + entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed); + index[prevCapacity + i] = entries + i; + } + header->prev = prev; + header->entries = entries; + header->index = index; + header->capacity = nextBlockIndexCapacity; + header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1), std::memory_order_relaxed); + + blockIndex.store(header, std::memory_order_release); + + nextBlockIndexCapacity <<= 1; + + return true; + } + + private: + size_t nextBlockIndexCapacity; + std::atomic blockIndex; + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + public: + details::ThreadExitListener threadExitListener; + private: +#endif + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + public: + ImplicitProducer* nextImplicitProducer; + private: +#endif + 
+#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX + mutable debug::DebugMutex mutex; +#endif +#ifdef MCDBGQ_TRACKMEM + friend struct MemStats; +#endif + }; + + ////////////////////////////////// + // Block pool manipulation + ////////////////////////////////// + + void populate_initial_block_list(size_t blockCount) + { + initialBlockPoolSize = blockCount; + if (initialBlockPoolSize == 0) { + initialBlockPool = nullptr; + return; + } + + initialBlockPool = create_array(blockCount); + if (initialBlockPool == nullptr) { + initialBlockPoolSize = 0; + } + for (size_t i = 0; i < initialBlockPoolSize; ++i) { + initialBlockPool[i].dynamicallyAllocated = false; + } + } + + inline Block* try_get_block_from_initial_pool() + { + if (initialBlockPoolIndex.load(std::memory_order_relaxed) >= initialBlockPoolSize) { + return nullptr; + } + + auto index = initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed); + + return index < initialBlockPoolSize ? (initialBlockPool + index) : nullptr; + } + + inline void add_block_to_free_list(Block* block) + { +#ifdef MCDBGQ_TRACKMEM + block->owner = nullptr; +#endif + if (!Traits::RECYCLE_ALLOCATED_BLOCKS && block->dynamicallyAllocated) { + destroy(block); + } else { + freeList.add(block); + } + } + + inline void add_blocks_to_free_list(Block* block) + { + while (block != nullptr) { + auto next = block->next; + add_block_to_free_list(block); + block = next; + } + } + + inline Block* try_get_block_from_free_list() + { + return freeList.try_get(); + } + + // Gets a free block from one of the memory pools, or allocates a new one (if applicable) + template + Block* requisition_block() + { + auto block = try_get_block_from_initial_pool(); + if (block != nullptr) { + return block; + } + + block = try_get_block_from_free_list(); + if (block != nullptr) { + return block; + } + + MOODYCAMEL_CONSTEXPR_IF(canAlloc == CanAlloc) { + return create(); + } else { + return nullptr; + } + } + +#ifdef MCDBGQ_TRACKMEM +public: + struct MemStats { + size_t 
allocatedBlocks; + size_t usedBlocks; + size_t freeBlocks; + size_t ownedBlocksExplicit; + size_t ownedBlocksImplicit; + size_t implicitProducers; + size_t explicitProducers; + size_t elementsEnqueued; + size_t blockClassBytes; + size_t queueClassBytes; + size_t implicitBlockIndexBytes; + size_t explicitBlockIndexBytes; + + friend class ConcurrentQueue; + + private: + static MemStats getFor(ConcurrentQueue* q) + { + MemStats stats = { 0 }; + + stats.elementsEnqueued = q->size_approx(); + + auto block = q->freeList.head_unsafe(); + while (block != nullptr) { + ++stats.allocatedBlocks; + ++stats.freeBlocks; + block = block->freeListNext.load(std::memory_order_relaxed); + } + + for (auto ptr = q->producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + bool implicit = dynamic_cast(ptr) != nullptr; + stats.implicitProducers += implicit ? 1 : 0; + stats.explicitProducers += implicit ? 0 : 1; + + if (implicit) { + auto prod = static_cast(ptr); + stats.queueClassBytes += sizeof(ImplicitProducer); + auto head = prod->headIndex.load(std::memory_order_relaxed); + auto tail = prod->tailIndex.load(std::memory_order_relaxed); + auto hash = prod->blockIndex.load(std::memory_order_relaxed); + if (hash != nullptr) { + for (size_t i = 0; i != hash->capacity; ++i) { + if (hash->index[i]->key.load(std::memory_order_relaxed) != ImplicitProducer::INVALID_BLOCK_BASE + && hash->index[i]->value.load(std::memory_order_relaxed) != nullptr) { + ++stats.allocatedBlocks; + ++stats.ownedBlocksImplicit; + } + } + stats.implicitBlockIndexBytes += hash->capacity * sizeof(typename ImplicitProducer::BlockIndexEntry); + for (; hash != nullptr; hash = hash->prev) { + stats.implicitBlockIndexBytes += sizeof(typename ImplicitProducer::BlockIndexHeader) + hash->capacity + * sizeof(typename ImplicitProducer::BlockIndexEntry*); + } + } + for (; details::circular_less_than(head, tail); head += BLOCK_SIZE) { + //auto block = prod->get_block_index_entry_for_index(head); + 
++stats.usedBlocks; + } + } else { + auto prod = static_cast(ptr); + stats.queueClassBytes += sizeof(ExplicitProducer); + auto tailBlock = prod->tailBlock; + bool wasNonEmpty = false; + if (tailBlock != nullptr) { + auto block = tailBlock; + do{ + ++stats.allocatedBlocks; + if (!block->ConcurrentQueue::Block::template is_empty() || wasNonEmpty) { + ++stats.usedBlocks; + wasNonEmpty = wasNonEmpty || block != tailBlock; + } + ++stats.ownedBlocksExplicit; + block = block->next; + } while (block != tailBlock); + } + auto index = prod->blockIndex.load(std::memory_order_relaxed); + while (index != nullptr) { + stats.explicitBlockIndexBytes += sizeof(typename ExplicitProducer::BlockIndexHeader) + index->size + * sizeof(typename ExplicitProducer::BlockIndexEntry); + index = static_cast(index->prev); + } + } + } + + auto freeOnInitialPool = q->initialBlockPoolIndex.load(std::memory_order_relaxed) + >= q->initialBlockPoolSize ? 0 : q->initialBlockPoolSize + - q->initialBlockPoolIndex.load(std::memory_order_relaxed); + stats.allocatedBlocks += freeOnInitialPool; + stats.freeBlocks += freeOnInitialPool; + + stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks; + stats.queueClassBytes += sizeof(ConcurrentQueue); + + return stats; + } + }; + + // For debugging only. Not thread-safe. 
+ MemStats getMemStats() + { + return MemStats::getFor(this); + } + +private: + friend struct MemStats; +#endif + + ////////////////////////////////// + // Producer list manipulation + ////////////////////////////////// + + ProducerBase* recycle_or_create_producer(bool isExplicit) + { +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + // Try to re-use one first + for (auto ptr = producerListTail.load(std::memory_order_acquire); ptr != nullptr; ptr = ptr->next_prod()) { + if (ptr->inactive.load(std::memory_order_relaxed) && ptr->isExplicit == isExplicit) { + bool expected = true; + if (ptr->inactive.compare_exchange_strong(expected, /* desired */ false, std::memory_order_acquire, + std::memory_order_relaxed)) { + // We caught one! It's been marked as activated, the caller can have it + return ptr; + } + } + } + + return add_producer(isExplicit ? static_cast(create(this)) : create(this)); + } + + ProducerBase* add_producer(ProducerBase* producer) + { + // Handle failed memory allocation + if (producer == nullptr) { + return nullptr; + } + + producerCount.fetch_add(1, std::memory_order_relaxed); + + // Add it to the lock-free list + auto prevTail = producerListTail.load(std::memory_order_relaxed); + do{ + producer->next = prevTail; + } while (!producerListTail.compare_exchange_weak(prevTail, producer, std::memory_order_release, std::memory_order_relaxed)); + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + if (producer->isExplicit) { + auto prevTailExplicit = explicitProducers.load(std::memory_order_relaxed); + do{ + static_cast(producer)->nextExplicitProducer = prevTailExplicit; + } while (!explicitProducers.compare_exchange_weak(prevTailExplicit, static_cast(producer), + std::memory_order_release, std::memory_order_relaxed)); + } else { + auto prevTailImplicit = implicitProducers.load(std::memory_order_relaxed); + do{ + static_cast(producer)->nextImplicitProducer = prevTailImplicit; + } while 
(!implicitProducers.compare_exchange_weak(prevTailImplicit, static_cast(producer), + std::memory_order_release, std::memory_order_relaxed)); + } +#endif + + return producer; + } + + void reown_producers() + { + // After another instance is moved-into/swapped-with this one, all the + // producers we stole still think their parents are the other queue. + // So fix them up! + for (auto ptr = producerListTail.load(std::memory_order_relaxed); ptr != nullptr; ptr = ptr->next_prod()) { + ptr->parent = this; + } + } + + ////////////////////////////////// + // Implicit producer hash + ////////////////////////////////// + + struct ImplicitProducerKVP + { + std::atomic key; + ImplicitProducer* value; // No need for atomicity since it's only read by the thread that sets it in the first place + + ImplicitProducerKVP() + : value(nullptr) { } + + ImplicitProducerKVP(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT + { + key.store(other.key.load(std::memory_order_relaxed), std::memory_order_relaxed); + value = other.value; + } + + inline ImplicitProducerKVP& operator=(ImplicitProducerKVP&& other) MOODYCAMEL_NOEXCEPT + { + swap(other); + return *this; + } + + inline void swap(ImplicitProducerKVP& other) MOODYCAMEL_NOEXCEPT + { + if (this != &other) { + details::swap_relaxed(key, other.key); + std::swap(value, other.value); + } + } + }; + + template + friend void moodycamel::swap(typename ConcurrentQueue::ImplicitProducerKVP&, + typename ConcurrentQueue::ImplicitProducerKVP&) MOODYCAMEL_NOEXCEPT; + + struct ImplicitProducerHash + { + size_t capacity; + ImplicitProducerKVP* entries; + ImplicitProducerHash* prev; + }; + + inline void populate_initial_implicit_producer_hash() + { + MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { + return; + } else { + implicitProducerHashCount.store(0, std::memory_order_relaxed); + auto hash = &initialImplicitProducerHash; + hash->capacity = INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; + hash->entries = 
&initialImplicitProducerHashEntries[0]; + for (size_t i = 0; i != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE; ++i) { + initialImplicitProducerHashEntries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); + } + hash->prev = nullptr; + implicitProducerHash.store(hash, std::memory_order_relaxed); + } + } + + void swap_implicit_producer_hashes(ConcurrentQueue& other) + { + MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) { + return; + } else { + // Swap (assumes our implicit producer hash is initialized) + initialImplicitProducerHashEntries.swap(other.initialImplicitProducerHashEntries); + initialImplicitProducerHash.entries = &initialImplicitProducerHashEntries[0]; + other.initialImplicitProducerHash.entries = &other.initialImplicitProducerHashEntries[0]; + + details::swap_relaxed(implicitProducerHashCount, other.implicitProducerHashCount); + + details::swap_relaxed(implicitProducerHash, other.implicitProducerHash); + if (implicitProducerHash.load(std::memory_order_relaxed) == &other.initialImplicitProducerHash) { + implicitProducerHash.store(&initialImplicitProducerHash, std::memory_order_relaxed); + } else { + ImplicitProducerHash* hash; + for (hash = implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &other.initialImplicitProducerHash; + hash = hash->prev) { + continue; + } + hash->prev = &initialImplicitProducerHash; + } + if (other.implicitProducerHash.load(std::memory_order_relaxed) == &initialImplicitProducerHash) { + other.implicitProducerHash.store(&other.initialImplicitProducerHash, std::memory_order_relaxed); + } else { + ImplicitProducerHash* hash; + for (hash = other.implicitProducerHash.load(std::memory_order_relaxed); hash->prev != &initialImplicitProducerHash; + hash = hash->prev) { + continue; + } + hash->prev = &other.initialImplicitProducerHash; + } + } + } + + // Only fails (returns nullptr) if memory allocation fails + ImplicitProducer* get_or_add_implicit_producer() + { + // Note that since the data 
is essentially thread-local (key is thread ID), + // there's a reduced need for fences (memory ordering is already consistent + // for any individual thread), except for the current table itself. + + // Start by looking for the thread ID in the current and all previous hash tables. + // If it's not found, it must not be in there yet, since this same thread would + // have added it previously to one of the tables that we traversed. + + // Code and algorithm adapted from http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + + auto id = details::thread_id(); + auto hashedId = details::hash_thread_id(id); + + auto mainHash = implicitProducerHash.load(std::memory_order_acquire); + assert(mainHash != nullptr); // silence clang-tidy and MSVC warnings (hash cannot be null) + for (auto hash = mainHash; hash != nullptr; hash = hash->prev) { + // Look for the id in this hash + auto index = hashedId; + while (true) { // Not an infinite loop because at least one slot is free in the hash table + index &= hash->capacity - 1u; + + auto probedKey = hash->entries[index].key.load(std::memory_order_relaxed); + if (probedKey == id) { + // Found it! If we had to search several hashes deep, though, we should lazily add it + // to the current main hash table to avoid the extended search next time. + // Note there's guaranteed to be room in the current hash table since every subsequent + // table implicitly reserves space for all previous tables (there's only one + // implicitProducerHashCount). 
+ auto value = hash->entries[index].value; + if (hash != mainHash) { + index = hashedId; + while (true) { + index &= mainHash->capacity - 1u; + auto empty = details::invalid_thread_id; +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + auto reusable = details::invalid_thread_id2; + if (mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_seq_cst, + std::memory_order_relaxed) + || mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_seq_cst, + std::memory_order_relaxed)) { +#else + if (mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_seq_cst, + std::memory_order_relaxed)) { +#endif + mainHash->entries[index].value = value; + break; + } + ++index; + } + } + + return value; + } + if (probedKey == details::invalid_thread_id) { + break; // Not in this hash table + } + ++index; + } + } + + // Insert! + auto newCount = 1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed); + while (true) { + // NOLINTNEXTLINE(clang-analyzer-core.NullDereference) + if (newCount >= (mainHash->capacity >> 1) && !implicitProducerHashResizeInProgress.test_and_set(std::memory_order_acquire)) { + // We've acquired the resize lock, try to allocate a bigger hash table. + // Note the acquire fence synchronizes with the release fence at the end of this block, and hence when + // we reload implicitProducerHash it must be the most recent version (it only gets changed within this + // locked block). 
+ mainHash = implicitProducerHash.load(std::memory_order_acquire); + if (newCount >= (mainHash->capacity >> 1)) { + size_t newCapacity = mainHash->capacity << 1; + while (newCount >= (newCapacity >> 1)) { + newCapacity <<= 1; + } + auto raw = static_cast((Traits::malloc)(sizeof(ImplicitProducerHash) + + std::alignment_of::value - 1 + + sizeof(ImplicitProducerKVP) * newCapacity)); + if (raw == nullptr) { + // Allocation failed + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed); + return nullptr; + } + + auto newHash = new (raw) ImplicitProducerHash; + newHash->capacity = static_cast(newCapacity); + newHash->entries = reinterpret_cast(details::align_for(raw + + sizeof( + ImplicitProducerHash))); + for (size_t i = 0; i != newCapacity; ++i) { + new (newHash->entries + i) ImplicitProducerKVP; + newHash->entries[i].key.store(details::invalid_thread_id, std::memory_order_relaxed); + } + newHash->prev = mainHash; + implicitProducerHash.store(newHash, std::memory_order_release); + implicitProducerHashResizeInProgress.clear(std::memory_order_release); + mainHash = newHash; + } else { + implicitProducerHashResizeInProgress.clear(std::memory_order_release); + } + } + + // If it's < three-quarters full, add to the old one anyway so that we don't have to wait for the next table + // to finish being allocated by another thread (and if we just finished allocating above, the condition will + // always be true) + if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) { + auto producer = static_cast(recycle_or_create_producer(false)); + if (producer == nullptr) { + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); + return nullptr; + } + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + producer->threadExitListener.callback = &ConcurrentQueue::implicit_producer_thread_exited_callback; + producer->threadExitListener.userData = producer; + 
details::ThreadExitNotifier::subscribe(&producer->threadExitListener); +#endif + + auto index = hashedId; + while (true) { + index &= mainHash->capacity - 1u; + auto empty = details::invalid_thread_id; +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + auto reusable = details::invalid_thread_id2; + if (mainHash->entries[index].key.compare_exchange_strong(reusable, id, std::memory_order_seq_cst, + std::memory_order_relaxed)) { + implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed); // already counted as a used slot + mainHash->entries[index].value = producer; + break; + } +#endif + if (mainHash->entries[index].key.compare_exchange_strong(empty, id, std::memory_order_seq_cst, + std::memory_order_relaxed)) { + mainHash->entries[index].value = producer; + break; + } + ++index; + } + return producer; + } + + // Hmm, the old hash is quite full and somebody else is busy allocating a new one. + // We need to wait for the allocating thread to finish (if it succeeds, we add, if not, + // we try to allocate ourselves). 
+ mainHash = implicitProducerHash.load(std::memory_order_acquire); + } + } + +#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED + void implicit_producer_thread_exited(ImplicitProducer* producer) + { + // Remove from hash +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugLock lock(implicitProdMutex); +#endif + auto hash = implicitProducerHash.load(std::memory_order_acquire); + assert(hash != nullptr); // The thread exit listener is only registered if we were added to a hash in the first place + auto id = details::thread_id(); + auto hashedId = details::hash_thread_id(id); + details::thread_id_t probedKey; + + // We need to traverse all the hashes just in case other threads aren't on the current one yet and are + // trying to add an entry thinking there's a free slot (because they reused a producer) + for (; hash != nullptr; hash = hash->prev) { + auto index = hashedId; + do{ + index &= hash->capacity - 1u; + probedKey = id; + if (hash->entries[index].key.compare_exchange_strong(probedKey, details::invalid_thread_id2, std::memory_order_seq_cst, + std::memory_order_relaxed)) { + break; + } + ++index; + } while (probedKey != details::invalid_thread_id); // Can happen if the hash has changed but we weren't put back in it yet, or if we weren't added to this hash in the first place + } + + // Mark the queue as being recyclable + producer->inactive.store(true, std::memory_order_release); + } + + static void implicit_producer_thread_exited_callback(void* userData) + { + auto producer = static_cast(userData); + auto queue = producer->parent; + queue->implicit_producer_thread_exited(producer); + } + +#endif + + ////////////////////////////////// + // Utility functions + ////////////////////////////////// + + template + static inline void* aligned_malloc(size_t size) + { + MOODYCAMEL_CONSTEXPR_IF(std::alignment_of::value <= std::alignment_of::value) + return (Traits::malloc)(size); + else { + size_t alignment = std::alignment_of::value; + void* raw = (Traits::malloc)(size 
+ alignment - 1 + sizeof(void*)); + if (!raw) { + return nullptr; + } + char* ptr = details::align_for(reinterpret_cast(raw) + sizeof(void*)); + *(reinterpret_cast(ptr) - 1) = raw; + return ptr; + } + } + + template + static inline void aligned_free(void* ptr) + { + MOODYCAMEL_CONSTEXPR_IF(std::alignment_of::value <= std::alignment_of::value) + return (Traits::free)(ptr); + else { + (Traits::free)(ptr ? *(reinterpret_cast(ptr) - 1) : nullptr); + } + } + + template + static inline U* create_array(size_t count) + { + assert(count > 0); + U* p = static_cast(aligned_malloc(sizeof(U) * count)); + if (p == nullptr) { + return nullptr; + } + + for (size_t i = 0; i != count; ++i) { + new (p + i) U(); + } + return p; + } + + template + static inline void destroy_array(U* p, size_t count) + { + if (p != nullptr) { + assert(count > 0); + for (size_t i = count; i != 0;) { + (p + --i)->~U(); + } + } + aligned_free(p); + } + + template + static inline U* create() + { + void* p = aligned_malloc(sizeof(U)); + return p != nullptr ? new (p) U : nullptr; + } + + template + static inline U* create(A1&& a1) + { + void* p = aligned_malloc(sizeof(U)); + return p != nullptr ? 
new (p) U(std::forward(a1)) : nullptr; + } + + template + static inline void destroy(U* p) + { + if (p != nullptr) { + p->~U(); + } + aligned_free(p); + } + +private: + std::atomic producerListTail; + std::atomic producerCount; + + std::atomic initialBlockPoolIndex; + Block* initialBlockPool; + size_t initialBlockPoolSize; + +#ifndef MCDBGQ_USEDEBUGFREELIST + FreeList freeList; +#else + debug::DebugFreeList freeList; +#endif + + std::atomic implicitProducerHash; + std::atomic implicitProducerHashCount; // Number of slots logically used + ImplicitProducerHash initialImplicitProducerHash; + std::array initialImplicitProducerHashEntries; + std::atomic_flag implicitProducerHashResizeInProgress; + + std::atomic nextExplicitConsumerId; + std::atomic globalExplicitConsumerOffset; + +#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH + debug::DebugMutex implicitProdMutex; +#endif + +#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG + std::atomic explicitProducers; + std::atomic implicitProducers; +#endif +}; + +template +ProducerToken::ProducerToken(ConcurrentQueue& queue) + : producer(queue.recycle_or_create_producer(true)) +{ + if (producer != nullptr) { + producer->token = this; + } +} + +template +ProducerToken::ProducerToken(BlockingConcurrentQueue& queue) + : producer(reinterpret_cast*>(&queue)->recycle_or_create_producer(true)) +{ + if (producer != nullptr) { + producer->token = this; + } +} + +template +ConsumerToken::ConsumerToken(ConcurrentQueue& queue) + : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) +{ + initialOffset = queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release); + lastKnownGlobalOffset = static_cast(-1); +} + +template +ConsumerToken::ConsumerToken(BlockingConcurrentQueue& queue) + : itemsConsumedFromCurrent(0), currentProducer(nullptr), desiredProducer(nullptr) +{ + initialOffset = reinterpret_cast*>(&queue)->nextExplicitConsumerId.fetch_add(1, std::memory_order_release); + lastKnownGlobalOffset = static_cast(-1); +} + 
+template +inline void swap(ConcurrentQueue& a, ConcurrentQueue& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +inline void swap(ProducerToken& a, ProducerToken& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +inline void swap(ConsumerToken& a, ConsumerToken& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} + +template +inline void swap(typename ConcurrentQueue::ImplicitProducerKVP& a, + typename ConcurrentQueue::ImplicitProducerKVP& b) MOODYCAMEL_NOEXCEPT +{ + a.swap(b); +} +} + +#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17) +#pragma warning(pop) +#endif + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) +#pragma GCC diagnostic pop +#endif diff --git a/framework/audio/thirdparty/moodycamel/lightweightsemaphore.h b/framework/audio/thirdparty/moodycamel/lightweightsemaphore.h new file mode 100644 index 0000000000..dcaf945bdf --- /dev/null +++ b/framework/audio/thirdparty/moodycamel/lightweightsemaphore.h @@ -0,0 +1,436 @@ +// Provides an efficient implementation of a semaphore (LightweightSemaphore). +// This is an extension of Jeff Preshing's semaphore implementation (licensed +// under the terms of its separate zlib license) that has been adapted and +// extended by Cameron Desrochers. + +#pragma once + +#include +#include // For std::size_t +#include +#include // For std::make_signed + +#if defined(_WIN32) +// Avoid including windows.h in a header; we only need a handful of +// items, so we'll redeclare them here (this is relatively safe since +// the API generally has to remain stable between Windows versions). +// I know this is an ugly hack but it still beats polluting the global +// namespace with thousands of generic names or adding a .cpp for nothing. 
+extern "C" { +struct _SECURITY_ATTRIBUTES; +__declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, + const wchar_t* lpName); +__declspec(dllimport) int __stdcall CloseHandle(void* hObject); +__declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds); +__declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount); +} +#elif defined(__MACH__) +#include +#elif defined(__MVS__) +#include +#elif defined(__unix__) +#include + +#if defined(__GLIBC_PREREQ) && defined(_GNU_SOURCE) +#if __GLIBC_PREREQ(2, 30) +#define MOODYCAMEL_LIGHTWEIGHTSEMAPHORE_MONOTONIC +#endif +#endif +#endif + +namespace moodycamel { +namespace details { +// Code in the mpmc_sema namespace below is an adaptation of Jeff Preshing's +// portable + lightweight semaphore implementations, originally from +// https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h +// LICENSE: +// Copyright (c) 2015 Jeff Preshing +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgement in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. 
+#if defined(_WIN32) +class Semaphore +{ +private: + void* m_hSema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + +public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + const long maxLong = 0x7fffffff; + m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr); + assert(m_hSema); + } + + ~Semaphore() + { + CloseHandle(m_hSema); + } + + bool wait() + { + const unsigned long infinite = 0xffffffff; + return WaitForSingleObject(m_hSema, infinite) == 0; + } + + bool try_wait() + { + return WaitForSingleObject(m_hSema, 0) == 0; + } + + bool timed_wait(std::uint64_t usecs) + { + return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) == 0; + } + + void signal(int count = 1) + { + while (!ReleaseSemaphore(m_hSema, count, nullptr)) {} + } +}; +#elif defined(__MACH__) +//--------------------------------------------------------- +// Semaphore (Apple iOS and OSX) +// Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html +//--------------------------------------------------------- +class Semaphore +{ +private: + semaphore_t m_sema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + +public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + kern_return_t rc = semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount); + assert(rc == KERN_SUCCESS); + (void)rc; + } + + ~Semaphore() + { + semaphore_destroy(mach_task_self(), m_sema); + } + + bool wait() + { + return semaphore_wait(m_sema) == KERN_SUCCESS; + } + + bool try_wait() + { + return timed_wait(0); + } + + bool timed_wait(std::uint64_t timeout_usecs) + { + mach_timespec_t ts; + ts.tv_sec = static_cast(timeout_usecs / 1000000); + ts.tv_nsec = static_cast((timeout_usecs % 1000000) * 1000); + + // added in OSX 
10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html + kern_return_t rc = semaphore_timedwait(m_sema, ts); + return rc == KERN_SUCCESS; + } + + void signal() + { + while (semaphore_signal(m_sema) != KERN_SUCCESS) {} + } + + void signal(int count) + { + while (count-- > 0) + { + while (semaphore_signal(m_sema) != KERN_SUCCESS) {} + } + } +}; +#elif defined(__unix__) || defined(__MVS__) +//--------------------------------------------------------- +// Semaphore (POSIX, Linux, zOS) +//--------------------------------------------------------- +class Semaphore +{ +private: + sem_t m_sema; + + Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION; + +public: + Semaphore(int initialCount = 0) + { + assert(initialCount >= 0); + int rc = sem_init(&m_sema, 0, static_cast(initialCount)); + assert(rc == 0); + (void)rc; + } + + ~Semaphore() + { + sem_destroy(&m_sema); + } + + bool wait() + { + // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error + int rc; + do { + rc = sem_wait(&m_sema); + } while (rc == -1 && errno == EINTR); + return rc == 0; + } + + bool try_wait() + { + int rc; + do { + rc = sem_trywait(&m_sema); + } while (rc == -1 && errno == EINTR); + return rc == 0; + } + + bool timed_wait(std::uint64_t usecs) + { + struct timespec ts; + const int usecs_in_1_sec = 1000000; + const int nsecs_in_1_sec = 1000000000; +#ifdef MOODYCAMEL_LIGHTWEIGHTSEMAPHORE_MONOTONIC + clock_gettime(CLOCK_MONOTONIC, &ts); +#else + clock_gettime(CLOCK_REALTIME, &ts); +#endif + ts.tv_sec += (time_t)(usecs / usecs_in_1_sec); + ts.tv_nsec += (long)(usecs % usecs_in_1_sec) * 1000; + // sem_timedwait bombs if you have more than 1e9 in tv_nsec + // so we have to clean things up before passing it in + if (ts.tv_nsec >= nsecs_in_1_sec) { + ts.tv_nsec -= nsecs_in_1_sec; + ++ts.tv_sec; + } + + int rc; 
+ do { +#ifdef MOODYCAMEL_LIGHTWEIGHTSEMAPHORE_MONOTONIC + rc = sem_clockwait(&m_sema, CLOCK_MONOTONIC, &ts); +#else + rc = sem_timedwait(&m_sema, &ts); +#endif + } while (rc == -1 && errno == EINTR); + return rc == 0; + } + + void signal() + { + while (sem_post(&m_sema) == -1) {} + } + + void signal(int count) + { + while (count-- > 0) + { + while (sem_post(&m_sema) == -1) {} + } + } +}; +#else +#error Unsupported platform! (No semaphore wrapper available) +#endif +} // end namespace details + +//--------------------------------------------------------- +// LightweightSemaphore +//--------------------------------------------------------- +class LightweightSemaphore +{ +public: + typedef std::make_signed::type ssize_t; + +private: + std::atomic m_count; + details::Semaphore m_sema; + int m_maxSpins; + + bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1) + { + ssize_t oldCount; + int spin = m_maxSpins; + while (--spin >= 0) + { + oldCount = m_count.load(std::memory_order_relaxed); + if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, + std::memory_order_relaxed)) { + return true; + } + std::atomic_signal_fence(std::memory_order_acquire); // Prevent the compiler from collapsing the loop. + } + oldCount = m_count.fetch_sub(1, std::memory_order_acquire); + if (oldCount > 0) { + return true; + } + if (timeout_usecs < 0) { + if (m_sema.wait()) { + return true; + } + } + if (timeout_usecs > 0 && m_sema.timed_wait((std::uint64_t)timeout_usecs)) { + return true; + } + // At this point, we've timed out waiting for the semaphore, but the + // count is still decremented indicating we may still be waiting on + // it. So we have to re-adjust the count, but only if the semaphore + // wasn't signaled enough times for us too since then. If it was, we + // need to release the semaphore too. 
+ while (true) + { + oldCount = m_count.load(std::memory_order_acquire); + if (oldCount >= 0 && m_sema.try_wait()) { + return true; + } + if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, + std::memory_order_relaxed)) { + return false; + } + } + } + + ssize_t waitManyWithPartialSpinning(ssize_t max, std::int64_t timeout_usecs = -1) + { + assert(max > 0); + ssize_t oldCount; + int spin = m_maxSpins; + while (--spin >= 0) + { + oldCount = m_count.load(std::memory_order_relaxed); + if (oldCount > 0) { + ssize_t newCount = oldCount > max ? oldCount - max : 0; + if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) { + return oldCount - newCount; + } + } + std::atomic_signal_fence(std::memory_order_acquire); + } + oldCount = m_count.fetch_sub(1, std::memory_order_acquire); + if (oldCount <= 0) { + if ((timeout_usecs == 0) || (timeout_usecs < 0 && !m_sema.wait()) + || (timeout_usecs > 0 && !m_sema.timed_wait((std::uint64_t)timeout_usecs))) { + while (true) + { + oldCount = m_count.load(std::memory_order_acquire); + if (oldCount >= 0 && m_sema.try_wait()) { + break; + } + if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, + std::memory_order_relaxed)) { + return 0; + } + } + } + } + if (max > 1) { + return 1 + tryWaitMany(max - 1); + } + return 1; + } + +public: + LightweightSemaphore(ssize_t initialCount = 0, int maxSpins = 10000) + : m_count(initialCount), m_maxSpins(maxSpins) + { + assert(initialCount >= 0); + assert(maxSpins >= 0); + } + + bool tryWait() + { + ssize_t oldCount = m_count.load(std::memory_order_relaxed); + while (oldCount > 0) + { + if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed)) { + return true; + } + } + return false; + } + + bool wait() + { + return tryWait() || waitWithPartialSpinning(); + } + + bool wait(std::int64_t 
timeout_usecs) + { + return tryWait() || waitWithPartialSpinning(timeout_usecs); + } + + // Acquires between 0 and (greedily) max, inclusive + ssize_t tryWaitMany(ssize_t max) + { + assert(max >= 0); + ssize_t oldCount = m_count.load(std::memory_order_relaxed); + while (oldCount > 0) + { + ssize_t newCount = oldCount > max ? oldCount - max : 0; + if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed)) { + return oldCount - newCount; + } + } + return 0; + } + + // Acquires at least one, and (greedily) at most max + ssize_t waitMany(ssize_t max, std::int64_t timeout_usecs) + { + assert(max >= 0); + ssize_t result = tryWaitMany(max); + if (result == 0 && max > 0) { + result = waitManyWithPartialSpinning(max, timeout_usecs); + } + return result; + } + + ssize_t waitMany(ssize_t max) + { + ssize_t result = waitMany(max, -1); + assert(result > 0); + return result; + } + + void signal(ssize_t count = 1) + { + assert(count >= 0); + ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release); + ssize_t toRelease = -oldCount < count ? -oldCount : count; + if (toRelease > 0) { + m_sema.signal((int)toRelease); + } + } + + std::size_t availableApprox() const + { + ssize_t count = m_count.load(std::memory_order_relaxed); + return count > 0 ? 
static_cast(count) : 0; + } +}; +} // end namespace moodycamel diff --git a/framework/global/CMakeLists.txt b/framework/global/CMakeLists.txt index be1bc7363a..2b1e27f7de 100644 --- a/framework/global/CMakeLists.txt +++ b/framework/global/CMakeLists.txt @@ -40,6 +40,9 @@ target_sources(muse_global PRIVATE logremover.cpp logremover.h profiler.h + signpost.h + functional/inplace_function.h + functional/inplace_function_mv.h dataformatter.cpp dataformatter.h stringutils.cpp @@ -72,6 +75,8 @@ target_sources(muse_global PRIVATE ticker.cpp ticker.h + thirdparty/sg14/inplace_function.h + ${KORS_MODULARITY_SRC} modularity/ioccontext.cpp modularity/ioc.h diff --git a/framework/global/functional/inplace_function.h b/framework/global/functional/inplace_function.h new file mode 100644 index 0000000000..d891d93052 --- /dev/null +++ b/framework/global/functional/inplace_function.h @@ -0,0 +1,9 @@ +#pragma once + +#include "global/thirdparty/sg14/inplace_function.h" + +namespace muse::functional { +template)> +using inplace_function = stdext::inplace_function; +} diff --git a/framework/global/functional/inplace_function_mv.h b/framework/global/functional/inplace_function_mv.h new file mode 100644 index 0000000000..58fe8c67f2 --- /dev/null +++ b/framework/global/functional/inplace_function_mv.h @@ -0,0 +1,215 @@ +/* + * SPDX-License-Identifier: GPL-3.0-only + * MuseScore-CLA-applies + * + * MuseScore + * Music Composition & Notation + * + * Copyright (C) 2026 MuseScore Limited and others + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 3 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#pragma once + +#include +#include +#include +#include +#include + +namespace muse::functional { +template +class MoveOnlyInplaceFunction; + +namespace detail { +template +struct IsMoveOnlyInplaceFunction : std::false_type {}; + +template +struct IsMoveOnlyInplaceFunction > : std::true_type {}; + +template +struct MoveOnlyInplaceFunctionVTable +{ + using Invoke = Ret (*)(void*, Args&&...); + using Relocate = void (*)(void*, void*) noexcept; + using Destroy = void (*)(void*) noexcept; + + Invoke invoke = nullptr; + Relocate relocate = nullptr; + Destroy destroy = nullptr; +}; + +template +Ret invokeMoveOnlyInplaceFunction(void* storage, Args&&... args) +{ + if constexpr (std::is_void_v) { + (*static_cast(storage))(std::forward(args)...); + } else { + return (*static_cast(storage))(std::forward(args)...); + } +} + +template +void relocateMoveOnlyInplaceFunction(void* destination, void* source) noexcept +{ + auto* callable = static_cast(source); + new (destination) Callable(std::move(*callable)); + callable->~Callable(); +} + +template +void destroyMoveOnlyInplaceFunction(void* storage) noexcept +{ + static_cast(storage)->~Callable(); +} + +template +const MoveOnlyInplaceFunctionVTable* moveOnlyInplaceFunctionVTable() noexcept +{ + static const MoveOnlyInplaceFunctionVTable vtable { + &invokeMoveOnlyInplaceFunction, + &relocateMoveOnlyInplaceFunction, + &destroyMoveOnlyInplaceFunction + }; + + return &vtable; +} +} // namespace detail + +template +class MoveOnlyInplaceFunction +{ +public: + MoveOnlyInplaceFunction() noexcept = default; + + MoveOnlyInplaceFunction(std::nullptr_t) noexcept {} + + MoveOnlyInplaceFunction(const MoveOnlyInplaceFunction&) = delete; + MoveOnlyInplaceFunction& operator=(const MoveOnlyInplaceFunction&) = delete; + + MoveOnlyInplaceFunction(MoveOnlyInplaceFunction&& other) noexcept + { + moveFrom(std::move(other)); + } + + 
MoveOnlyInplaceFunction& operator=(MoveOnlyInplaceFunction&& other) noexcept + { + if (this != &other) { + reset(); + moveFrom(std::move(other)); + } + + return *this; + } + + template, + std::enable_if_t::value + && std::is_invocable_r_v, int> = 0> + MoveOnlyInplaceFunction(Callable&& callable) + { + static_assert(sizeof(Fn) <= Capacity, "Callable is too large for this MoveOnlyInplaceFunction"); + static_assert(alignof(Fn) <= Alignment, "Callable alignment is too large for this MoveOnlyInplaceFunction"); + static_assert(std::is_nothrow_move_constructible_v, + "MoveOnlyInplaceFunction requires nothrow move-constructible callables"); + + m_vtable = detail::moveOnlyInplaceFunctionVTable(); + new (storage()) Fn(std::forward(callable)); + } + + template, + std::enable_if_t::value + && std::is_invocable_r_v, int> = 0> + MoveOnlyInplaceFunction& operator=(Callable&& callable) + { + *this = MoveOnlyInplaceFunction(std::forward(callable)); + return *this; + } + + MoveOnlyInplaceFunction& operator=(std::nullptr_t) noexcept + { + reset(); + return *this; + } + + ~MoveOnlyInplaceFunction() noexcept + { + reset(); + } + + Ret operator()(Args... 
args) const + { + if (!m_vtable) { + throw std::bad_function_call(); + } + + if constexpr (std::is_void_v) { + m_vtable->invoke(storage(), std::forward(args)...); + } else { + return m_vtable->invoke(storage(), std::forward(args)...); + } + } + + explicit operator bool() const noexcept + { + return m_vtable != nullptr; + } + + void reset() noexcept + { + if (!m_vtable) { + return; + } + + m_vtable->destroy(storage()); + m_vtable = nullptr; + } + +private: + using VTable = detail::MoveOnlyInplaceFunctionVTable; + using Storage = std::aligned_storage_t; + + void* storage() const noexcept + { + return const_cast(&m_storage); + } + + void moveFrom(MoveOnlyInplaceFunction&& other) noexcept + { + if (!other.m_vtable) { + return; + } + + m_vtable = other.m_vtable; + m_vtable->relocate(storage(), other.storage()); + other.m_vtable = nullptr; + } + + const VTable* m_vtable = nullptr; + mutable Storage m_storage {}; +}; + +template +bool operator==(const MoveOnlyInplaceFunction& function, std::nullptr_t) noexcept +{ + return !function; +} + +template +bool operator!=(const MoveOnlyInplaceFunction& function, std::nullptr_t) noexcept +{ + return bool(function); +} +} // namespace muse::functional diff --git a/framework/global/signpost.h b/framework/global/signpost.h new file mode 100644 index 0000000000..d0ee0edd89 --- /dev/null +++ b/framework/global/signpost.h @@ -0,0 +1,46 @@ +#pragma once +#ifdef __APPLE__ +#include + +#include "global/defer.h" +#include "muse_framework_config.h" + +#define MSS_SIGNPOST_CONCAT_IMPL(x, y) x##y +#define MSS_SIGNPOST_CONCAT(x, y) MSS_SIGNPOST_CONCAT_IMPL(x, y) + +// Call this macro in your source file (at function or namespace scope) to initialize the signpost log. 
+#define MSS_SIGNPOST_PREPARE \ + static os_log_t sn_log = os_log_create(MUSE_APP_NAME_MACHINE_READABLE, OS_LOG_CATEGORY_POINTS_OF_INTEREST); + +// Call these macros to begin and end a signpost interval +#define MSS_SIGNPOST_BEGIN(name) \ + auto spid = os_signpost_id_generate(sn_log); \ + os_signpost_interval_begin(sn_log, spid, name); + +#define MSS_SIGNPOST_END(name) os_signpost_interval_end(sn_log, spid, name); +#define MSS_SIGNPOST_SCOPE(name) MSS_SIGNPOST_SCOPE_IMPL(name, __COUNTER__) +#define MSS_SIGNPOST_SCOPE_IMPL(name, id) \ + auto MSS_SIGNPOST_CONCAT(spid_, id) = os_signpost_id_generate(sn_log); \ + os_signpost_interval_begin(sn_log, MSS_SIGNPOST_CONCAT(spid_, id), name); \ + muse::Defer MSS_SIGNPOST_CONCAT(g_, id)([spid = MSS_SIGNPOST_CONCAT(spid_, id)] { \ + os_signpost_interval_end(sn_log, spid, name); \ + }); + +#define MSS_SIGNPOST_FUNCTION MSS_SIGNPOST_FUNCTION_IMPL(__COUNTER__) +#define MSS_SIGNPOST_FUNCTION_IMPL(id) \ + auto MSS_SIGNPOST_CONCAT(spid_, id) = os_signpost_id_generate(sn_log); \ + const char* MSS_SIGNPOST_CONCAT(function_, id) = __PRETTY_FUNCTION__; \ + os_signpost_interval_begin(sn_log, MSS_SIGNPOST_CONCAT(spid_, id), "Function", "%s", \ + MSS_SIGNPOST_CONCAT(function_, id)); \ + muse::Defer MSS_SIGNPOST_CONCAT(g_, id)([spid = MSS_SIGNPOST_CONCAT(spid_, id), \ + function = MSS_SIGNPOST_CONCAT(function_, id)] { \ + os_signpost_interval_end(sn_log, spid, "Function", "%s", function); \ + }); + +#else // !__APPLE__ +#define MSS_SIGNPOST_PREPARE +#define MSS_SIGNPOST_BEGIN(name) +#define MSS_SIGNPOST_END(name) +#define MSS_SIGNPOST_SCOPE(name) +#define MSS_SIGNPOST_FUNCTION +#endif diff --git a/framework/global/tests/ringqueue_tests.cpp b/framework/global/tests/ringqueue_tests.cpp index 055f0e608c..3754a05908 100644 --- a/framework/global/tests/ringqueue_tests.cpp +++ b/framework/global/tests/ringqueue_tests.cpp @@ -70,6 +70,10 @@ TEST_F(Global_Concurrency_RingQueueTests, FixedSizeQueue) if (successCount == 10) { break; } + if 
(successCount == 0) { + // simple wait until the producer has added its first item + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } } EXPECT_EQ(successCount, 10); diff --git a/framework/global/thirdparty/sg14/inplace_function.h b/framework/global/thirdparty/sg14/inplace_function.h new file mode 100644 index 0000000000..5066d3a246 --- /dev/null +++ b/framework/global/thirdparty/sg14/inplace_function.h @@ -0,0 +1,387 @@ +/* + * Boost Software License - Version 1.0 - August 17th, 2003 + * + * Permission is hereby granted, free of charge, to any person or organization + * obtaining a copy of the software and accompanying documentation covered by + * this license (the "Software") to use, reproduce, display, distribute, + * execute, and transmit the Software, and to prepare derivative works of the + * Software, and to permit third-parties to whom the Software is furnished to + * do so, all subject to the following: + * + * The copyright notices in the Software and this entire statement, including + * the above license grant, this restriction and the following disclaimer, + * must be included in all copies of the Software, in whole or in part, and + * all derivative works of the Software, unless such copies or derivative + * works are solely in the form of machine-executable object code generated by + * a source language processor. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT + * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE + * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#pragma once + +#include +#include +#include + +#ifndef SG14_INPLACE_FUNCTION_THROW +#define SG14_INPLACE_FUNCTION_THROW(x) throw (x) +#endif + +namespace stdext { +namespace inplace_function_detail { +static constexpr size_t InplaceFunctionDefaultCapacity = 32; + +#ifndef SG14_USE_STD_ALIGNED_STORAGE +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61458 +// MSVC 32-bit has the same bug. +// libc++ and MSVC 64-bit seem to work fine right now, but why run the risk? +template +union aligned_storage_helper { + struct double1 { + double a; + }; + struct double4 { + double a[4]; + }; + template using maybe = std::conditional_t<(Cap >= sizeof(T)), T, char>; + char real_data[Cap]; + maybe a; + maybe b; + maybe c; + maybe d; + maybe e; + maybe f; + maybe g; + maybe h; +}; + +template)> +struct aligned_storage { + using type = std::aligned_storage_t; +}; + +template)> +using aligned_storage_t = typename aligned_storage::type; +static_assert(sizeof(aligned_storage_t) == sizeof(void*), "A"); +static_assert(alignof(aligned_storage_t) == alignof(void*), "B"); +#else +using std::aligned_storage; +using std::aligned_storage_t; +static_assert(sizeof(std::aligned_storage_t) == sizeof(void*), "C"); +static_assert(alignof(std::aligned_storage_t) == alignof(void*), "D"); +#endif + +template struct wrapper +{ + using type = T; +}; + +template struct vtable +{ + using storage_ptr_t = void*; + + using invoke_ptr_t = R (*)(storage_ptr_t, Args&&...); + using process_ptr_t = void (*)(storage_ptr_t, storage_ptr_t); + using destructor_ptr_t = void (*)(storage_ptr_t); + + const invoke_ptr_t invoke_ptr; + const process_ptr_t copy_ptr; + const process_ptr_t relocate_ptr; + const destructor_ptr_t destructor_ptr; + + explicit constexpr vtable() noexcept + : invoke_ptr{[](storage_ptr_t, Args&&...) 
-> R + { SG14_INPLACE_FUNCTION_THROW(std::bad_function_call()); } + }, + copy_ptr{ [](storage_ptr_t, storage_ptr_t) -> void {} }, + relocate_ptr{ [](storage_ptr_t, storage_ptr_t) -> void {} }, + destructor_ptr{ [](storage_ptr_t) -> void {} } + {} + + template explicit constexpr vtable(wrapper) noexcept + : invoke_ptr{[](storage_ptr_t storage_ptr, Args&&... args) -> R + { + return (*static_cast(storage_ptr))( + static_cast(args)... + ); + } + }, + copy_ptr{ [](storage_ptr_t dst_ptr, storage_ptr_t src_ptr) -> void + { ::new (dst_ptr) C{ (*static_cast(src_ptr)) }; } + }, + relocate_ptr{ [](storage_ptr_t dst_ptr, storage_ptr_t src_ptr) -> void + { + ::new (dst_ptr) C{ std::move(*static_cast(src_ptr)) }; + static_cast(src_ptr)->~C(); + } + }, + destructor_ptr{ [](storage_ptr_t src_ptr) -> void + { static_cast(src_ptr)->~C(); } + } + {} + + vtable(const vtable&) = delete; + vtable(vtable&&) = delete; + + vtable& operator=(const vtable&) = delete; + vtable& operator=(vtable&&) = delete; + + ~vtable() = default; +}; + +template +#if __cplusplus >= 201703L +inline constexpr +#endif +vtable empty_vtable{}; + +template +struct is_valid_inplace_dst : std::true_type +{ + static_assert(DstCap >= SrcCap, + "Can't squeeze larger inplace_function into a smaller one" + ); + + static_assert(DstAlign % SrcAlign == 0, + "Incompatible inplace_function alignments" + ); +}; + +// C++11 MSVC compatible implementation of std::is_invocable_r. + +template void accept(R); + +template struct is_invocable_r_impl : std::false_type {}; + +template struct is_invocable_r_impl< + decltype(std::declval()(std::declval()...), void()), + void, + F, + Args... + > : std::true_type {}; + +template struct is_invocable_r_impl< + decltype(std::declval()(std::declval()...), void()), + const void, + F, + Args... + > : std::true_type {}; + +template struct is_invocable_r_impl< + decltype(accept(std::declval()(std::declval()...))), + R, + F, + Args... 
+ > : std::true_type {}; + +template using is_invocable_r = is_invocable_r_impl< + void, + R, + F, + Args... + >; +} // namespace inplace_function_detail + +template< + class Signature, + size_t Capacity = inplace_function_detail::InplaceFunctionDefaultCapacity, + size_t Alignment = alignof(inplace_function_detail::aligned_storage_t) + > +class inplace_function; // unspecified + +namespace inplace_function_detail { +template struct is_inplace_function : std::false_type {}; +template +struct is_inplace_function > : std::true_type {}; +} // namespace inplace_function_detail + +template< + class R, + class ... Args, + size_t Capacity, + size_t Alignment + > +class inplace_function +{ + using storage_t = inplace_function_detail::aligned_storage_t; + using vtable_t = inplace_function_detail::vtable; + using vtable_ptr_t = const vtable_t*; + + template friend class inplace_function; + +public: + using capacity = std::integral_constant; + using alignment = std::integral_constant; + + inplace_function() noexcept + : vtable_ptr_{std::addressof(inplace_function_detail::empty_vtable)} + {} + + template< + class T, + class C = std::decay_t, + class = std::enable_if_t< + !inplace_function_detail::is_inplace_function::value + && inplace_function_detail::is_invocable_r::value + > + > + inplace_function(T&& closure) + { + static_assert(std::is_copy_constructible::value, + "inplace_function cannot be constructed from non-copyable type" + ); + + static_assert(sizeof(C) <= Capacity, + "inplace_function cannot be constructed from object with this (large) size" + ); + + static_assert(Alignment % alignof(C) == 0, + "inplace_function cannot be constructed from object with this (large) alignment" + ); + + static const vtable_t vt{ inplace_function_detail::wrapper {} }; + vtable_ptr_ = std::addressof(vt); + + ::new (std::addressof(storage_)) C{ std::forward(closure) }; + } + + template + inplace_function(const inplace_function& other) + : inplace_function(other.vtable_ptr_, 
other.vtable_ptr_->copy_ptr, std::addressof(other.storage_)) + { + static_assert(inplace_function_detail::is_valid_inplace_dst< + Capacity, Alignment, Cap, Align + >::value, "conversion not allowed"); + } + + template + inplace_function(inplace_function&& other) noexcept + : inplace_function(other.vtable_ptr_, other.vtable_ptr_->relocate_ptr, std::addressof(other.storage_)) + { + static_assert(inplace_function_detail::is_valid_inplace_dst< + Capacity, Alignment, Cap, Align + >::value, "conversion not allowed"); + + other.vtable_ptr_ = std::addressof(inplace_function_detail::empty_vtable); + } + + inplace_function(std::nullptr_t) noexcept + : vtable_ptr_{std::addressof(inplace_function_detail::empty_vtable)} + {} + + inplace_function(const inplace_function& other) + : vtable_ptr_{other.vtable_ptr_} + { + vtable_ptr_->copy_ptr( + std::addressof(storage_), + std::addressof(other.storage_) + ); + } + + inplace_function(inplace_function&& other) noexcept + : vtable_ptr_{std::exchange(other.vtable_ptr_, std::addressof(inplace_function_detail::empty_vtable))} + { + vtable_ptr_->relocate_ptr( + std::addressof(storage_), + std::addressof(other.storage_) + ); + } + + inplace_function& operator=(std::nullptr_t) noexcept + { + vtable_ptr_->destructor_ptr(std::addressof(storage_)); + vtable_ptr_ = std::addressof(inplace_function_detail::empty_vtable); + return *this; + } + + inplace_function& operator=(inplace_function other) noexcept + { + vtable_ptr_->destructor_ptr(std::addressof(storage_)); + + vtable_ptr_ = std::exchange(other.vtable_ptr_, std::addressof(inplace_function_detail::empty_vtable)); + vtable_ptr_->relocate_ptr( + std::addressof(storage_), + std::addressof(other.storage_) + ); + return *this; + } + + ~inplace_function() + { + vtable_ptr_->destructor_ptr(std::addressof(storage_)); + } + + R operator()(Args... args) const + { + return vtable_ptr_->invoke_ptr( + std::addressof(storage_), + std::forward(args)... 
+ ); + } + + constexpr bool operator==(std::nullptr_t) const noexcept + { + return !operator bool(); + } + + constexpr bool operator!=(std::nullptr_t) const noexcept + { + return operator bool(); + } + + explicit constexpr operator bool() const noexcept + { + return vtable_ptr_ != std::addressof(inplace_function_detail::empty_vtable); + } + + void swap(inplace_function& other) noexcept + { + if (this == std::addressof(other)) { + return; + } + + storage_t tmp; + vtable_ptr_->relocate_ptr( + std::addressof(tmp), + std::addressof(storage_) + ); + + other.vtable_ptr_->relocate_ptr( + std::addressof(storage_), + std::addressof(other.storage_) + ); + + vtable_ptr_->relocate_ptr( + std::addressof(other.storage_), + std::addressof(tmp) + ); + + std::swap(vtable_ptr_, other.vtable_ptr_); + } + + friend void swap(inplace_function& lhs, inplace_function& rhs) noexcept + { + lhs.swap(rhs); + } + +private: + vtable_ptr_t vtable_ptr_; + mutable storage_t storage_; + + inplace_function( + vtable_ptr_t vtable_ptr, + typename vtable_t::process_ptr_t process_ptr, + typename vtable_t::storage_ptr_t storage_ptr) + : vtable_ptr_{vtable_ptr} + { + process_ptr(std::addressof(storage_), storage_ptr); + } +}; +} // namespace stdext diff --git a/framework/stubs/audio/CMakeLists.txt b/framework/stubs/audio/CMakeLists.txt index 837d3ff9cd..90eea43010 100644 --- a/framework/stubs/audio/CMakeLists.txt +++ b/framework/stubs/audio/CMakeLists.txt @@ -37,4 +37,5 @@ target_sources(muse_audio PRIVATE audiodriverstub.h audiodrivercontrollerstub.cpp audiodrivercontrollerstub.h + audioworkgroupstub.cpp ) diff --git a/framework/stubs/audio/audioworkgroupstub.cpp b/framework/stubs/audio/audioworkgroupstub.cpp new file mode 100644 index 0000000000..aadcba5a12 --- /dev/null +++ b/framework/stubs/audio/audioworkgroupstub.cpp @@ -0,0 +1,70 @@ +/* + * SPDX-License-Identifier: GPL-3.0-only + * MuseScore-CLA-applies + * + * MuseScore + * Music Composition & Notation + * + * Copyright (C) 2026 MuseScore Limited 
and others + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 3 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "audio/common/audioworkgroup.h" + +#include +#include +#include + +namespace muse::audio { +class AudioWorkgroupProvider +{ +}; + +AudioWorkGroup::AudioWorkGroup() = default; + +AudioWorkGroup::AudioWorkGroup(std::unique_ptr provider) + : m_provider(std::move(provider)) {} + +AudioWorkGroup::~AudioWorkGroup() = default; + +AudioWorkGroup::AudioWorkGroup(const AudioWorkGroup& other) + : m_provider(other.m_provider ? std::make_unique(*other.m_provider) : nullptr) {} + +AudioWorkGroup::AudioWorkGroup(AudioWorkGroup&& other) noexcept = default; + +AudioWorkGroup& AudioWorkGroup::operator=(const AudioWorkGroup& other) +{ + if (this != &other) { + m_provider = other.m_provider ? std::make_unique(*other.m_provider) : nullptr; + } + return *this; +} + +AudioWorkGroup& AudioWorkGroup::operator=(AudioWorkGroup&& other) noexcept = default; + +bool AudioWorkGroup::join(AudioWorkgroupToken&) +{ + return false; +} + +size_t AudioWorkGroup::getMaxParallelThreadCount() const +{ + return std::thread::hardware_concurrency(); +} + +AudioWorkGroup makeAudioWorkgroup(void*) +{ + return {}; +} +} // namespace muse::audio