diff --git a/examples/extended/user_owned_mpi.c b/examples/extended/user_owned_mpi.c new file mode 100644 index 000000000..4e3c766f4 --- /dev/null +++ b/examples/extended/user_owned_mpi.c @@ -0,0 +1,49 @@ +/** @file + * + * An example of using QuEST's experimental + * initCustomMpiQuESTEnv() function, to + * initialise QuEST in an environment where + * MPI is owned and controlled by the user. + * + * @author Oliver Brown + * @author Tyson Jones (doc) + */ + +#include "quest.h" +#include <stdio.h> + + +// This example requires linking with MPI, which the CMake +// build only enables when QUEST_ENABLE_SUBCOMM is ON, which +// results in quest.h defining QUEST_COMPILE_SUBCOMM. To +// enable this example to always be compilable (like during +// our CI), we guard against when QUEST_ENABLE_SUBCOMM is OFF. +#if ! QUEST_COMPILE_SUBCOMM +int main(void) +{ + printf("Example skipped since MPI is not linked.\n"); + return 0; +} +#else + + +#include <mpi.h> + +int main(void) +{ + const int USE_DISTRIB = 1; + const bool USER_MPI = 1; + const int USE_OPENMP = 1; + const int USE_GPU = 0; + + MPI_Init(NULL, NULL); + initCustomMpiQuESTEnv(USE_DISTRIB, USER_MPI, USE_GPU, USE_OPENMP); + reportQuESTEnv(); + finalizeQuESTEnv(); + MPI_Finalize(); + + return 0; +} + + +#endif // QUEST_COMPILE_SUBCOMM diff --git a/examples/extended/user_owned_mpi.cpp b/examples/extended/user_owned_mpi.cpp new file mode 100644 index 000000000..54345d576 --- /dev/null +++ b/examples/extended/user_owned_mpi.cpp @@ -0,0 +1,49 @@ +/** @file + * + * An example of using QuEST's experimental + * initCustomMpiQuESTEnv() function to + * initialise QuEST in an environment where + * MPI is owned and controlled by the user. + * + * @author Oliver Brown + * @author Tyson Jones (doc) + */ + +#include "quest.h" +#include <cstdio> + + +// This example requires linking with MPI, which the CMake +// build only enables when QUEST_ENABLE_SUBCOMM is ON, which +// results in quest.h defining QUEST_COMPILE_SUBCOMM. 
To +// enable this example to always be compilable (like during +// our CI), we guard against when QUEST_ENABLE_SUBCOMM is OFF. +#if ! QUEST_COMPILE_SUBCOMM +int main(void) +{ + std::printf("Example skipped since MPI is not linked.\n"); + return 0; +} +#else + + +#include <mpi.h> + +int main(void) +{ + const int USE_DISTRIB = 1; + const bool USER_MPI = 1; + const int USE_OPENMP = 1; + const int USE_GPU = 0; + + MPI_Init(NULL, NULL); + initCustomMpiQuESTEnv(USE_DISTRIB, USER_MPI, USE_GPU, USE_OPENMP); + reportQuESTEnv(); + finalizeQuESTEnv(); + MPI_Finalize(); + + return 0; +} + + +#endif // QUEST_COMPILE_SUBCOMM diff --git a/examples/extended/user_owned_submpi.c b/examples/extended/user_owned_submpi.c new file mode 100644 index 000000000..6f2ea6290 --- /dev/null +++ b/examples/extended/user_owned_submpi.c @@ -0,0 +1,84 @@ +/** @file + * + * An example of using QuEST's experimental + * initCustomMpiCommQuESTEnv() function to + * dedicate only some user-owned MPI processes + * to QuEST, and dedicate the remainder to + * other tasks. + * + * @author Oliver Brown + * @author Tyson Jones (doc) + */ + +#include "quest.h" +#include <stdio.h> + + +// This example requires linking with MPI, which the CMake +// build only enables when QUEST_ENABLE_SUBCOMM is ON, which +// results in quest.h defining QUEST_COMPILE_SUBCOMM. To +// enable this example to always be compilable (like during +// our CI), we guard against when QUEST_ENABLE_SUBCOMM is OFF. +#if ! 
QUEST_COMPILE_SUBCOMM +int main() +{ + printf("Example skipped since MPI is not linked.\n"); + return 0; +} +#else + + +#include <mpi.h> + +int main (void) +{ + int nprocs, quest_nprocs, world_rank, quest_rank; + MPI_Comm comm_split, comm_quantum, comm_classical; + + MPI_Init(NULL, NULL); + + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + + const int I_AM_QUANTUM = world_rank % 2; + + printf("[%d] Hello from rank %d of %d in MPI_COMM_WORLD.\n", world_rank, world_rank, nprocs); + + MPI_Comm_split(MPI_COMM_WORLD, I_AM_QUANTUM, world_rank, &comm_split); + + if (I_AM_QUANTUM) { + MPI_Comm_dup(comm_split, &comm_quantum); + MPI_Comm_size(comm_quantum, &quest_nprocs); + MPI_Comm_rank(comm_quantum, &quest_rank); + printf("[%d] Hello from rank %d of %d in comm_quantum.\n", world_rank, quest_rank, quest_nprocs); + } else { + MPI_Comm_dup(comm_split, &comm_classical); + quest_rank = -1; + quest_nprocs = -1; + } + + // only procs in quantum comm initialise QuEST + if (I_AM_QUANTUM) { + printf("[%d] Initialising QuEST.\n", world_rank); + initCustomMpiCommQuESTEnv(comm_quantum, -1, -1); // -1 = auto-deployments + + reportQuESTEnv(); + + printf("[%d] Finalising QuEST.\n", world_rank); + finalizeQuESTEnv(); + } + + MPI_Comm_free(&comm_split); + if (I_AM_QUANTUM) { + MPI_Comm_free(&comm_quantum); + } else { + MPI_Comm_free(&comm_classical); + } + + MPI_Finalize(); + + return 0; +} + + +#endif // QUEST_COMPILE_SUBCOMM diff --git a/examples/extended/user_owned_submpi.cpp b/examples/extended/user_owned_submpi.cpp new file mode 100644 index 000000000..ea82a4f9d --- /dev/null +++ b/examples/extended/user_owned_submpi.cpp @@ -0,0 +1,84 @@ +/** @file + * + * An example of using QuEST's experimental + * initCustomMpiCommQuESTEnv() function to + * dedicate only some user-owned MPI processes + * to QuEST, and dedicate the remainder to + * other tasks. 
+ * + * @author Oliver Brown + * @author Tyson Jones (doc) + */ + +#include "quest.h" +#include <cstdio> + + +// This example requires linking with MPI, which the CMake +// build only enables when QUEST_ENABLE_SUBCOMM is ON, which +// results in quest.h defining QUEST_COMPILE_SUBCOMM. To +// enable this example to always be compilable (like during +// our CI), we guard against when QUEST_ENABLE_SUBCOMM is OFF. +#if ! QUEST_COMPILE_SUBCOMM +int main() +{ + std::printf("Example skipped since MPI is not linked.\n"); + return 0; +} +#else + + +#include <mpi.h> + +int main (void) +{ + int nprocs, quest_nprocs, world_rank, quest_rank; + MPI_Comm comm_split, comm_quantum, comm_classical; + + MPI_Init(NULL, NULL); + + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + + const int I_AM_QUANTUM = world_rank % 2; + + std::printf("[%d] Hello from rank %d of %d in MPI_COMM_WORLD.\n", world_rank, world_rank, nprocs); + + MPI_Comm_split(MPI_COMM_WORLD, I_AM_QUANTUM, world_rank, &comm_split); + + if (I_AM_QUANTUM) { + MPI_Comm_dup(comm_split, &comm_quantum); + MPI_Comm_size(comm_quantum, &quest_nprocs); + MPI_Comm_rank(comm_quantum, &quest_rank); + std::printf("[%d] Hello from rank %d of %d in comm_quantum.\n", world_rank, quest_rank, quest_nprocs); + } else { + MPI_Comm_dup(comm_split, &comm_classical); + quest_rank = -1; + quest_nprocs = -1; + } + + // only procs in quantum comm initialise QuEST + if (I_AM_QUANTUM) { + std::printf("[%d] Initialising QuEST.\n", world_rank); + initCustomMpiCommQuESTEnv(comm_quantum, modeflag::USE_AUTO, modeflag::USE_AUTO); + + reportQuESTEnv(); + + std::printf("[%d] Finalising QuEST.\n", world_rank); + finalizeQuESTEnv(); + } + + MPI_Comm_free(&comm_split); + if (I_AM_QUANTUM) { + MPI_Comm_free(&comm_quantum); + } else { + MPI_Comm_free(&comm_classical); + } + + MPI_Finalize(); + + return 0; +} + + +#endif // QUEST_COMPILE_SUBCOMM diff --git a/quest/include/environment.h b/quest/include/environment.h index 15a6ac5e2..cdefa7d7d 
100644 --- a/quest/include/environment.h +++ b/quest/include/environment.h @@ -35,13 +35,13 @@ extern "C" { typedef struct { // deployment modes which can be runtime disabled - int isMultithreaded; - int isGpuAccelerated; - int isDistributed; - bool userOwnsMpi; + bool isMultithreaded; + bool isGpuAccelerated; + bool isDistributed; + bool isMpiUserOwned; // deployment modes which cannot be directly changed after compilation - int isCuQuantumEnabled; + bool isCuQuantumEnabled; // deployment configurations which can be changed via environment variables int isGpuSharingEnabled; @@ -65,12 +65,6 @@ void initQuESTEnv(); */ void initCustomQuESTEnv(int useDistrib, int useGpuAccel, int useMultithread); -/** @notyetdoced - * Advanced initialiser which lets the user positively declare that they take responsibility for MPI. - * This means we assume they have called MPI_Init, and that they will call MPI_Finalize. - */ -void initCustomMpiQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuAccel, int useMultithread); - /// @notyetdoced void finalizeQuESTEnv(); diff --git a/quest/include/experimental.h b/quest/include/experimental.h new file mode 100644 index 000000000..2fabdc34f --- /dev/null +++ b/quest/include/experimental.h @@ -0,0 +1,75 @@ +/** @file + * Experimental functions which are liable to + * API breaks within QuEST minor version releases. + * Some optional functions require compiling this + * file against MPI, despite being outside of /comm/, + * and so require opt-in macros (QUEST_COMPILE_SUBCOMM) + * + * @author Oliver Brown + * @author Tyson Jones (formatting) + * + * @defgroup experimental Experimental + * @ingroup api + * @brief Experimental functions with tentative APIs + * @{ + */ + +#ifndef EXPERIMENTAL_H +#define EXPERIMENTAL_H + +#include "quest/include/config.h" + +#if QUEST_COMPILE_SUBCOMM && ! QUEST_COMPILE_MPI + #error "Macro QUEST_COMPILE_SUBCOMM was true, but QUEST_COMPILE_MPI was illegally false." 
+#endif + +#if QUEST_COMPILE_SUBCOMM + #include <mpi.h> +#endif + +// enable invocation by both C and C++ binaries +#ifdef __cplusplus +extern "C" { +#endif + + +/** @notyetdoced + * + * Advanced initialiser which lets the user positively declare that they take responsibility for MPI. + * This means we assume they have called MPI_Init, and that they will call MPI_Finalize. + * + * @author Oliver Brown + */ +void initCustomMpiQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuAccel, int useMultithread); + + +#if QUEST_COMPILE_SUBCOMM + +/** @notyetdoced + * + * Advanced initialiser which allows the user to provide an MPI communicator for QuEST to use. + * Use of this initialiser implies userOwnsMpi = true, (exposed by initCustomMpiQuESTEnv) and + * therefore that they have already initialised MPI, and they will call MPI_Finalize at the + * appropriate time. + * + * The user-provided MPI communicator undergoes the same validation procedure as any that QuEST + * would use, and so must contain a power-of-2 number of processes. + * + * This function is only compiled and exposed when macro QUEST_COMPILE_SUBCOMM is 1, as is + * defined when providing CMake option QUEST_ENABLE_SUBCOMM during building. 
+ * + * @author Oliver Brown + */ +void initCustomMpiCommQuESTEnv(MPI_Comm questComm, int useGpuAccel, int useMultithread); + +#endif // QUEST_COMPILE_SUBCOMM + + +// end de-mangler +#ifdef __cplusplus +} +#endif + +#endif // EXPERIMENTAL_H + +/** @} */ // (end file-wide doxygen defgroup) diff --git a/quest/include/quest.h b/quest/include/quest.h index 16f8e9b49..da1c778e2 100644 --- a/quest/include/quest.h +++ b/quest/include/quest.h @@ -38,6 +38,7 @@ #include "quest/include/debug.h" #include "quest/include/decoherence.h" #include "quest/include/environment.h" +#include "quest/include/experimental.h" #include "quest/include/trotterisation.h" #include "quest/include/initialisations.h" #include "quest/include/channels.h" @@ -45,7 +46,6 @@ #include "quest/include/operations.h" #include "quest/include/paulis.h" #include "quest/include/qureg.h" -#include "quest/include/subcommunicator.h" #include "quest/include/matrices.h" #include "quest/include/wrappers.h" diff --git a/quest/include/subcommunicator.h b/quest/include/subcommunicator.h deleted file mode 100644 index 8854404d6..000000000 --- a/quest/include/subcommunicator.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef SUBCOMMUNICATOR_H -#define SUBCOMMUNICATOR_H - -#include "quest/include/config.h" - -#if QUEST_COMPILE_MPI && QUEST_COMPILE_SUBCOMM - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/** @notyetdoced - * Advanced initialiser which allows the user to provide an MPI communicator for QuEST to use. - * Use of this initialiser implies userOwnsMpi = true, (exposed by initCustomMpiQuESTEnv) and - * therefore that they have already initialised MPI, and they will call MPI_Finalize at the - * appropriate time. - * - * The user-provided MPI communicator undergoes the same validation procedure as any that QuEST - * would use, and so must contain a power-of-2 number of processes. 
- */ -void initCustomMpiCommQuESTEnv(MPI_Comm questComm, int useGpuAccel, int useMultithread); - -#ifdef __cplusplus -} -#endif - -#endif - -#endif diff --git a/quest/src/api/CMakeLists.txt b/quest/src/api/CMakeLists.txt index 43b61df7d..7f90dcf17 100644 --- a/quest/src/api/CMakeLists.txt +++ b/quest/src/api/CMakeLists.txt @@ -5,6 +5,7 @@ target_sources(QuEST debug.cpp decoherence.cpp environment.cpp + experimental.cpp initialisations.cpp matrices.cpp modes.cpp @@ -12,7 +13,6 @@ target_sources(QuEST operations.cpp paulis.cpp qureg.cpp - subcommunicator.cpp trotterisation.cpp types.cpp ) diff --git a/quest/src/api/channels.cpp b/quest/src/api/channels.cpp index 450d04cbd..c6702438a 100644 --- a/quest/src/api/channels.cpp +++ b/quest/src/api/channels.cpp @@ -107,7 +107,7 @@ void freeAllMemoryIfAnyAllocsFailed(T& obj) { // determine whether any node experienced a failure bool anyFail = didAnyLocalAllocsFail(obj); - if (comm_isInit()) + if (comm_isActive()) anyFail = comm_isTrueOnAllNodes(anyFail); // if so, free all memory before subsequent validation diff --git a/quest/src/api/environment.cpp b/quest/src/api/environment.cpp index b0eb38c77..abf3127e8 100644 --- a/quest/src/api/environment.cpp +++ b/quest/src/api/environment.cpp @@ -48,7 +48,7 @@ using std::string; */ -static QuESTEnv* globalEnvPtr = nullptr; +static QuESTEnv* global_envPtr = nullptr; @@ -62,7 +62,7 @@ static QuESTEnv* globalEnvPtr = nullptr; */ -static bool hasEnvBeenFinalized = false; +static bool global_hasEnvBeenFinalized = false; @@ -74,9 +74,12 @@ static bool hasEnvBeenFinalized = false; void validateAndInitCustomQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuAccel, int useMultithread, const char* caller) { // ensure that we are never re-initialising QuEST (even after finalize) because - // this leads to undefined behaviour in distributed mode, as per the MPI - validate_envNeverInit(globalEnvPtr != nullptr, hasEnvBeenFinalized, caller); + // this leads to undefined behaviour in distributed 
mode, as per the MPI std, + // regardless of whether the user owns MPI + validate_envNeverInit(global_envPtr != nullptr, global_hasEnvBeenFinalized, caller); + // load env-vars before validating deployment mode, because some env vars can + // affect validation (such as QUEST_PERMIT_NODES_TO_SHARE_GPU) envvars_validateAndLoadEnvVars(caller); validateconfig_setEpsilonToDefault(); @@ -86,14 +89,19 @@ void validateAndInitCustomQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuA // by mpirun believe they are each the main rank. This seems unavoidable. validate_newEnvDeploymentMode(useDistrib, useGpuAccel, useMultithread, caller); - // overwrite deployments left as modeflag::USE_AUTO + // overwrite deployments (left as modeflag::USE_AUTO=-1) with 0,1 (a bool), + // which crucially, resolves useDistrib, permitting its consultation below autodep_chooseQuESTEnvDeployment(useDistrib, useGpuAccel, useMultithread); + // ensure that current state of MPI is valid + validate_mpiInitStatus(useDistrib, userOwnsMpi, caller); + // optionally initialise MPI; necessary before completing validation, // and before any GPU initialisation and validation, since we will // perform that specifically upon the MPI-process-bound GPU(s). Further, // we can make sure validation errors are reported only by the root node. 
- comm_init(useDistrib, userOwnsMpi); + if (useDistrib) + comm_init(userOwnsMpi); validate_newEnvDistributedBetweenPower2Nodes(caller); @@ -138,31 +146,26 @@ void validateAndInitCustomQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuA rand_setSeedsToDefault(); // allocate space for the global QuESTEnv singleton (overwriting nullptr, unless malloc fails) - globalEnvPtr = (QuESTEnv*) malloc(sizeof(QuESTEnv)); + global_envPtr = (QuESTEnv*) malloc(sizeof(QuESTEnv)); // pedantically check that teeny tiny malloc just succeeded - if (globalEnvPtr == nullptr) + if (global_envPtr == nullptr) error_allocOfQuESTEnvFailed(); - // bind deployment info to global instance - globalEnvPtr->isMultithreaded = useMultithread; - globalEnvPtr->isGpuAccelerated = useGpuAccel; - globalEnvPtr->isDistributed = useDistrib; - globalEnvPtr->userOwnsMpi = userOwnsMpi; - globalEnvPtr->isCuQuantumEnabled = useCuQuantum; - globalEnvPtr->isGpuSharingEnabled = permitGpuSharing; - globalEnvPtr->isMpiGpuAware = isMpiGpuAware; + // bind deployment info to global instance (autocasting int to bool) + global_envPtr->isMultithreaded = useMultithread; + global_envPtr->isGpuAccelerated = useGpuAccel; + global_envPtr->isDistributed = useDistrib; + global_envPtr->isMpiUserOwned = userOwnsMpi; + global_envPtr->isMpiGpuAware = isMpiGpuAware; + global_envPtr->isCuQuantumEnabled = useCuQuantum; + global_envPtr->isGpuSharingEnabled = permitGpuSharing; // bind distributed info - globalEnvPtr->rank = (useDistrib)? comm_getRank() : 0; - globalEnvPtr->numNodes = (useDistrib)? comm_getNumNodes() : 1; + global_envPtr->rank = (useDistrib)? comm_getRank() : 0; + global_envPtr->numNodes = (useDistrib)? comm_getNumNodes() : 1; } -void updateQuESTEnvDistInfo() { - globalEnvPtr->rank = (globalEnvPtr->isDistributed)? comm_getRank() : 0; - globalEnvPtr->numNodes = (globalEnvPtr->isDistributed)? 
comm_getNumNodes() : 1; - return; -} /* @@ -197,11 +200,11 @@ void printCompilationInfo() { print_table( "compilation", { - {"isMpiCompiled", comm_isMpiCompiled()}, - {"isMpiSubCommunicatorCompiled", comm_isMpiSubCommunicatorCompiled()}, - {"isGpuCompiled", gpu_isGpuCompiled()}, - {"isOmpCompiled", cpu_isOpenmpCompiled()}, - {"isCuQuantumCompiled", gpu_isCuQuantumCompiled()}, + {"isMpiCompiled", comm_isMpiCompiled()}, + {"isMpiSubCommCompiled", comm_isMpiSubCommCompiled()}, + {"isGpuCompiled", gpu_isGpuCompiled()}, + {"isOmpCompiled", cpu_isOpenmpCompiled()}, + {"isCuQuantumCompiled", gpu_isCuQuantumCompiled()}, }); } @@ -210,13 +213,10 @@ void printDeploymentInfo() { print_table( "deployment", { - {"isMpiEnabled", globalEnvPtr->isDistributed}, - {"doesUserOwnMpi", globalEnvPtr->userOwnsMpi}, - {"isGpuEnabled", globalEnvPtr->isGpuAccelerated}, - {"isOmpEnabled", globalEnvPtr->isMultithreaded}, - {"isCuQuantumEnabled", globalEnvPtr->isCuQuantumEnabled}, - {"isGpuSharingEnabled", globalEnvPtr->isGpuSharingEnabled}, - {"isMpiGpuAware", globalEnvPtr->isMpiGpuAware}, + {"isMpiEnabled", global_envPtr->isDistributed}, + {"isGpuEnabled", global_envPtr->isGpuAccelerated}, + {"isOmpEnabled", global_envPtr->isMultithreaded}, + {"isCuQuantumEnabled", global_envPtr->isCuQuantumEnabled}, }); } @@ -273,10 +273,16 @@ void printDistributionInfo() { using namespace printer_substrings; + bool comm = global_envPtr->isDistributed; + bool gpu = global_envPtr->isGpuAccelerated; + bool both = comm && gpu; + print_table( "distribution", { - {"isMpiGpuAware", comm_isInit()? printer_toStr(globalEnvPtr->isMpiGpuAware) : na}, - {"numMpiNodes", printer_toStr(globalEnvPtr->numNodes)}, + {"isMpiUserOwned", comm? printer_toStr(global_envPtr->isMpiUserOwned) : na}, + {"isMpiGpuAware", comm? printer_toStr(global_envPtr->isMpiGpuAware ) : na}, + {"isGpuSharingEnabled", both? 
printer_toStr(global_envPtr->isGpuSharingEnabled) : na}, + {"numMpiNodes", printer_toStr(global_envPtr->numNodes)}, }); } @@ -286,7 +292,7 @@ void printQuregSizeLimits(bool isDensMatr) { using namespace printer_substrings; // for brevity - int numNodes = globalEnvPtr->numNodes; + int numNodes = global_envPtr->numNodes; // by default, CPU limits are unknown (because memory query might fail) string maxQbForCpu = un; @@ -298,7 +304,7 @@ void printQuregSizeLimits(bool isDensMatr) { maxQbForCpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, 1, cpuMem)); // and the max MPI sizes are only relevant when env is distributed - if (globalEnvPtr->isDistributed) + if (global_envPtr->isDistributed) maxQbForMpiCpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, numNodes, cpuMem)); // when MPI irrelevant, change their status from "unknown" to "N/A" @@ -313,12 +319,12 @@ void printQuregSizeLimits(bool isDensMatr) { string maxQbForMpiGpu = na; // max GPU registers only relevant if env is GPU-accelerated - if (globalEnvPtr->isGpuAccelerated) { + if (global_envPtr->isGpuAccelerated) { qindex gpuMem = gpu_getCurrentAvailableMemoryInBytes(); maxQbForGpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, 1, gpuMem)); // and the max MPI sizes are further only relevant when env is distributed - if (globalEnvPtr->isDistributed) + if (global_envPtr->isDistributed) maxQbForMpiGpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, numNodes, gpuMem)); } @@ -355,7 +361,7 @@ void printQuregAutoDeployments(bool isDensMatr) { // test to theoretically max #qubits, surpassing max that can fit in RAM and GPUs, because // auto-deploy will still try to deploy there to (then subsequent validation will fail) - int maxQubits = mem_getMaxNumQuregQubitsBeforeGlobalMemSizeofOverflow(isDensMatr, globalEnvPtr->numNodes); + int maxQubits = mem_getMaxNumQuregQubitsBeforeGlobalMemSizeofOverflow(isDensMatr, 
global_envPtr->numNodes); for (int numQubits=1; numQubitsisGpuAccelerated) + if (global_envPtr->isGpuAccelerated) gpu_clearCache(); // syncs first - if (globalEnvPtr->isGpuAccelerated && gpu_isCuQuantumCompiled()) + if (global_envPtr->isGpuAccelerated && gpu_isCuQuantumCompiled()) gpu_finalizeCuQuantum(); - if (globalEnvPtr->isDistributed) { + if (global_envPtr->isDistributed) { comm_sync(); - comm_end(globalEnvPtr->userOwnsMpi); + comm_end(); } // free global env's heap memory and flag it as unallocated - free(globalEnvPtr); - globalEnvPtr = nullptr; + free(global_envPtr); + global_envPtr = nullptr; // flag that the environment was finalised, to ensure it is never re-initialised - hasEnvBeenFinalized = true; + global_hasEnvBeenFinalized = true; } void syncQuESTEnv() { validate_envIsInit(__func__); - if (globalEnvPtr->isGpuAccelerated) + if (global_envPtr->isGpuAccelerated) gpu_sync(); - if (globalEnvPtr->isDistributed) { + if (global_envPtr->isDistributed) comm_sync(); - #if QUEST_COMPILE_SUBCOMM - updateQuESTEnvDistInfo(); - #endif - } } @@ -519,19 +517,17 @@ void reportQuESTEnv() { void getQuESTEnvironmentString(char str[200]) { validate_envIsInit(__func__); - QuESTEnv env = getQuESTEnv(); - int numThreads = cpu_isOpenmpCompiled()? 
cpu_getAvailableNumThreads() : 1; - int cuQuantum = env.isGpuAccelerated && gpu_isCuQuantumCompiled(); - int gpuDirect = env.isGpuAccelerated && gpu_isDirectGpuCommPossible(); + int cuQuantum = global_envPtr->isGpuAccelerated && gpu_isCuQuantumCompiled(); + int gpuDirect = global_envPtr->isGpuAccelerated && gpu_isDirectGpuCommPossible(); snprintf(str, 200, "CUDA=%d OpenMP=%d MPI=%d userOwnsMPI=%d threads=%d ranks=%d cuQuantum=%d gpuDirect=%d", - env.isGpuAccelerated, - env.isMultithreaded, - env.isDistributed, - env.userOwnsMpi, + global_envPtr->isGpuAccelerated, + global_envPtr->isMultithreaded, + global_envPtr->isDistributed, + global_envPtr->isMpiUserOwned, numThreads, - env.numNodes, + global_envPtr->numNodes, cuQuantum, gpuDirect); } diff --git a/quest/src/api/experimental.cpp b/quest/src/api/experimental.cpp new file mode 100644 index 000000000..1ad6fdb42 --- /dev/null +++ b/quest/src/api/experimental.cpp @@ -0,0 +1,89 @@ +/** @file + * Experimental functions which are liable to + * API breaks within QuEST minor version releases. + * Some optional functions require compiling this + * file against MPI, despite being outside of /comm/, + * and so require opt-in macros (QUEST_COMPILE_SUBCOMM) + * + * @author Oliver Brown + */ + +#include "quest/include/config.h" +#include "quest/include/environment.h" + +#include "quest/src/core/validation.hpp" +#include "quest/src/comm/comm_config.hpp" + +#if QUEST_COMPILE_SUBCOMM && ! QUEST_COMPILE_MPI + #error "Macro QUEST_COMPILE_SUBCOMM was true, but QUEST_COMPILE_MPI was illegally false." 
+#endif + +#if QUEST_COMPILE_SUBCOMM + #include <mpi.h> +#endif + + + +/* + * EXTERNAL FUNCTIONS + * + * which we here regretfully 'extern' because we are either + * unsure which header should expose them, or because they + * contain deployment-specific types (like MPI_Comm) which + * we do not wish to expose within internal headers + */ + + +extern void validateAndInitCustomQuESTEnv( + int useDistrib, bool userOwnsMpi, int useGpuAccel, int useMultithread, const char* caller); + + +#if QUEST_COMPILE_SUBCOMM // hide MPI_Comm + extern bool comm_setMpiComm(MPI_Comm newComm, bool userOwnsMpi); +#endif + + + +/* + * API FUNCTIONS + */ + + +// enable invocation by both C and C++ binaries +extern "C" { + + +void initCustomMpiQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuAccel, int useMultithread) { + validateAndInitCustomQuESTEnv(useDistrib, userOwnsMpi, useGpuAccel, useMultithread, __func__); +} + + +#if QUEST_COMPILE_SUBCOMM // hide MPI_Comm + +void initCustomMpiCommQuESTEnv(MPI_Comm userQuestComm, int useGpuAccel, int useMultithread) { + + // useDistrib and userOwnsMpi are implied by the use of this initialiser + const int useDistrib = 1; + const bool userOwnsMpi = true; + + // pre-validate that we are able to set the MPI communicator + validate_mpiInitStatus(useDistrib, userOwnsMpi, __func__); + validate_mpiSubCommIsNonNull(userQuestComm != MPI_COMM_NULL, __func__); + + // avoid re-setting the MPI comm (to avoid an internal error), which happens + // if a user illegally re-calls this function, which will be subsequently + // caught by the validation in validateAndInitCustomQuESTEnv() below + if (!comm_isActive()) { + bool success = comm_setMpiComm(userQuestComm, userOwnsMpi); + validate_mpiSubCommSetSucceeded(success, __func__); + } + + // perform remaining validation (some is harmlessly repeated) and init QuEST env + validateAndInitCustomQuESTEnv(useDistrib, userOwnsMpi, useGpuAccel, useMultithread, __func__); +} + +#endif // QUEST_COMPILE_SUBCOMM + + +// end 
de-mangler +} diff --git a/quest/src/api/matrices.cpp b/quest/src/api/matrices.cpp index c7e963359..07e37025c 100644 --- a/quest/src/api/matrices.cpp +++ b/quest/src/api/matrices.cpp @@ -165,7 +165,7 @@ void freeAllMemoryIfAnyAllocsFailed(T matr) { // ascertain whether any allocs failed on any node bool anyFail = didAnyLocalAllocsFail(matr); - if (comm_isInit()) + if (comm_isActive()) anyFail = comm_isTrueOnAllNodes(anyFail); // if so, free all heap fields diff --git a/quest/src/api/paulis.cpp b/quest/src/api/paulis.cpp index a996f83c0..d5ac4d8e7 100644 --- a/quest/src/api/paulis.cpp +++ b/quest/src/api/paulis.cpp @@ -38,7 +38,7 @@ bool didAnyAllocsFailOnAnyNode(PauliStrSum sum) { ! mem_isAllocated(sum.coeffs) || ! mem_isAllocated(sum.isApproxHermitian) ); - if (comm_isInit()) + if (comm_isActive()) anyFail = comm_isTrueOnAllNodes(anyFail); return anyFail; diff --git a/quest/src/api/qureg.cpp b/quest/src/api/qureg.cpp index 3bad734e4..84bcd2bd0 100644 --- a/quest/src/api/qureg.cpp +++ b/quest/src/api/qureg.cpp @@ -116,7 +116,7 @@ bool didAnyLocalAllocsFail(Qureg qureg) { bool didAnyAllocsFailOnAnyNode(Qureg qureg) { bool anyFail = didAnyLocalAllocsFail(qureg); - if (comm_isInit()) + if (comm_isActive()) anyFail = comm_isTrueOnAllNodes(anyFail); return anyFail; diff --git a/quest/src/api/subcommunicator.cpp b/quest/src/api/subcommunicator.cpp deleted file mode 100644 index e248f0dba..000000000 --- a/quest/src/api/subcommunicator.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include "quest/include/config.h" -#include "quest/include/environment.h" -#include "quest/include/subcommunicator.h" - -#include "quest/src/comm/comm_config.hpp" -#include "quest/src/core/errors.hpp" - -#if QUEST_COMPILE_MPI && QUEST_COMPILE_SUBCOMM - -#include -#include - -void initCustomMpiCommQuESTEnv(MPI_Comm userQuestComm, int useGpuAccel, int useMultithread) { - // useDistrib and userOwnsMpi are implied by the user of this initialiser - const int useDistrib = 1; - const bool userOwnsMpi = true; - - 
// set mpiCommQuest to user provided communicator - if (comm_isInit()) { - comm_setMpiComm(userQuestComm); - } else { - error_commNotInit(); - } - - // initialise QuEST around that communicator - initCustomMpiQuESTEnv(useDistrib, userOwnsMpi, useGpuAccel, useMultithread); - - return; -} - -#endif diff --git a/quest/src/comm/comm_config.cpp b/quest/src/comm/comm_config.cpp index 67fd53231..5c59477ca 100644 --- a/quest/src/comm/comm_config.cpp +++ b/quest/src/comm/comm_config.cpp @@ -6,8 +6,11 @@ * * Note that even when QUEST_COMPILE_MPI=1, the user may have * disabled distribution when creating the QuEST environment - * at runtime. Ergo we use comm_isInit() to determine whether - * functions should invoke the MPI API. + * at runtime - even despite they themselves initialising and + * using MPI. So we must be careful about consulting MPI status! + * Furthermore, all routines here will only ever consult/affect + * the QuEST communicator, never the entire MPI environment, + * the latter of which may contain non-participating processes. * * @author Tyson Jones */ @@ -22,8 +25,6 @@ #if QUEST_COMPILE_MPI #include - - static MPI_Comm mpiCommQuest = MPI_COMM_NULL; #endif @@ -32,6 +33,7 @@ * WARN ABOUT CUDA-AWARENESS */ + #if QUEST_COMPILE_MPI && QUEST_COMPILE_CUDA // this check is OpenMPI specific @@ -54,9 +56,98 @@ +/* + * COMMUNICATOR MANAGEMENT + * + * QuEST will only ever use the overridable global_mpiComm communicator, + * so that superusers can dedicate external MPI processes to other tasks. + * Beware that it's valid for QuEST to be compiled with MPI, but have + * distribution runtime-disabled, while the user is themselves using + * (and ergo have initialised) MPI. In that scenario, we must not touch + * MPI, hence why comm_isActive() below is distinct from comm_isMpiInit(). 
+ */ + + +// We must record whether the user owns MPI, so that we do not ever attempt +// to kill it when gracefully exiting, or due to a validation error +static bool global_isMpiUserOwned = false; + + +// Guarded since MPI_Comm cannot be exposed when not compiling MPI. This +// communicator is overridden from NULL either BEFORE or DURING comm_init() +#if QUEST_COMPILE_MPI + static MPI_Comm global_mpiComm = MPI_COMM_NULL; +#endif + + +bool comm_isActive() { +#if QUEST_COMPILE_MPI + + // comm_init(), or potentially comm_setMpiComm() before it, will only + // ever override mpiComm with non-NULL, indicating active comm. Note + // it's possible in principle for mpiComm to later return to NULL, via comm_end(), + // and for QuEST execution to continue (though not supported presently). + // if comm_isActive() is true, then it is guaranteed MPI is initialised + return global_mpiComm != MPI_COMM_NULL; + + // note it is legal for QuEST distribution to be disabled (and ergo + // mpiComm never initialised) even when the user is themselves accessing + // MPI, hence this function is semantically distinct from comm_isMpiInit() +#else + + // QuEST communication is obviously never active if + // not even MPI is compiled; though this does not + // imply at all the user isn't themselves using MPI! + return false; + +#endif +} + + +// Hide MPI_Comm from signatures when MPI is not compiled. Beware that +// these are not exposed in comm_config.hpp; callers must 'extern' them!
+#if QUEST_COMPILE_MPI + + +MPI_Comm comm_getMpiComm() { + + // illegal to call before communicator has been overridden + if (global_mpiComm == MPI_COMM_NULL) + error_commMpiCommIsNull(); + + return global_mpiComm; +} + + +bool comm_setMpiComm(MPI_Comm newComm, bool userOwnsMpi) { + + // illegal to re-set, or set to null + if (global_mpiComm != MPI_COMM_NULL) + error_commAlreadyHasSetMpiComm(); + if (newComm == MPI_COMM_NULL) + error_commNewMpiCommIsNull(); + + // detect bad communicator, and inform validation + auto status = MPI_Comm_dup(newComm, &global_mpiComm); + if (status != MPI_SUCCESS) + return false; + + // record ownership as soon as QuEST communication becomes active, so + // validation errors during env initialisation never kill user-owned MPI + global_isMpiUserOwned = userOwnsMpi; + return true; +} + + +#endif // QUEST_COMPILE_MPI + + + /* * MPI ENVIRONMENT MANAGEMENT - * all of which is safely callable in non-distributed mode + * + * which queries MPI itself (as may be user-activated), rather + * than QuEST's (possibly more limited) MPI environment */ @@ -64,8 +155,7 @@ bool comm_isMpiCompiled() { return (bool) QUEST_COMPILE_MPI; } - -bool comm_isMpiSubCommunicatorCompiled() { +bool comm_isMpiSubCommCompiled() { return (bool) QUEST_COMPILE_SUBCOMM; } @@ -96,89 +186,93 @@ bool comm_isMpiGpuAware() { } -bool comm_isInit() { +bool comm_isMpiInit() { #if QUEST_COMPILE_MPI // safely callable before MPI initialisation, but NOT after comm_end() int isInit; MPI_Initialized(&isInit); + + // when MPI is not initialised, it is guaranteed that QuEST's communicator + // is inactive, which we double check here so callers can be absolutely sure + if (!isInit && comm_isActive()) + error_commActiveButMpiNotInit(); + return (bool) isInit; #else // obviously MPI is never initialised if not even compiled return false; + #endif } -void comm_init(int useDistrib, bool userOwnsMpi) { + +/* + * QUEST COMMUNICATION MANAGEMENT + * + * which interacts only with QuEST's MPI 
environment, + * which may be smaller than the user-controlled MPI env + */ + + +void comm_init(bool userOwnsMpi) { #if QUEST_COMPILE_MPI - // error if user owns MPI but has not initialised - if (userOwnsMpi && !comm_isInit()) { + // re-assert prior user-validations for clarity + if (userOwnsMpi && !comm_isMpiInit()) error_commNotInit(); - } + if (!userOwnsMpi && comm_isMpiInit()) + error_commAlreadyInit(); - // Overall mpiCommQuest should be set in the following ways - // however only useDistrib = 1 and userOwnsMpi = false - // and useDistrib = 0 and userOwnsMpi = true - // require action here - // - // | useDistrib | userOwnsMpi | mpiCommQuest | - // | ---------- | ----------- | -------------- | - // | 0 | false | MPI_COMM_NULL | - // | ---------- | ----------- | -------------- | - // | 1 | false | MPI_COMM_WORLD | - // | ---------- | ----------- | -------------- | - // | 0 | true | MPI_COMM_SELF | - // | ---------- | ----------- | -------------- | - // | | | MPI_COMM_WORLD | - // | 1 | true | or | - // | | | userQuestComm | - // | ---------- | ----------- | -------------- | - + // init MPI only when it's not the user's responsibility + if (!userOwnsMpi) + MPI_Init(NULL, NULL); - if (useDistrib && !userOwnsMpi) { - // error if attempting re-initialisation - if (comm_isInit()) { - error_commAlreadyInit(); - } else { - MPI_Init(NULL, NULL); - // The user wants MPI and is leaving it to QuEST - MPI_Comm_dup(MPI_COMM_WORLD, &mpiCommQuest); - } - } else if (!useDistrib && userOwnsMpi) { - // The user has initialised MPI but wants QuEST to ignore it - MPI_Comm_dup(MPI_COMM_SELF, &mpiCommQuest); - } else if (useDistrib && userOwnsMpi) { - // if mpiCommQuEST is still MPI_COMM_NULL the user is not - // providing their own MPI_Comm and we should set mpiCommQuest - // to MPI_COMM_WORLD - if (mpiCommQuest == MPI_COMM_NULL) - MPI_Comm_dup(MPI_COMM_WORLD, &mpiCommQuest); - } + // choose communicator only when the user hasn't already + // (via comm_setMpiComm, during custom env 
initialisation) + if (global_mpiComm == MPI_COMM_NULL) + comm_setMpiComm(MPI_COMM_WORLD, userOwnsMpi); #endif - return; } - -void comm_end(bool userOwnsMpi) { +void comm_end() { #if QUEST_COMPILE_MPI - // gracefully permit comm_end() before comm_init(), as input validation can trigger - if (!comm_isInit()) + // If QuEST isn't using distribution, regardless of whether the user is using MPI, + // then we gracefully exit. We do NOT attempt to end MPI on the user's behalf (as we + // may be tempted to do during validation failure to avoid their MPI-crash), because + // it's possible/legal that not all processes are participating in this comm_end() + // call, in which case MPI_Finalize() could just cause a hang. + if (!comm_isActive()) return; - MPI_Barrier(mpiCommQuest); - MPI_Comm_free(&mpiCommQuest); + // Syncing is not strictly necessary, but it ensures that finalizeQuESTEnv() never + // completes on one process while another process is still performing simulation + // (though that'd be weird), and so may avoid a silly user benchmarking pitfall + MPI_Barrier(global_mpiComm); + MPI_Comm_free(&global_mpiComm); - // QuEST must finalise MPI if the user does not own it - if (!userOwnsMpi) + // Do NOT close MPI if the user owns it; they may still wish to use it after QuEST! + if (!global_isMpiUserOwned) MPI_Finalize(); + // Presently, comm_end() is only ever called during QuESTEnv destruction (either + // deliberately, or because of failed validation during QuESTEnv initialisation). + // This means any comm_*() call hereafter is invalid/illegal and will be prevented + // by validation. However, we can imagine a future where distribution gets runtime + // disabled while QuEST execution continues (e.g. initQuESTEnv automatically + // disabled distribution), and so we must indicate that communication is no longer + // active by overwriting comm to NULL.
BEWARE that this is "hacky"; we have + // updated mpiComm here without MPI_Comm_dup(), but that's fine, because hereafter + // MPI will never be used again (illegal to re-init both MPI, and QuEST!) + global_mpiComm = MPI_COMM_NULL; + global_isMpiUserOwned = false; + #endif } @@ -187,19 +281,22 @@ int comm_getRank() { #if QUEST_COMPILE_MPI // if distribution was not runtime enabled (or a validation error was - // triggered), every node (if many MPI processes were launched) - // believes it is the root rank - if (!comm_isInit()) + // triggered during distributed initialisation), every process believes + // it is the root rank; this may lead to unavoidable error msg spam! + if (!comm_isActive()) return ROOT_RANK; + // obtain the process rank within the QuEST communicator, which can + // differ from the global MPI process rank when users own MPI int rank; - MPI_Comm_rank(mpiCommQuest, &rank); + MPI_Comm_rank(global_mpiComm, &rank); return rank; #else // if MPI isn't compiled, we're definitely non-distributed; return main rank return ROOT_RANK; + #endif } @@ -216,19 +313,25 @@ int comm_getNumNodes() { #if QUEST_COMPILE_MPI // if distribution was not runtime enabled (or a validation error was - // triggered), every node (if many MPI processes were launched) - // believes it is the one and only node - if (!comm_isInit()) + // triggered during distributed initialisation), every process is told + // it is the one and only node; this may lead to error msg spam, but + // appears unavoidable! 
+ if (!comm_isActive()) return 1; + // obtain the number of processes within the QuEST communicator, which + // can be smaller than global MPI process count when users own MPI int numNodes; - MPI_Comm_size(mpiCommQuest, &numNodes); + MPI_Comm_size(global_mpiComm, &numNodes); return numNodes; #else - // if MPI isn't compiled, we're definitely non-distributed; return single node + // if MPI isn't compiled, QuEST is definitely non-distributed and + // each process only knows itself (though users may own MPI and + // actually have many processes; that's none of our business!) return 1; + #endif } @@ -236,35 +339,13 @@ int comm_getNumNodes() { void comm_sync() { #if QUEST_COMPILE_MPI - // gracefully handle when not distributed, needed by e.g. pre-MPI-setup validation - if (!comm_isInit()) + // gracefully handle when not distributed, needed by e.g. pre-MPI-setup validation + if (!comm_isActive()) return; - MPI_Barrier(mpiCommQuest); -#endif -} + MPI_Barrier(global_mpiComm); -#if QUEST_COMPILE_MPI - MPI_Comm comm_getMpiComm() { - return mpiCommQuest; - } - - #if QUEST_COMPILE_SUBCOMM - void comm_setMpiComm(MPI_Comm newComm) { - - // error if mpiCommQuEST is already set! 
- if (mpiCommQuest != MPI_COMM_NULL) { - MPI_Barrier(mpiCommQuest); - MPI_Comm_free(&mpiCommQuest); - error_commDoubleSetMpiComm(); - } - - int mpi_err = MPI_Comm_dup(newComm, &mpiCommQuest); - if (mpi_err != MPI_SUCCESS) { - error_commInvalidMpiComm(); - } - - return; - } - #endif #endif + + // do nothing at all when MPI is not compiled (user owned MPI processes go unsynced) +} diff --git a/quest/src/comm/comm_config.hpp b/quest/src/comm/comm_config.hpp index b2d038cd5..826ebdf1c 100644 --- a/quest/src/comm/comm_config.hpp +++ b/quest/src/comm/comm_config.hpp @@ -10,34 +10,28 @@ #ifndef COMM_CONFIG_HPP #define COMM_CONFIG_HPP -#include "quest/include/config.h" - -#if QUEST_COMPILE_MPI - #include -#endif - constexpr int ROOT_RANK = 0; +// queries of MPI's global/general status (when visible) bool comm_isMpiCompiled(); -bool comm_isMpiSubCommunicatorCompiled(); +bool comm_isMpiSubCommCompiled(); bool comm_isMpiGpuAware(); +bool comm_isMpiInit(); -void comm_init(int useDistrib, bool userOwnsMpi); -void comm_end(bool userOwnsMpi); +// control of QuEST's (possibly more limited) MPI env +bool comm_isActive(); +void comm_init(bool userOwnsMpi); +void comm_end(); void comm_sync(); +// queries of QuEST's (possibly more limited) MPI env int comm_getRank(); int comm_getNumNodes(); - -bool comm_isInit(); bool comm_isRootNode(); bool comm_isRootNode(int rank); -#if QUEST_COMPILE_MPI - MPI_Comm comm_getMpiComm(); - #if QUEST_COMPILE_SUBCOMM - void comm_setMpiComm(MPI_Comm newComm); - #endif -#endif +// Signatures containing MPI types which callers must extern: +// extern MPI_Comm comm_getMpiComm() +// extern bool comm_setMpiComm(MPI_Comm newComm, bool userOwnsMpi) #endif // COMM_CONFIG_HPP diff --git a/quest/src/comm/comm_routines.cpp b/quest/src/comm/comm_routines.cpp index 0bc90563b..cf6956454 100644 --- a/quest/src/comm/comm_routines.cpp +++ b/quest/src/comm/comm_routines.cpp @@ -6,7 +6,7 @@ * * @author Tyson Jones * @author Jakub Adamski (sped-up large comm by asynch 
messages) - * @author Oliver Brown (patched max-message inference, consulted on AR and MPICH support) + * @author Oliver Brown (added custom communicators, patched max-message inference, consulted on AR and MPICH support) * @author Ania (Anna) Brown (developed QuEST v1 logic) */ @@ -24,6 +24,7 @@ #if QUEST_COMPILE_MPI #include + extern MPI_Comm comm_getMpiComm(); // comm_config.cpp does not leak MPI_Comm #endif #include @@ -149,8 +150,7 @@ int getMaxNumMessages() { // messages. Beware the max is obtained via a void pointer and might be unset... void* tagUpperBoundPtr; int isAttribSet; - MPI_Comm mpiCommQuest = comm_getMpiComm(); - MPI_Comm_get_attr(mpiCommQuest, MPI_TAG_UB, &tagUpperBoundPtr, &isAttribSet); + MPI_Comm_get_attr(comm_getMpiComm(), MPI_TAG_UB, &tagUpperBoundPtr, &isAttribSet); // if something went wrong with obtaining the tag bound, return the safe minimum if (!isAttribSet) @@ -217,7 +217,7 @@ std::array dividePayloadIntoMessages(qindex numAmps) { void exchangeArrays(qcomp* send, qcomp* recv, qindex numElems, int pairRank) { #if QUEST_COMPILE_MPI - MPI_Comm mpiCommQuest = comm_getMpiComm(); + MPI_Comm mpiComm = comm_getMpiComm(); // each message is asynchronously dispatched with a final wait, as per arxiv.org/abs/2308.07402 @@ -229,8 +229,8 @@ void exchangeArrays(qcomp* send, qcomp* recv, qindex numElems, int pairRank) { // so that messages are permitted to arrive out-of-order (supporting UCX adaptive-routing) for (qindex m=0; m(m); // gauranteed int, but m*messageSize needs qindex - MPI_Irecv(&recv[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, mpiCommQuest, &requests[2*m]); - MPI_Isend(&send[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, mpiCommQuest, &requests[2*m+1]); + MPI_Irecv(&recv[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, mpiComm, &requests[2*m]); + MPI_Isend(&send[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, mpiComm, &requests[2*m+1]); } // wait for all exchanges to complete (MPI will automatically free 
the request memory) @@ -251,7 +251,7 @@ void exchangeArrays(qcomp* send, qcomp* recv, qindex numElems, int pairRank) { void asynchSendArray(qcomp* send, qindex numElems, int pairRank) { #if QUEST_COMPILE_MPI - MPI_Comm mpiCommQuest = comm_getMpiComm(); + MPI_Comm mpiComm = comm_getMpiComm(); // we will not track nor wait for the asynch send; instead, the caller will later comm_sync() MPI_Request nullReq = MPI_REQUEST_NULL; @@ -262,7 +262,7 @@ void asynchSendArray(qcomp* send, qindex numElems, int pairRank) { // asynchronously send the uniquely-tagged messages for (qindex m=0; m(m); // gauranteed int, but m*messageSize needs qindex - MPI_Isend(&send[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, mpiCommQuest, &nullReq); + MPI_Isend(&send[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, mpiComm, &nullReq); } #else @@ -274,7 +274,7 @@ void asynchSendArray(qcomp* send, qindex numElems, int pairRank) { void receiveArray(qcomp* dest, qindex numElems, int pairRank) { #if QUEST_COMPILE_MPI - MPI_Comm mpiCommQuest = comm_getMpiComm(); + MPI_Comm mpiComm = comm_getMpiComm(); // expect the data in multiple messages auto [messageSize, numMessages] = dividePow2PayloadIntoMessages(numElems); @@ -285,7 +285,7 @@ void receiveArray(qcomp* dest, qindex numElems, int pairRank) { // listen to receive each uniquely-tagged message asynchronously (as per arxiv.org/abs/2308.07402) for (qindex m=0; m(m); // gauranteed int, but m*messageSize needs qindex - MPI_Irecv(&dest[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, mpiCommQuest, &requests[m]); + MPI_Irecv(&dest[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, mpiComm, &requests[m]); } // receivers wait for all messages to be received (while sender asynch proceeds) @@ -310,8 +310,7 @@ void globallyCombineNonUniformSubArrays( ) { #if QUEST_COMPILE_MPI - MPI_Comm mpiCommQuest = comm_getMpiComm(); - + auto mpiComm = comm_getMpiComm(); int myRank = comm_getRank(); int numNodes = comm_getNumNodes(); @@ -345,14 
+344,14 @@ void globallyCombineNonUniformSubArrays( for (int m=0; m 0) { qindex recvInd = globalRecvIndPerRank[sendRank] + (numBigMsgs * bigMsgSize); requests.push_back(MPI_REQUEST_NULL); - MPI_Ibcast(&recv[recvInd], remMsgSize, MPI_QCOMP, sendRank, mpiCommQuest, &requests.back()); + MPI_Ibcast(&recv[recvInd], remMsgSize, MPI_QCOMP, sendRank, mpiComm, &requests.back()); } } @@ -648,9 +647,7 @@ void comm_exchangeAmpsToBuffers(Qureg qureg, int pairRank) { void comm_broadcastAmp(int sendRank, qcomp* sendAmp) { #if QUEST_COMPILE_MPI - MPI_Comm mpiCommQuest = comm_getMpiComm(); - - MPI_Bcast(sendAmp, 1, MPI_QCOMP, sendRank, mpiCommQuest); + MPI_Bcast(sendAmp, 1, MPI_QCOMP, sendRank, comm_getMpiComm()); #else error_commButEnvNotDistributed(); @@ -661,7 +658,7 @@ void comm_broadcastAmp(int sendRank, qcomp* sendAmp) { void comm_sendAmpsToRoot(int sendRank, qcomp* send, qcomp* recv, qindex numAmps) { #if QUEST_COMPILE_MPI - MPI_Comm mpiCommQuest = comm_getMpiComm(); + MPI_Comm mpiComm = comm_getMpiComm(); // only the sender and root nodes need to continue int recvRank = ROOT_RANK; @@ -678,8 +675,8 @@ void comm_sendAmpsToRoot(int sendRank, qcomp* send, qcomp* recv, qindex numAmps) for (qindex m=0; m(m); (myRank == sendRank)? 
- MPI_Isend(&send[m*messageSize], messageSize, MPI_QCOMP, recvRank, tag, mpiCommQuest, &requests[m]): // sender - MPI_Irecv(&recv[m*messageSize], messageSize, MPI_QCOMP, sendRank, tag, mpiCommQuest, &requests[m]); // root + MPI_Isend(&send[m*messageSize], messageSize, MPI_QCOMP, recvRank, tag, mpiComm, &requests[m]): // sender + MPI_Irecv(&recv[m*messageSize], messageSize, MPI_QCOMP, sendRank, tag, mpiComm, &requests[m]); // root } // wait for all exchanges to complete (MPI will automatically free the request memory) @@ -692,13 +689,10 @@ void comm_sendAmpsToRoot(int sendRank, qcomp* send, qcomp* recv, qindex numAmps) void comm_broadcastIntsFromRoot(int* arr, qindex length) { - #if QUEST_COMPILE_MPI - MPI_Comm mpiCommQuest = comm_getMpiComm(); - int sendRank = ROOT_RANK; - MPI_Bcast(arr, length, MPI_INT, sendRank, mpiCommQuest); + MPI_Bcast(arr, length, MPI_INT, sendRank, comm_getMpiComm()); #else error_commButEnvNotDistributed(); @@ -709,10 +703,8 @@ void comm_broadcastIntsFromRoot(int* arr, qindex length) { void comm_broadcastUnsignedsFromRoot(unsigned* arr, qindex length) { #if QUEST_COMPILE_MPI - MPI_Comm mpiCommQuest = comm_getMpiComm(); - int sendRank = ROOT_RANK; - MPI_Bcast(arr, length, MPI_UNSIGNED, sendRank, mpiCommQuest); + MPI_Bcast(arr, length, MPI_UNSIGNED, sendRank, comm_getMpiComm()); #else error_commButEnvNotDistributed(); @@ -739,9 +731,7 @@ void comm_combineSubArrays(qcomp* recv, vector recvInds, vector void comm_reduceAmp(qcomp* localAmp) { #if QUEST_COMPILE_MPI - MPI_Comm mpiCommQuest = comm_getMpiComm(); - - MPI_Allreduce(MPI_IN_PLACE, localAmp, 1, MPI_QCOMP, MPI_SUM, mpiCommQuest); + MPI_Allreduce(MPI_IN_PLACE, localAmp, 1, MPI_QCOMP, MPI_SUM, comm_getMpiComm()); #else error_commButEnvNotDistributed(); @@ -752,9 +742,7 @@ void comm_reduceAmp(qcomp* localAmp) { void comm_reduceReal(qreal* localReal) { #if QUEST_COMPILE_MPI - MPI_Comm mpiCommQuest = comm_getMpiComm(); - - MPI_Allreduce(MPI_IN_PLACE, localReal, 1, MPI_QREAL, MPI_SUM, 
mpiCommQuest); + MPI_Allreduce(MPI_IN_PLACE, localReal, 1, MPI_QREAL, MPI_SUM, comm_getMpiComm()); #else error_commButEnvNotDistributed(); @@ -765,9 +753,7 @@ void comm_reduceReal(qreal* localReal) { void comm_reduceReals(qreal* localReals, qindex numLocalReals) { #if QUEST_COMPILE_MPI - MPI_Comm mpiCommQuest = comm_getMpiComm(); - - MPI_Allreduce(MPI_IN_PLACE, localReals, numLocalReals, MPI_QREAL, MPI_SUM, mpiCommQuest); + MPI_Allreduce(MPI_IN_PLACE, localReals, numLocalReals, MPI_QREAL, MPI_SUM, comm_getMpiComm()); #else error_commButEnvNotDistributed(); @@ -778,12 +764,10 @@ void comm_reduceReals(qreal* localReals, qindex numLocalReals) { bool comm_isTrueOnAllNodes(bool val) { #if QUEST_COMPILE_MPI - MPI_Comm mpiCommQuest = comm_getMpiComm(); - // perform global AND and broadcast result back to all nodes int local = (int) val; int global; - MPI_Allreduce(&local, &global, 1, MPI_INT, MPI_LAND, mpiCommQuest); + MPI_Allreduce(&local, &global, 1, MPI_INT, MPI_LAND, comm_getMpiComm()); return (bool) global; #else @@ -819,8 +803,6 @@ bool comm_isTrueOnRootNode(bool val) { vector comm_gatherStringsToRoot(char* localChars, int maxNumLocalChars) { #if QUEST_COMPILE_MPI - MPI_Comm mpiCommQuest = comm_getMpiComm(); - // no need to validate array sizes and memory alloc successes; // these are trivial O(#nodes)-size arrays containing <20 chars int numNodes = comm_getNumNodes(); @@ -831,7 +813,7 @@ vector comm_gatherStringsToRoot(char* localChars, int maxNumLocalChars) // all nodes send root all their local chars int recvRank = ROOT_RANK; MPI_Gather(localChars, maxNumLocalChars, MPI_CHAR, allChars.data(), - maxNumLocalChars, MPI_CHAR, recvRank, mpiCommQuest); + maxNumLocalChars, MPI_CHAR, recvRank, comm_getMpiComm()); // divide allChars into stings, delimited by each node's terminal char vector out(numNodes); diff --git a/quest/src/core/errors.cpp b/quest/src/core/errors.cpp index 8e61a2d1c..8879fc7a1 100644 --- a/quest/src/core/errors.cpp +++ b/quest/src/core/errors.cpp @@ 
-160,11 +160,6 @@ void error_commAlreadyInit() { raiseInternalError("The MPI communication environment was attemptedly re-initialised despite the QuEST environment already existing."); } -void error_commInvalidMpiComm() { - - raiseInternalError("The supplied MPI communicator was MPI_COMM_NULL, or duplication failed."); -} - void error_commButEnvNotDistributed() { raiseInternalError("A function attempted to invoke communication despite QuEST being compiled in non-distributed mode."); @@ -190,9 +185,24 @@ void error_commNumMessagesExceedTagMax() { raiseInternalError("A function attempted to communicate via more messages than permitted (since there would be more uniquely-tagged messages than the tag upperbound)."); } -void error_commDoubleSetMpiComm() { +void error_commAlreadyHasSetMpiComm() { - raiseInternalError("An attempt was made to set mpiCommQuest after it had already been set, as indicated by mpiCommQuest != MPI_COMM_NULL."); + raiseInternalError("An attempt was made to set the QuEST MPI communicator after it had already been set (and changed from MPI_COMM_NULL)."); +} + +void error_commMpiCommIsNull() { + + raiseInternalError("The MPI communicator was queried but was unexpectedly MPI_COMM_NULL."); +} + +void error_commNewMpiCommIsNull() { + + raiseInternalError("The MPI communicator was attemptedly set to MPI_COMM_NULL, which validation should have prior caught."); +} + +void error_commActiveButMpiNotInit() { + + raiseInternalError("QuEST believed communication was active, but MPI_Init reported MPI was not initialised."); } void assert_commBoundsAreValid(Qureg qureg, qindex sendInd, qindex recvInd, qindex numAmps) { diff --git a/quest/src/core/errors.hpp b/quest/src/core/errors.hpp index f276c06ad..33cc182c7 100644 --- a/quest/src/core/errors.hpp +++ b/quest/src/core/errors.hpp @@ -81,8 +81,6 @@ void error_commNotInit(); void error_commAlreadyInit(); -void error_commInvalidMpiComm(); - void error_commButEnvNotDistributed(); void error_commOutOfBounds(); @@ 
-93,7 +91,13 @@ void error_commGivenInconsistentNumSubArraysANodes(); void error_commNumMessagesExceedTagMax(); -void error_commDoubleSetMpiComm(); +void error_commAlreadyHasSetMpiComm(); + +void error_commMpiCommIsNull(); + +void error_commNewMpiCommIsNull(); + +void error_commActiveButMpiNotInit(); void assert_commBoundsAreValid(Qureg qureg, qindex sendInd, qindex recvInd, qindex numAmps); diff --git a/quest/src/core/randomiser.cpp b/quest/src/core/randomiser.cpp index 65c6da4eb..7b35a29fc 100644 --- a/quest/src/core/randomiser.cpp +++ b/quest/src/core/randomiser.cpp @@ -66,14 +66,14 @@ void rand_setSeeds(vector seeds) { // all nodes learn root node's #seeds unsigned numRootSeeds = seeds.size(); - if (comm_isInit()) + if (comm_isActive()) comm_broadcastUnsignedsFromRoot(&numRootSeeds, 1); // all nodes ensure they have space to receive root node's seeds seeds.resize(numRootSeeds); // all nodes receive root seeds - if (comm_isInit()) + if (comm_isActive()) comm_broadcastUnsignedsFromRoot(seeds.data(), seeds.size()); // all nodes remember seeds (in case user wishes to later recall them) diff --git a/quest/src/core/validation.cpp b/quest/src/core/validation.cpp index fc6adc58f..c727ad1c5 100644 --- a/quest/src/core/validation.cpp +++ b/quest/src/core/validation.cpp @@ -107,6 +107,21 @@ namespace report { string CUQUANTUM_DEPLOYED_ON_GPU_WITHOUT_MEM_POOLS = "Cannot use cuQuantum since your GPU does not support memory pools. 
Recompile with cuQuantum disabled to fall-back to using Thrust and custom kernels."; + string USER_OWNED_MPI_WAS_NOT_INIT = + "User owns MPI but did not prior initialise MPI before initialising QuEST."; + + string USER_GIVEN_MPI_COMMUNICATOR_IS_NULL = + "The provided MPI communicator was null (MPI_COMM_NULL)."; + + string USER_GIVEN_MPI_COMMUNICATOR_FAILED_TO_SET = + "The provided MPI communicator could not be used; MPI_Comm_dup() was not successful."; + + string QUEST_OWNED_MPI_WAS_PRE_INIT = + "MPI was already initialised prior to QuESTEnv initialisation, but the user did not declare MPI ownership."; + + string QUEST_IS_NON_DISTRIBUTED_BUT_MPI_WAS_INIT = + "QuESTEnv was initialised to be non-distributed but MPI was externally initialised - this is presently unsupported due to a (very minor) technical limitation. If you need this facility, please raise a Github issue!"; + /* * EXISTING QUESTENV @@ -1159,10 +1174,11 @@ void default_inputErrorHandler(const char* func, const char* msg) { // will then attempt to instantly abort all nodes, losing the error message. printer_sync(); - // finalise MPI before error-exit to avoid scaring user with giant MPI error message - // we always "take ownership" of MPI here since we're about to kill the whole program - if (comm_isInit()) - comm_end(0); + // finalise QuEST-owned MPI before error-exit to avoid scaring user with giant MPI crash + // message. note user-owned MPI is NOT killed because it's possible only SOME processes + // reach here, and attempting to sync/kill them would result in an MPI hang/crash anyway + if (comm_isActive()) + comm_end(); // keeps user-owned MPI alive // simply exit, interrupting any other process (potentially leaking) exit(EXIT_FAILURE); @@ -1344,7 +1360,7 @@ void assertAllNodesAgreeThat(bool valid, string msg, tokenSubs vars, const char* // when performing validation that may be non-uniform between nodes. 
For // example, mallocs may succeed on one node but fail on another due to // inhomogeneous loads. - if (comm_isInit()) + if (comm_isActive()) valid = comm_isTrueOnAllNodes(valid); // prepare error message only if validation will fail @@ -1486,6 +1502,53 @@ void validate_gpuIsCuQuantumCompatible(const char* caller) { assertAllNodesAgreeThat(hasMemPools, report::CUQUANTUM_DEPLOYED_ON_GPU_WITHOUT_MEM_POOLS, caller); } +void validate_mpiInitStatus(bool useDistrib, bool userOwnsMpi, const char* caller) { + + // Validation prior to this function confirms init(Custom*)QuESTEnv is only ever called + // once, but we must additionally confirm the user has interacted with MPI legally + + if (!global_isValidationEnabled) + return; + + // We consult whether MPI itself has been initialised, NOT whether QuEST is using it + bool isMpiInit = comm_isMpiInit(); + + // (A) If the user does not declare ownership of MPI, they are forbidden to initialise it, + // even when they are not distributing QuEST (i.e. useDistrib=0), just for clarity! + if (!userOwnsMpi) + assertThat(!isMpiInit, report::QUEST_OWNED_MPI_WAS_PRE_INIT, caller); + + // (B) If QuEST will use MPI owned by the user, the user must have pre-initialised it + if (useDistrib && userOwnsMpi) + assertThat(isMpiInit, report::USER_OWNED_MPI_WAS_NOT_INIT, caller); + + // Confirmation that all 8 scenarios are handled: + // useDistrib=0, userOwnsMpi=0, isMpiInit=0 (legal: nobody wants MPI) + // (A) useDistrib=0, userOwnsMpi=0, isMpiInit=1 (illegal: user lied about ownership) + // useDistrib=0, userOwnsMpi=1, isMpiInit=0 (legal: user owns MPI but does nothing!) 
+ // useDistrib=0, userOwnsMpi=1, isMpiInit=1 (legal: user owns MPI, QuEST won't use it) + // useDistrib=1, userOwnsMpi=0, isMpiInit=0 (legal: QuEST will init MPI) + // (A) useDistrib=1, userOwnsMpi=0, isMpiInit=1 (illegal: user lied about ownership) + // (B) useDistrib=1, userOwnsMpi=1, isMpiInit=0 (illegal: user has responsibility to pre-init) + // useDistrib=1, userOwnsMpi=1, isMpiInit=1 (legal: user fulfilled responsibility to pre-init) +} + +void validate_mpiSubCommIsNonNull(bool isNonNull, const char* caller) { + + if (!global_isValidationEnabled) + return; + + assertThat(isNonNull, report::USER_GIVEN_MPI_COMMUNICATOR_IS_NULL, caller); +} + +void validate_mpiSubCommSetSucceeded(bool success, const char* caller) { + + if (!global_isValidationEnabled) + return; + + assertThat(success, report::USER_GIVEN_MPI_COMMUNICATOR_FAILED_TO_SET, caller); +} + /* diff --git a/quest/src/core/validation.hpp b/quest/src/core/validation.hpp index 66fb8f546..787316326 100644 --- a/quest/src/core/validation.hpp +++ b/quest/src/core/validation.hpp @@ -77,6 +77,12 @@ void validate_newEnvNodesEachHaveUniqueGpu(const char* caller); void validate_gpuIsCuQuantumCompatible(const char* caller); +void validate_mpiInitStatus(bool useDistrib, bool userOwnsMpi, const char* caller); + +void validate_mpiSubCommIsNonNull(bool isNonNull, const char* caller); + +void validate_mpiSubCommSetSucceeded(bool success, const char* caller); + /* diff --git a/quest/src/gpu/gpu_config.cpp b/quest/src/gpu/gpu_config.cpp index 5bf4b257f..4e03217e5 100644 --- a/quest/src/gpu/gpu_config.cpp +++ b/quest/src/gpu/gpu_config.cpp @@ -395,7 +395,7 @@ bool gpu_areAnyNodesBoundToSameGpu() { #if QUEST_COMPILE_CUDA assert_gpuHasBeenBound(hasGpuBeenBound); - if (!comm_isInit()) + if (!comm_isActive()) return false; // obtain bound GPU's UUID; a unique identifier 16-char identifier diff --git a/tests/unit/environment.cpp b/tests/unit/environment.cpp index 344ac5864..9ecf8e376 100644 --- a/tests/unit/environment.cpp +++
b/tests/unit/environment.cpp @@ -158,13 +158,6 @@ TEST_CASE( "getQuESTEnv", TEST_CATEGORY ) { QuESTEnv env = getQuESTEnv(); - REQUIRE( (env.isMultithreaded == 0 || env.isMultithreaded == 1) ); - REQUIRE( (env.isGpuAccelerated == 0 || env.isGpuAccelerated == 1) ); - REQUIRE( (env.isDistributed == 0 || env.isDistributed == 1) ); - REQUIRE( (env.userOwnsMpi == 0 || env.userOwnsMpi == 1) ); - REQUIRE( (env.isCuQuantumEnabled == 0 || env.isCuQuantumEnabled == 1) ); - REQUIRE( (env.isGpuSharingEnabled == 0 || env.isGpuSharingEnabled == 1) ); - REQUIRE( env.rank >= 0 ); REQUIRE( env.numNodes >= 0 );