diff --git a/examples/extended/user_owned_mpi.c b/examples/extended/user_owned_mpi.c
new file mode 100644
index 000000000..4e3c766f4
--- /dev/null
+++ b/examples/extended/user_owned_mpi.c
@@ -0,0 +1,49 @@
+/** @file
+ * 
+ * An example of using QuEST's experimental
+ * initCustomMpiQuESTEnv() function to
+ * initialise QuEST in an environment where
+ * MPI is owned and controlled by the user.
+ * 
+ * @author Oliver Brown
+ * @author Tyson Jones (doc)
+ */
+
+#include "quest.h"
+#include <stdio.h>
+
+
+// This example requires linking with MPI, which the CMake
+// build only enables when QUEST_ENABLE_SUBCOMM is ON, which
+// results in quest.h defining QUEST_COMPILE_SUBCOMM. To
+// keep this example always compilable (like during our CI),
+// we stub out main() whenever QUEST_ENABLE_SUBCOMM is OFF.
+#if ! QUEST_COMPILE_SUBCOMM
+int main(void)
+{    
+    printf("Example skipped since MPI is not linked.\n");
+    return 0;
+}
+#else 
+
+
+#include <mpi.h>
+
+int main(void)
+{
+    const int  USE_DISTRIB = 1;
+    const bool USER_MPI    = 1;
+    const int  USE_OPENMP  = 1;
+    const int  USE_GPU     = 0;
+
+    MPI_Init(NULL, NULL);
+    initCustomMpiQuESTEnv(USE_DISTRIB, USER_MPI, USE_GPU, USE_OPENMP);
+    reportQuESTEnv();
+    finalizeQuESTEnv();
+    MPI_Finalize();
+
+    return 0;
+}
+
+
+#endif // QUEST_COMPILE_SUBCOMM
diff --git a/examples/extended/user_owned_mpi.cpp b/examples/extended/user_owned_mpi.cpp
new file mode 100644
index 000000000..54345d576
--- /dev/null
+++ b/examples/extended/user_owned_mpi.cpp
@@ -0,0 +1,49 @@
+/** @file
+ * 
+ * An example of using QuEST's experimental
+ * initCustomMpiQuESTEnv() function to
+ * initialise QuEST in an environment where
+ * MPI is owned and controlled by the user.
+ * 
+ * @author Oliver Brown
+ * @author Tyson Jones (doc)
+ */
+
+#include "quest.h"
+#include <cstdio>
+
+
+// This example requires linking with MPI, which the CMake
+// build only enables when QUEST_ENABLE_SUBCOMM is ON, which
+// results in quest.h defining QUEST_COMPILE_SUBCOMM. To
+// keep this example always compilable (like during our CI),
+// we stub out main() whenever QUEST_ENABLE_SUBCOMM is OFF.
+#if ! QUEST_COMPILE_SUBCOMM
+int main(void)
+{    
+    std::printf("Example skipped since MPI is not linked.\n");
+    return 0;
+}
+#else 
+
+
+#include <mpi.h>
+
+int main(void)
+{
+    const int  USE_DISTRIB = 1;
+    const bool USER_MPI    = 1;
+    const int  USE_OPENMP  = 1;
+    const int  USE_GPU     = 0;
+
+    MPI_Init(NULL, NULL);
+    initCustomMpiQuESTEnv(USE_DISTRIB, USER_MPI, USE_GPU, USE_OPENMP);
+    reportQuESTEnv();
+    finalizeQuESTEnv();
+    MPI_Finalize();
+
+    return 0;
+}
+
+
+#endif // QUEST_COMPILE_SUBCOMM
diff --git a/examples/extended/user_owned_submpi.c b/examples/extended/user_owned_submpi.c
new file mode 100644
index 000000000..6f2ea6290
--- /dev/null
+++ b/examples/extended/user_owned_submpi.c
@@ -0,0 +1,84 @@
+/** @file
+ * 
+ * An example of using QuEST's experimental
+ * initCustomMpiCommQuESTEnv() function to
+ * dedicate only some user-owned MPI processes
+ * to QuEST, and dedicate the remainder to
+ * other tasks.
+ * 
+ * @author Oliver Brown
+ * @author Tyson Jones (doc)
+ */
+
+#include "quest.h"
+#include <stdio.h>
+
+
+// This example requires linking with MPI, which the CMake
+// build only enables when QUEST_ENABLE_SUBCOMM is ON, which
+// results in quest.h defining QUEST_COMPILE_SUBCOMM. To
+// keep this example always compilable (like during our CI),
+// we stub out main() whenever QUEST_ENABLE_SUBCOMM is OFF.
+#if ! QUEST_COMPILE_SUBCOMM
+int main()
+{    
+    printf("Example skipped since MPI is not linked.\n");
+    return 0;
+}
+#else 
+
+
+#include <mpi.h>
+
+int main (void)
+{
+    int nprocs, quest_nprocs, world_rank, quest_rank;
+    MPI_Comm comm_split, comm_quantum, comm_classical;
+
+    MPI_Init(NULL, NULL);
+
+    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+
+    const int I_AM_QUANTUM = world_rank % 2;
+
+    printf("[%d] Hello from rank %d of %d in MPI_COMM_WORLD.\n", world_rank, world_rank, nprocs);
+
+    MPI_Comm_split(MPI_COMM_WORLD, I_AM_QUANTUM, world_rank, &comm_split);
+
+    if (I_AM_QUANTUM) {
+        MPI_Comm_dup(comm_split, &comm_quantum);
+        MPI_Comm_size(comm_quantum, &quest_nprocs);
+        MPI_Comm_rank(comm_quantum, &quest_rank);
+        printf("[%d] Hello from rank %d of %d in comm_quantum.\n", world_rank, quest_rank, quest_nprocs);
+    } else {
+        MPI_Comm_dup(comm_split, &comm_classical);
+        quest_rank = -1;
+        quest_nprocs = -1;
+    }
+
+    // only procs in quantum comm initialise QuEST
+    if (I_AM_QUANTUM) {
+        printf("[%d] Initialising QuEST.\n", world_rank);
+        initCustomMpiCommQuESTEnv(comm_quantum, -1, -1); // -1 = auto-deployments
+
+        reportQuESTEnv();
+
+        printf("[%d] Finalising QuEST.\n", world_rank);
+        finalizeQuESTEnv();
+    }
+
+    MPI_Comm_free(&comm_split);
+    if (I_AM_QUANTUM) {
+        MPI_Comm_free(&comm_quantum);
+    } else {
+        MPI_Comm_free(&comm_classical);
+    }
+
+    MPI_Finalize();
+
+    return 0;
+}
+
+
+#endif // QUEST_COMPILE_SUBCOMM
diff --git a/examples/extended/user_owned_submpi.cpp b/examples/extended/user_owned_submpi.cpp
new file mode 100644
index 000000000..ea82a4f9d
--- /dev/null
+++ b/examples/extended/user_owned_submpi.cpp
@@ -0,0 +1,84 @@
+/** @file
+ * 
+ * An example of using QuEST's experimental
+ * initCustomMpiCommQuESTEnv() function to
+ * dedicate only some user-owned MPI processes
+ * to QuEST, and dedicate the remainder to
+ * other tasks.
+ * 
+ * @author Oliver Brown
+ * @author Tyson Jones (doc)
+ */
+
+#include "quest.h"
+#include <cstdio>
+
+
+// This example requires linking with MPI, which the CMake
+// build only enables when QUEST_ENABLE_SUBCOMM is ON, which
+// results in quest.h defining QUEST_COMPILE_SUBCOMM. To
+// keep this example always compilable (like during our CI),
+// we stub out main() whenever QUEST_ENABLE_SUBCOMM is OFF.
+#if ! QUEST_COMPILE_SUBCOMM
+int main()
+{    
+    std::printf("Example skipped since MPI is not linked.\n");
+    return 0;
+}
+#else 
+
+
+#include <mpi.h>
+
+int main (void)
+{
+    int nprocs, quest_nprocs, world_rank, quest_rank;
+    MPI_Comm comm_split, comm_quantum, comm_classical;
+
+    MPI_Init(NULL, NULL);
+
+    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+
+    const int I_AM_QUANTUM = world_rank % 2;
+
+    std::printf("[%d] Hello from rank %d of %d in MPI_COMM_WORLD.\n", world_rank, world_rank, nprocs);
+
+    MPI_Comm_split(MPI_COMM_WORLD, I_AM_QUANTUM, world_rank, &comm_split);
+
+    if (I_AM_QUANTUM) {
+        MPI_Comm_dup(comm_split, &comm_quantum);
+        MPI_Comm_size(comm_quantum, &quest_nprocs);
+        MPI_Comm_rank(comm_quantum, &quest_rank);
+        std::printf("[%d] Hello from rank %d of %d in comm_quantum.\n", world_rank, quest_rank, quest_nprocs);
+    } else {
+        MPI_Comm_dup(comm_split, &comm_classical);
+        quest_rank = -1;
+        quest_nprocs = -1;
+    }
+
+    // only procs in quantum comm initialise QuEST
+    if (I_AM_QUANTUM) {
+        std::printf("[%d] Initialising QuEST.\n", world_rank);
+        initCustomMpiCommQuESTEnv(comm_quantum, modeflag::USE_AUTO, modeflag::USE_AUTO);
+
+        reportQuESTEnv();
+
+        std::printf("[%d] Finalising QuEST.\n", world_rank);
+        finalizeQuESTEnv();
+    }
+
+    MPI_Comm_free(&comm_split);
+    if (I_AM_QUANTUM) {
+        MPI_Comm_free(&comm_quantum);
+    } else {
+        MPI_Comm_free(&comm_classical);
+    }
+
+    MPI_Finalize();
+
+    return 0;
+}
+
+
+#endif // QUEST_COMPILE_SUBCOMM
diff --git a/quest/include/environment.h b/quest/include/environment.h
index 15a6ac5e2..cdefa7d7d 100644
--- a/quest/include/environment.h
+++ b/quest/include/environment.h
@@ -35,13 +35,13 @@ extern "C" {
 typedef struct {
 
     // deployment modes which can be runtime disabled
-    int isMultithreaded;
-    int isGpuAccelerated;
-    int isDistributed;
-    bool userOwnsMpi;
+    bool isMultithreaded;
+    bool isGpuAccelerated;
+    bool isDistributed;
+    bool isMpiUserOwned;
 
     // deployment modes which cannot be directly changed after compilation
-    int isCuQuantumEnabled;
+    bool isCuQuantumEnabled;
 
     // deployment configurations which can be changed via environment variables
     int isGpuSharingEnabled;
@@ -65,12 +65,6 @@ void initQuESTEnv();
  */
 void initCustomQuESTEnv(int useDistrib, int useGpuAccel, int useMultithread);
 
-/** @notyetdoced
- *  Advanced initialiser which lets the user positively declare that they take responsibility for MPI.
- *  This means we assume they have called MPI_Init, and that they will call MPI_Finalize.
- */
-void initCustomMpiQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuAccel, int useMultithread);
-
 /// @notyetdoced
 void finalizeQuESTEnv();
 
diff --git a/quest/include/experimental.h b/quest/include/experimental.h
new file mode 100644
index 000000000..2fabdc34f
--- /dev/null
+++ b/quest/include/experimental.h
@@ -0,0 +1,75 @@
+/** @file
+ * Experimental functions which are liable to
+ * API breaks within QuEST minor version releases.
+ * Some optional functions require compiling this
+ * file against MPI, despite being outside of /comm/, 
+ * and so require opt-in macros (QUEST_COMPILE_SUBCOMM)
+ * 
+ * @author Oliver Brown
+ * @author Tyson Jones (formatting)
+ * 
+ * @defgroup experimental Experimental
+ * @ingroup api
+ * @brief Experimental functions with tentative APIs
+ * @{
+ */
+
+#ifndef EXPERIMENTAL_H
+#define EXPERIMENTAL_H
+
+#include "quest/include/config.h"
+
+#if QUEST_COMPILE_SUBCOMM && ! QUEST_COMPILE_MPI
+    #error "Macro QUEST_COMPILE_SUBCOMM was true, but QUEST_COMPILE_MPI was illegally false."
+#endif
+
+#if QUEST_COMPILE_SUBCOMM
+    #include <mpi.h>
+#endif
+
+// enable invocation by both C and C++ binaries
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** @notyetdoced
+ *
+ *  Advanced initialiser which lets the user positively declare that they take responsibility for MPI.
+ *  This means we assume they have called MPI_Init, and that they will call MPI_Finalize.
+ * 
+ * @author Oliver Brown
+ */
+void initCustomMpiQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuAccel, int useMultithread);
+
+
+#if QUEST_COMPILE_SUBCOMM
+
+/** @notyetdoced
+ * 
+ *  Advanced initialiser which allows the user to provide an MPI communicator for QuEST to use.
+ *  Use of this initialiser implies userOwnsMpi = true (as exposed by initCustomMpiQuESTEnv),
+ *  and therefore that the user has already initialised MPI, and will call MPI_Finalize at
+ *  the appropriate time.
+ *
+ *  The user-provided MPI communicator undergoes the same validation procedure as any that QuEST
+ *  would use, and so must contain a power-of-2 number of processes.
+ * 
+ *  This function is only compiled and exposed when macro QUEST_COMPILE_SUBCOMM is 1, which
+ *  is the case when CMake option QUEST_ENABLE_SUBCOMM is ON during building.
+ *
+ * @author Oliver Brown
+ */
+void initCustomMpiCommQuESTEnv(MPI_Comm questComm, int useGpuAccel, int useMultithread);
+
+#endif // QUEST_COMPILE_SUBCOMM
+
+
+// end de-mangler
+#ifdef __cplusplus
+}
+#endif
+
+#endif // EXPERIMENTAL_H
+
+/** @} */ // (end file-wide doxygen defgroup)
diff --git a/quest/include/quest.h b/quest/include/quest.h
index 16f8e9b49..da1c778e2 100644
--- a/quest/include/quest.h
+++ b/quest/include/quest.h
@@ -38,6 +38,7 @@
 #include "quest/include/debug.h"
 #include "quest/include/decoherence.h"
 #include "quest/include/environment.h"
+#include "quest/include/experimental.h"
 #include "quest/include/trotterisation.h"
 #include "quest/include/initialisations.h"
 #include "quest/include/channels.h"
@@ -45,7 +46,6 @@
 #include "quest/include/operations.h"
 #include "quest/include/paulis.h"
 #include "quest/include/qureg.h"
-#include "quest/include/subcommunicator.h"
 #include "quest/include/matrices.h"
 #include "quest/include/wrappers.h"
 
diff --git a/quest/include/subcommunicator.h b/quest/include/subcommunicator.h
deleted file mode 100644
index 8854404d6..000000000
--- a/quest/include/subcommunicator.h
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef SUBCOMMUNICATOR_H
-#define SUBCOMMUNICATOR_H
-
-#include "quest/include/config.h" 
-
-#if QUEST_COMPILE_MPI && QUEST_COMPILE_SUBCOMM
-
-#include <mpi.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/** @notyetdoced
- *  Advanced initialiser which allows the user to provide an MPI communicator for QuEST to use.
- *  Use of this initialiser implies userOwnsMpi = true, (exposed by initCustomMpiQuESTEnv) and 
- *  therefore that they have already initialised MPI, and they will call MPI_Finalize at the 
- *  appropriate time.
- *
- *  The user-provided MPI communicator undergoes the same validation procedure as any that QuEST
- *  would use, and so must contain a power-of-2 number of processes.
- */
-void initCustomMpiCommQuESTEnv(MPI_Comm questComm, int useGpuAccel, int useMultithread);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
-
-#endif
diff --git a/quest/src/api/CMakeLists.txt b/quest/src/api/CMakeLists.txt
index 43b61df7d..7f90dcf17 100644
--- a/quest/src/api/CMakeLists.txt
+++ b/quest/src/api/CMakeLists.txt
@@ -5,6 +5,7 @@ target_sources(QuEST
   debug.cpp
   decoherence.cpp
   environment.cpp
+  experimental.cpp
   initialisations.cpp
   matrices.cpp
   modes.cpp
@@ -12,7 +13,6 @@ target_sources(QuEST
   operations.cpp
   paulis.cpp
   qureg.cpp
-  subcommunicator.cpp
   trotterisation.cpp
   types.cpp
 )
diff --git a/quest/src/api/channels.cpp b/quest/src/api/channels.cpp
index 450d04cbd..c6702438a 100644
--- a/quest/src/api/channels.cpp
+++ b/quest/src/api/channels.cpp
@@ -107,7 +107,7 @@ void freeAllMemoryIfAnyAllocsFailed(T& obj) {
 
     // determine whether any node experienced a failure
     bool anyFail = didAnyLocalAllocsFail(obj);
-    if (comm_isInit())
+    if (comm_isActive())
         anyFail = comm_isTrueOnAllNodes(anyFail);
 
     // if so, free all memory before subsequent validation
diff --git a/quest/src/api/environment.cpp b/quest/src/api/environment.cpp
index b0eb38c77..abf3127e8 100644
--- a/quest/src/api/environment.cpp
+++ b/quest/src/api/environment.cpp
@@ -48,7 +48,7 @@ using std::string;
  */
 
 
-static QuESTEnv* globalEnvPtr = nullptr;
+static QuESTEnv* global_envPtr = nullptr;
 
 
 
@@ -62,7 +62,7 @@ static QuESTEnv* globalEnvPtr = nullptr;
  */
 
 
-static bool hasEnvBeenFinalized = false;
+static bool global_hasEnvBeenFinalized = false;
 
 
 
@@ -74,9 +74,12 @@ static bool hasEnvBeenFinalized = false;
 void validateAndInitCustomQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuAccel, int useMultithread, const char* caller) {
 
     // ensure that we are never re-initialising QuEST (even after finalize) because
-    // this leads to undefined behaviour in distributed mode, as per the MPI
-    validate_envNeverInit(globalEnvPtr != nullptr, hasEnvBeenFinalized, caller);
+    // this leads to undefined behaviour in distributed mode, as per the MPI std,
+    // regardless of whether the user owns MPI
+    validate_envNeverInit(global_envPtr != nullptr, global_hasEnvBeenFinalized, caller);
 
+    // load env-vars before validating deployment mode, because some env vars can
+    // affect validation (such as QUEST_PERMIT_NODES_TO_SHARE_GPU)
     envvars_validateAndLoadEnvVars(caller);
     validateconfig_setEpsilonToDefault();
 
@@ -86,14 +89,19 @@ void validateAndInitCustomQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuA
     // by mpirun believe they are each the main rank. This seems unavoidable.
     validate_newEnvDeploymentMode(useDistrib, useGpuAccel, useMultithread, caller);
 
-    // overwrite deployments left as modeflag::USE_AUTO
+    // overwrite deployments left as modeflag::USE_AUTO (= -1) with 0 or 1 (a bool)
+    // which, crucially, resolves useDistrib, permitting its consultation below
     autodep_chooseQuESTEnvDeployment(useDistrib, useGpuAccel, useMultithread);
 
+    // ensure that current state of MPI is valid
+    validate_mpiInitStatus(useDistrib, userOwnsMpi, caller);
+
     // optionally initialise MPI; necessary before completing validation,
     // and before any GPU initialisation and validation, since we will
     // perform that specifically upon the MPI-process-bound GPU(s). Further,
     // we can make sure validation errors are reported only by the root node.
-    comm_init(useDistrib, userOwnsMpi);
+    if (useDistrib)
+        comm_init(userOwnsMpi);
 
     validate_newEnvDistributedBetweenPower2Nodes(caller);
 
@@ -138,31 +146,26 @@ void validateAndInitCustomQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuA
     rand_setSeedsToDefault();
 
     // allocate space for the global QuESTEnv singleton (overwriting nullptr, unless malloc fails)
-    globalEnvPtr = (QuESTEnv*) malloc(sizeof(QuESTEnv));
+    global_envPtr = (QuESTEnv*) malloc(sizeof(QuESTEnv));
 
     // pedantically check that teeny tiny malloc just succeeded
-    if (globalEnvPtr == nullptr)
+    if (global_envPtr == nullptr)
         error_allocOfQuESTEnvFailed();
 
-    // bind deployment info to global instance
-    globalEnvPtr->isMultithreaded     = useMultithread;
-    globalEnvPtr->isGpuAccelerated    = useGpuAccel;
-    globalEnvPtr->isDistributed       = useDistrib;
-    globalEnvPtr->userOwnsMpi         = userOwnsMpi;
-    globalEnvPtr->isCuQuantumEnabled  = useCuQuantum;
-    globalEnvPtr->isGpuSharingEnabled = permitGpuSharing;
-    globalEnvPtr->isMpiGpuAware       = isMpiGpuAware;
+    // bind deployment info to global instance (autocasting int to bool)
+    global_envPtr->isMultithreaded     = useMultithread;
+    global_envPtr->isGpuAccelerated    = useGpuAccel;
+    global_envPtr->isDistributed       = useDistrib;
+    global_envPtr->isMpiUserOwned      = userOwnsMpi;
+    global_envPtr->isMpiGpuAware        = isMpiGpuAware;
+    global_envPtr->isCuQuantumEnabled  = useCuQuantum;
+    global_envPtr->isGpuSharingEnabled = permitGpuSharing;
 
     // bind distributed info
-    globalEnvPtr->rank     = (useDistrib)? comm_getRank()     : 0;
-    globalEnvPtr->numNodes = (useDistrib)? comm_getNumNodes() : 1;
+    global_envPtr->rank     = (useDistrib)? comm_getRank()     : 0;
+    global_envPtr->numNodes = (useDistrib)? comm_getNumNodes() : 1;
 }
 
-void updateQuESTEnvDistInfo() {
-    globalEnvPtr->rank     = (globalEnvPtr->isDistributed)? comm_getRank()     : 0;
-    globalEnvPtr->numNodes = (globalEnvPtr->isDistributed)? comm_getNumNodes() : 1;
-    return;
-}
 
 
 /*
@@ -197,11 +200,11 @@ void printCompilationInfo() {
 
     print_table(
         "compilation", {
-        {"isMpiCompiled",                comm_isMpiCompiled()},
-        {"isMpiSubCommunicatorCompiled", comm_isMpiSubCommunicatorCompiled()},
-        {"isGpuCompiled",                gpu_isGpuCompiled()},
-        {"isOmpCompiled",                cpu_isOpenmpCompiled()},
-        {"isCuQuantumCompiled",          gpu_isCuQuantumCompiled()},
+        {"isMpiCompiled",         comm_isMpiCompiled()},
+        {"isMpiSubCommCompiled",  comm_isMpiSubCommCompiled()},
+        {"isGpuCompiled",         gpu_isGpuCompiled()},
+        {"isOmpCompiled",         cpu_isOpenmpCompiled()},
+        {"isCuQuantumCompiled",   gpu_isCuQuantumCompiled()},
     });
 }
 
@@ -210,13 +213,10 @@ void printDeploymentInfo() {
 
     print_table(
         "deployment", {
-        {"isMpiEnabled",        globalEnvPtr->isDistributed},
-        {"doesUserOwnMpi",      globalEnvPtr->userOwnsMpi},
-        {"isGpuEnabled",        globalEnvPtr->isGpuAccelerated},
-        {"isOmpEnabled",        globalEnvPtr->isMultithreaded},
-        {"isCuQuantumEnabled",  globalEnvPtr->isCuQuantumEnabled},
-        {"isGpuSharingEnabled", globalEnvPtr->isGpuSharingEnabled},
-        {"isMpiGpuAware",       globalEnvPtr->isMpiGpuAware},
+        {"isMpiEnabled",        global_envPtr->isDistributed},
+        {"isGpuEnabled",        global_envPtr->isGpuAccelerated},
+        {"isOmpEnabled",        global_envPtr->isMultithreaded},
+        {"isCuQuantumEnabled",  global_envPtr->isCuQuantumEnabled},
     });
 }
 
@@ -273,10 +273,16 @@ void printDistributionInfo() {
 
     using namespace printer_substrings;
 
+    bool comm = global_envPtr->isDistributed;
+    bool gpu  = global_envPtr->isGpuAccelerated;
+    bool both = comm && gpu;
+
     print_table(
         "distribution", {
-        {"isMpiGpuAware", comm_isInit()? printer_toStr(globalEnvPtr->isMpiGpuAware) : na},
-        {"numMpiNodes",   printer_toStr(globalEnvPtr->numNodes)},
+        {"isMpiUserOwned",      comm? printer_toStr(global_envPtr->isMpiUserOwned) : na},
+        {"isMpiGpuAware",       comm? printer_toStr(global_envPtr->isMpiGpuAware ) : na},
+        {"isGpuSharingEnabled", both? printer_toStr(global_envPtr->isGpuSharingEnabled) : na},
+        {"numMpiNodes",         printer_toStr(global_envPtr->numNodes)},
     });
 }
 
@@ -286,7 +292,7 @@ void printQuregSizeLimits(bool isDensMatr) {
     using namespace printer_substrings;
 
     // for brevity
-    int numNodes = globalEnvPtr->numNodes;
+    int numNodes = global_envPtr->numNodes;
 
     // by default, CPU limits are unknown (because memory query might fail)
     string maxQbForCpu = un;
@@ -298,7 +304,7 @@ void printQuregSizeLimits(bool isDensMatr) {
         maxQbForCpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, 1, cpuMem));
 
         // and the max MPI sizes are only relevant when env is distributed
-        if (globalEnvPtr->isDistributed)
+        if (global_envPtr->isDistributed)
             maxQbForMpiCpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, numNodes, cpuMem));
 
         // when MPI irrelevant, change their status from "unknown" to "N/A"
@@ -313,12 +319,12 @@ void printQuregSizeLimits(bool isDensMatr) {
     string maxQbForMpiGpu = na;
 
     // max GPU registers only relevant if env is GPU-accelerated
-    if (globalEnvPtr->isGpuAccelerated) {
+    if (global_envPtr->isGpuAccelerated) {
         qindex gpuMem = gpu_getCurrentAvailableMemoryInBytes();
         maxQbForGpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, 1, gpuMem));
 
         // and the max MPI sizes are further only relevant when env is distributed 
-        if (globalEnvPtr->isDistributed)
+        if (global_envPtr->isDistributed)
             maxQbForMpiGpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, numNodes, gpuMem));
     }
 
@@ -355,7 +361,7 @@ void printQuregAutoDeployments(bool isDensMatr) {
 
     // test to theoretically max #qubits, surpassing max that can fit in RAM and GPUs, because
     // auto-deploy will still try to deploy there to (then subsequent validation will fail)
-    int maxQubits = mem_getMaxNumQuregQubitsBeforeGlobalMemSizeofOverflow(isDensMatr, globalEnvPtr->numNodes);
+    int maxQubits = mem_getMaxNumQuregQubitsBeforeGlobalMemSizeofOverflow(isDensMatr, global_envPtr->numNodes);
 
     for (int numQubits=1; numQubits<maxQubits; numQubits++) {
 
@@ -363,7 +369,7 @@ void printQuregAutoDeployments(bool isDensMatr) {
         useDistrib  = modeflag::USE_AUTO;
         useGpuAccel = modeflag::USE_AUTO;
         useMulti    = modeflag::USE_AUTO;;
-        autodep_chooseQuregDeployment(numQubits, isDensMatr, useDistrib, useGpuAccel, useMulti, *globalEnvPtr);
+        autodep_chooseQuregDeployment(numQubits, isDensMatr, useDistrib, useGpuAccel, useMulti, *global_envPtr);
 
         // skip if deployments are unchanged
         if (useDistrib  == prevDistrib  &&
@@ -415,10 +421,6 @@ void initCustomQuESTEnv(int useDistrib, int useGpuAccel, int useMultithread) {
 }
 
 
-void initCustomMpiQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuAccel, int useMultithread) {
-    validateAndInitCustomQuESTEnv(useDistrib, userOwnsMpi, useGpuAccel, useMultithread, __func__);
-}
-
 void initQuESTEnv() {
 
     const bool userOwnsMpi = false;
@@ -428,7 +430,7 @@ void initQuESTEnv() {
 
 int isQuESTEnvInit() {
 
-    return (int) (globalEnvPtr != nullptr);
+    return (int) (global_envPtr != nullptr);
 }
 
 
@@ -436,7 +438,7 @@ QuESTEnv getQuESTEnv() {
     validate_envIsInit(__func__);
 
     // returns a copy, so cheeky users calling memcpy() upon const struct still won't mutate
-    return *globalEnvPtr;
+    return *global_envPtr;
 }
 
 
@@ -447,38 +449,34 @@ void finalizeQuESTEnv() {
     // calling this will not automatically
     // free the memory of existing Quregs
 
-    if (globalEnvPtr->isGpuAccelerated)
+    if (global_envPtr->isGpuAccelerated)
         gpu_clearCache(); // syncs first
 
-    if (globalEnvPtr->isGpuAccelerated && gpu_isCuQuantumCompiled())
+    if (global_envPtr->isGpuAccelerated && gpu_isCuQuantumCompiled())
         gpu_finalizeCuQuantum();
 
-    if (globalEnvPtr->isDistributed) {
+    if (global_envPtr->isDistributed) {
         comm_sync();
-        comm_end(globalEnvPtr->userOwnsMpi);
+        comm_end();
     }
 
     // free global env's heap memory and flag it as unallocated
-    free(globalEnvPtr);
-    globalEnvPtr = nullptr;
+    free(global_envPtr);
+    global_envPtr = nullptr;
 
     // flag that the environment was finalised, to ensure it is never re-initialised
-    hasEnvBeenFinalized = true;
+    global_hasEnvBeenFinalized = true;
 }
 
 
 void syncQuESTEnv() {
     validate_envIsInit(__func__);
 
-    if (globalEnvPtr->isGpuAccelerated)
+    if (global_envPtr->isGpuAccelerated)
         gpu_sync();
 
-    if (globalEnvPtr->isDistributed) {
+    if (global_envPtr->isDistributed)
         comm_sync();
-        #if QUEST_COMPILE_SUBCOMM
-            updateQuESTEnvDistInfo();
-        #endif
-    }
 }
 
 
@@ -519,19 +517,17 @@ void reportQuESTEnv() {
 void getQuESTEnvironmentString(char str[200]) {
     validate_envIsInit(__func__);
 
-    QuESTEnv env = getQuESTEnv();
-
     int numThreads = cpu_isOpenmpCompiled()? cpu_getAvailableNumThreads() : 1;
-    int cuQuantum = env.isGpuAccelerated && gpu_isCuQuantumCompiled();
-    int gpuDirect = env.isGpuAccelerated && gpu_isDirectGpuCommPossible();
+    int cuQuantum = global_envPtr->isGpuAccelerated && gpu_isCuQuantumCompiled();
+    int gpuDirect = global_envPtr->isGpuAccelerated && gpu_isDirectGpuCommPossible();
 
     snprintf(str, 200, "CUDA=%d OpenMP=%d MPI=%d userOwnsMPI=%d threads=%d ranks=%d cuQuantum=%d gpuDirect=%d",
-        env.isGpuAccelerated,
-        env.isMultithreaded,
-        env.isDistributed,
-        env.userOwnsMpi,
+        global_envPtr->isGpuAccelerated,
+        global_envPtr->isMultithreaded,
+        global_envPtr->isDistributed,
+        global_envPtr->isMpiUserOwned,
         numThreads,
-        env.numNodes,
+        global_envPtr->numNodes,
         cuQuantum,
         gpuDirect);
 }
diff --git a/quest/src/api/experimental.cpp b/quest/src/api/experimental.cpp
new file mode 100644
index 000000000..1ad6fdb42
--- /dev/null
+++ b/quest/src/api/experimental.cpp
@@ -0,0 +1,89 @@
+/** @file
+ * Experimental functions which are liable to
+ * API breaks within QuEST minor version releases.
+ * Some optional functions require compiling this
+ * file against MPI, despite being outside of /comm/, 
+ * and so require opt-in macros (QUEST_COMPILE_SUBCOMM)
+ * 
+ * @author Oliver Brown
+ */
+
+#include "quest/include/config.h"
+#include "quest/include/environment.h"
+
+#include "quest/src/core/validation.hpp"
+#include "quest/src/comm/comm_config.hpp"
+
+#if QUEST_COMPILE_SUBCOMM && ! QUEST_COMPILE_MPI
+    #error "Macro QUEST_COMPILE_SUBCOMM was true, but QUEST_COMPILE_MPI was illegally false."
+#endif
+
+#if QUEST_COMPILE_SUBCOMM
+    #include <mpi.h>
+#endif
+
+
+
+/*
+ * EXTERNAL FUNCTIONS
+ *
+ * which we here regretfully 'extern', either because we are
+ * unsure which header should expose them, or because they
+ * contain deployment-specific types (like MPI_Comm) which
+ * we do not wish to expose within internal headers
+ */
+
+
+extern void validateAndInitCustomQuESTEnv(
+    int useDistrib, bool userOwnsMpi, int useGpuAccel, int useMultithread, const char* caller);
+
+
+#if QUEST_COMPILE_SUBCOMM // hide MPI_Comm
+    extern bool comm_setMpiComm(MPI_Comm newComm, bool userOwnsMpi);
+#endif
+
+
+
+/*
+ * API FUNCTIONS
+ */
+
+
+// enable invocation by both C and C++ binaries
+extern "C" {
+
+
+void initCustomMpiQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuAccel, int useMultithread) {
+    validateAndInitCustomQuESTEnv(useDistrib, userOwnsMpi, useGpuAccel, useMultithread, __func__);
+}
+
+
+#if QUEST_COMPILE_SUBCOMM // hide MPI_Comm
+
+void initCustomMpiCommQuESTEnv(MPI_Comm userQuestComm, int useGpuAccel, int useMultithread) {
+
+    // useDistrib and userOwnsMpi are implied by the use of this initialiser
+    const int useDistrib = 1;
+    const bool userOwnsMpi = true;
+
+    // pre-validate that we are able to set the MPI communicator
+    validate_mpiInitStatus(useDistrib, userOwnsMpi, __func__);
+    validate_mpiSubCommIsNonNull(userQuestComm != MPI_COMM_NULL, __func__);
+
+    // skip re-setting the MPI comm (which would cause an internal error) when it
+    // is already active, as happens if a user illegally re-calls this function;
+    // that misuse is subsequently caught by validateAndInitCustomQuESTEnv() below
+    if (!comm_isActive()) {
+        bool success = comm_setMpiComm(userQuestComm, userOwnsMpi);
+        validate_mpiSubCommSetSucceeded(success, __func__);
+    }
+
+    // perform remaining validation (some is harmlessly repeated) and init QuEST env
+    validateAndInitCustomQuESTEnv(useDistrib, userOwnsMpi, useGpuAccel, useMultithread, __func__);
+}
+
+#endif // QUEST_COMPILE_SUBCOMM
+
+
+// end de-mangler
+}
diff --git a/quest/src/api/matrices.cpp b/quest/src/api/matrices.cpp
index c7e963359..07e37025c 100644
--- a/quest/src/api/matrices.cpp
+++ b/quest/src/api/matrices.cpp
@@ -165,7 +165,7 @@ void freeAllMemoryIfAnyAllocsFailed(T matr) {
 
     // ascertain whether any allocs failed on any node
     bool anyFail = didAnyLocalAllocsFail(matr);
-    if (comm_isInit())
+    if (comm_isActive())
         anyFail = comm_isTrueOnAllNodes(anyFail);
 
     // if so, free all heap fields
diff --git a/quest/src/api/paulis.cpp b/quest/src/api/paulis.cpp
index a996f83c0..d5ac4d8e7 100644
--- a/quest/src/api/paulis.cpp
+++ b/quest/src/api/paulis.cpp
@@ -38,7 +38,7 @@ bool didAnyAllocsFailOnAnyNode(PauliStrSum sum) {
         ! mem_isAllocated(sum.coeffs)  || 
         ! mem_isAllocated(sum.isApproxHermitian) );
     
-    if (comm_isInit())
+    if (comm_isActive())
         anyFail = comm_isTrueOnAllNodes(anyFail);
 
     return anyFail;
diff --git a/quest/src/api/qureg.cpp b/quest/src/api/qureg.cpp
index 3bad734e4..84bcd2bd0 100644
--- a/quest/src/api/qureg.cpp
+++ b/quest/src/api/qureg.cpp
@@ -116,7 +116,7 @@ bool didAnyLocalAllocsFail(Qureg qureg) {
 bool didAnyAllocsFailOnAnyNode(Qureg qureg) {
 
     bool anyFail = didAnyLocalAllocsFail(qureg);
-    if (comm_isInit())
+    if (comm_isActive())
         anyFail = comm_isTrueOnAllNodes(anyFail);
 
     return anyFail;
diff --git a/quest/src/api/subcommunicator.cpp b/quest/src/api/subcommunicator.cpp
deleted file mode 100644
index e248f0dba..000000000
--- a/quest/src/api/subcommunicator.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-#include "quest/include/config.h"
-#include "quest/include/environment.h"
-#include "quest/include/subcommunicator.h"
-
-#include "quest/src/comm/comm_config.hpp"
-#include "quest/src/core/errors.hpp"
-
-#if QUEST_COMPILE_MPI && QUEST_COMPILE_SUBCOMM
-
-#include <stdbool.h>
-#include <mpi.h>
-
-void initCustomMpiCommQuESTEnv(MPI_Comm userQuestComm, int useGpuAccel, int useMultithread) {
-    // useDistrib and userOwnsMpi are implied by the user of this initialiser
-    const int useDistrib = 1;
-    const bool userOwnsMpi = true;
-
-    // set mpiCommQuest to user provided communicator
-    if (comm_isInit()) {
-        comm_setMpiComm(userQuestComm);
-    } else {
-        error_commNotInit();
-    }
-
-    // initialise QuEST around that communicator
-    initCustomMpiQuESTEnv(useDistrib, userOwnsMpi, useGpuAccel, useMultithread);
-
-    return;
-}
-
-#endif
diff --git a/quest/src/comm/comm_config.cpp b/quest/src/comm/comm_config.cpp
index 67fd53231..5c59477ca 100644
--- a/quest/src/comm/comm_config.cpp
+++ b/quest/src/comm/comm_config.cpp
@@ -6,8 +6,11 @@
  * 
  * Note that even when QUEST_COMPILE_MPI=1, the user may have
  * disabled distribution when creating the QuEST environment
- * at runtime. Ergo we use comm_isInit() to determine whether
- * functions should invoke the MPI API.
+ * at runtime - even though they themselves may have initialised
+ * and be using MPI. So we must be careful about consulting MPI status!
+ * Furthermore, all routines here will only ever consult/affect
+ * the QuEST communicator, never the entire MPI environment,
+ * the latter of which may contain non-participating processes.
  * 
  * @author Tyson Jones
  */
@@ -22,8 +25,6 @@
 
 #if QUEST_COMPILE_MPI
     #include <mpi.h>
-
-    static MPI_Comm mpiCommQuest = MPI_COMM_NULL;
 #endif
 
 
@@ -32,6 +33,7 @@
  * WARN ABOUT CUDA-AWARENESS
  */
 
+
 #if QUEST_COMPILE_MPI && QUEST_COMPILE_CUDA
 
     // this check is OpenMPI specific
@@ -54,9 +56,98 @@
 
 
 
+/*
+ * COMMUNICATOR MANAGEMENT
+ *
+ * QuEST will only ever use the overridable global_mpiComm communicator,
+ * so that superusers can dedicate external MPI processes to other tasks.
+ * Beware that it's valid for QuEST to be compiled with MPI, but have
+ * distribution runtime-disabled, while the user is themselves using
+ * (and ergo has initialised) MPI. In that scenario, we must not touch
+ * MPI, hence why comm_isActive() below is distinct from comm_isMpiInit().
+ */
+
+
+// We must record whether the user owns MPI, so that we do not ever attempt
+// to kill it when gracefully exiting, or due to a validation error
+static bool global_isMpiUserOwned = false;
+
+
+// Guarded since MPI_Comm cannot be exposed when not compiling MPI. This
+// communicator is overridden from NULL either BEFORE or DURING comm_init()
+#if QUEST_COMPILE_MPI
+    static MPI_Comm global_mpiComm = MPI_COMM_NULL;
+#endif
+
+
+bool comm_isActive() {
+#if QUEST_COMPILE_MPI
+
+    // comm_init(), or potentially comm_setMpiComm() before it, will only
+    // ever override mpiComm with non-NULL, indicating active comm. Note
+    // it's possible in principle for mpiComm to later return to NULL, via
+    // comm_end(), and for QuEST execution to continue (though not supported
+    // presently). If comm_isActive() is true, MPI is guaranteed to be initialised
+    return global_mpiComm != MPI_COMM_NULL;
+
+    // note it is legal for QuEST distribution to be disabled (and ergo
+    // mpiComm never initialised) even when the user is themselves accessing
+    // MPI, hence this function is semantically distinct from comm_isMpiInit()
+#else
+
+    // QuEST communication is obviously never active if
+    // not even MPI is compiled; though this does not
+    // imply at all the user isn't themselves using MPI!
+    return false;
+
+#endif
+}
+
+
+// Hide MPI_Comm from signatures when MPI is not compiled. Beware that
+// these are not exposed in comm_config.hpp; callers must 'extern' them!
+#if QUEST_COMPILE_MPI
+
+
+MPI_Comm comm_getMpiComm() {
+
+    // illegal to call before communicator has been overridden
+    if (global_mpiComm == MPI_COMM_NULL)
+        error_commMpiCommIsNull();
+
+    return global_mpiComm;
+}
+
+
+bool comm_setMpiComm(MPI_Comm newComm, bool userOwnsMpi) {
+
+    // illegal to re-set, or set to null
+    if (global_mpiComm != MPI_COMM_NULL)
+        error_commAlreadyHasSetMpiComm();
+    if (newComm == MPI_COMM_NULL)
+        error_commNewMpiCommIsNull();
+
+    // detect bad communicator, and inform validation
+    auto status = MPI_Comm_dup(newComm, &global_mpiComm);
+    if (status != MPI_SUCCESS)
+        return false;
+
+    // record ownership as soon as QuEST communication becomes active, so
+    // validation errors during env initialisation never kill user-owned MPI
+    global_isMpiUserOwned = userOwnsMpi;
+    return true;
+}
+
+
+#endif // QUEST_COMPILE_MPI
+
+
+
 /*
  * MPI ENVIRONMENT MANAGEMENT
- * all of which is safely callable in non-distributed mode
+ *
+ * which queries MPI itself (as may be user-activated), rather
+ * than QuEST's (possibly more limited) MPI environment
  */
 
 
@@ -64,8 +155,7 @@ bool comm_isMpiCompiled() {
     return (bool) QUEST_COMPILE_MPI;
 }
 
-
-bool comm_isMpiSubCommunicatorCompiled() {
+bool comm_isMpiSubCommCompiled() {
     return (bool) QUEST_COMPILE_SUBCOMM;
 }
 
@@ -96,89 +186,93 @@ bool comm_isMpiGpuAware() {
 }
 
 
-bool comm_isInit() {
+bool comm_isMpiInit() {
 #if QUEST_COMPILE_MPI
 
     // safely callable before MPI initialisation, but NOT after comm_end()
     int isInit;
     MPI_Initialized(&isInit);
+
+    // when MPI is not initialised, it is guaranteed that QuEST's communicator
+    // is inactive, which we double check here so callers can be absolutely sure
+    if (!isInit && comm_isActive())
+        error_commActiveButMpiNotInit();
+
     return (bool) isInit;
 
 #else
 
     // obviously MPI is never initialised if not even compiled
     return false;
+
 #endif
 }
 
 
-void comm_init(int useDistrib, bool userOwnsMpi) {
+
+/*
+ * QUEST COMMUNICATION MANAGEMENT
+ *
+ * which interacts only with QuEST's MPI environment,
+ * which may be smaller than the user-controlled MPI env
+ */
+
+
+void comm_init(bool userOwnsMpi) {
 #if QUEST_COMPILE_MPI
 
-    // error if user owns MPI but has not initialised
-    if (userOwnsMpi && !comm_isInit()) {
+    // re-assert prior user-validations for clarity
+    if (userOwnsMpi && !comm_isMpiInit())
         error_commNotInit();
-    }
+    if (!userOwnsMpi && comm_isMpiInit())
+        error_commAlreadyInit();
    
-    // Overall mpiCommQuest should be set in the following ways
-    // however only useDistrib = 1 and userOwnsMpi = false
-    // and useDistrib = 0 and userOwnsMpi = true 
-    // require action here
-    //
-    // | useDistrib | userOwnsMpi |  mpiCommQuest  |
-    // | ---------- | ----------- | -------------- |
-    // |     0      |    false    | MPI_COMM_NULL  |
-    // | ---------- | ----------- | -------------- |
-    // |     1      |    false    | MPI_COMM_WORLD |
-    // | ---------- | ----------- | -------------- |
-    // |     0      |    true     | MPI_COMM_SELF  |
-    // | ---------- | ----------- | -------------- |
-    // |            |             | MPI_COMM_WORLD |
-    // |     1      |    true     |      or        |
-    // |            |             | userQuestComm  |
-    // | ---------- | ----------- | -------------- |
-    
+    // init MPI only when it's not the user's responsibility
+    if (!userOwnsMpi)
+        MPI_Init(NULL, NULL);
 
-    if (useDistrib && !userOwnsMpi) {
-        // error if attempting re-initialisation
-        if (comm_isInit()) {
-            error_commAlreadyInit();
-        } else {
-            MPI_Init(NULL, NULL);
-            // The user wants MPI and is leaving it to QuEST
-            MPI_Comm_dup(MPI_COMM_WORLD, &mpiCommQuest);
-        }
-    } else if (!useDistrib && userOwnsMpi) {
-        // The user has initialised MPI but wants QuEST to ignore it
-        MPI_Comm_dup(MPI_COMM_SELF, &mpiCommQuest);
-    } else if (useDistrib && userOwnsMpi) {
-        // if mpiCommQuEST is still MPI_COMM_NULL the user is not 
-        // providing their own MPI_Comm and we should set mpiCommQuest
-        // to MPI_COMM_WORLD
-        if (mpiCommQuest == MPI_COMM_NULL)
-            MPI_Comm_dup(MPI_COMM_WORLD, &mpiCommQuest);
-    }
+    // choose communicator only when the user hasn't already
+    // (via comm_setMpiComm, during custom env initialisation)
+    if (global_mpiComm == MPI_COMM_NULL)
+        comm_setMpiComm(MPI_COMM_WORLD, userOwnsMpi);
 
 #endif
-    return;
 }
 
 
-
-void comm_end(bool userOwnsMpi) {
+void comm_end() {
 #if QUEST_COMPILE_MPI
 
-    // gracefully permit comm_end() before comm_init(), as input validation can trigger
-    if (!comm_isInit())
+    // If QuEST isn't using distribution, regardless of whether the user is using MPI,
+    // then we gracefully exit. We do NOT attempt to end MPI on the user's behalf (as we
+    // may be tempted to do during validation failure to avoid their MPI-crash), because
+    // it's possible/legal that not all processes are participating in this comm_end()
+    // call, in which case MPI_Finalize() could just cause a hang.
+    if (!comm_isActive())
         return;
 
-    MPI_Barrier(mpiCommQuest);
-    MPI_Comm_free(&mpiCommQuest);
+    // Syncing is not strictly necessary, but it ensures that finalizeQuESTEnv() never
+    // completes on one process while another process is still performing simulation
+    // (though that'd be weird), and so may avoid a silly user benchmarking pitfall
+    MPI_Barrier(global_mpiComm);
+    MPI_Comm_free(&global_mpiComm);
     
-    // QuEST must finalise MPI if the user does not own it
-    if (!userOwnsMpi)
+    // Do NOT close MPI if the user owns it; they may still wish to use it after QuEST!
+    if (!global_isMpiUserOwned)
         MPI_Finalize();
 
+    // Presently, comm_end() is only ever called during QuESTEnv destruction (either
+    // deliberately, or because of failed validation during QuESTEnv initialisation).
+    // This means any comm_*() call hereafter is invalid/illegal and will be prevented
+    // by validation. However, we can imagine a future where distribution gets runtime
+    // disabled while QuEST execution continues (e.g. initQuESTEnv automatically
+    // disabled distribution), and so we must indicate that communication is no longer
+    // active by overwriting comm to NULL. BEWARE that this is "hacky"; we have
+    // updated mpiComm here without MPI_Comm_dup(), but that's fine, because hereafter
+    // MPI will never be used again (illegal to re-init both MPI, and QuEST!)
+    global_mpiComm = MPI_COMM_NULL;
+    global_isMpiUserOwned = false;
+
 #endif
 }
 
@@ -187,19 +281,22 @@ int comm_getRank() {
 #if QUEST_COMPILE_MPI
 
     // if distribution was not runtime enabled (or a validation error was 
-    // triggered), every node (if many MPI processes were launched)
-    // believes it is the root rank
-    if (!comm_isInit())
+    // triggered during distributed initialisation), every process believes
+    // it is the root rank; this may lead to unavoidable error msg spam!
+    if (!comm_isActive())
         return ROOT_RANK;
 
+    // obtain the process rank within the QuEST communicator, which can
+    // differ from the global MPI process rank when users own MPI
     int rank;
-    MPI_Comm_rank(mpiCommQuest, &rank);
+    MPI_Comm_rank(global_mpiComm, &rank);
     return rank;
 
 #else
 
     // if MPI isn't compiled, we're definitely non-distributed; return main rank 
     return ROOT_RANK;
+
 #endif
 }
 
@@ -216,19 +313,25 @@ int comm_getNumNodes() {
 #if QUEST_COMPILE_MPI
 
     // if distribution was not runtime enabled (or a validation error was 
-    // triggered), every node (if many MPI processes were launched)
-    // believes it is the one and only node
-    if (!comm_isInit())
+    // triggered during distributed initialisation), every process is told
+    // it is the one and only node; this may lead to error msg spam, but
+    // appears unavoidable!
+    if (!comm_isActive())
         return 1;
 
+    // obtain the number of processes within the QuEST communicator, which
+    // can be smaller than global MPI process count when users own MPI
     int numNodes;
-    MPI_Comm_size(mpiCommQuest, &numNodes);
+    MPI_Comm_size(global_mpiComm, &numNodes);
     return numNodes;
 
 #else
 
-    // if MPI isn't compiled, we're definitely non-distributed; return single node
+    // if MPI isn't compiled, QuEST is definitely non-distributed and
+    // each process only knows itself (though users may own MPI and
+    // actually have many processes; that's none of our business!)
     return 1;
+
 #endif
 }
 
@@ -236,35 +339,13 @@ int comm_getNumNodes() {
 void comm_sync() {
 #if QUEST_COMPILE_MPI
 
-    // gracefully handle when not distributed, needed by e.g. pre-MPI-setup validation 
-    if (!comm_isInit())
+    // gracefully handle when not distributed, needed by e.g. pre-MPI-setup validation
+    if (!comm_isActive())
         return;
 
-    MPI_Barrier(mpiCommQuest);
-#endif
-}
+    MPI_Barrier(global_mpiComm);
 
-#if QUEST_COMPILE_MPI
-    MPI_Comm comm_getMpiComm() {
-        return mpiCommQuest;
-    }
-
-    #if QUEST_COMPILE_SUBCOMM
-        void comm_setMpiComm(MPI_Comm newComm) {
-
-            // error if mpiCommQuEST is already set!
-            if (mpiCommQuest != MPI_COMM_NULL) {
-                MPI_Barrier(mpiCommQuest);
-                MPI_Comm_free(&mpiCommQuest);
-                error_commDoubleSetMpiComm();
-            }
-
-            int mpi_err = MPI_Comm_dup(newComm, &mpiCommQuest);
-            if (mpi_err != MPI_SUCCESS) {
-                error_commInvalidMpiComm();
-            }
-
-            return;
-        }
-    #endif
 #endif
+
+    // do nothing at all when MPI is not compiled (user owned MPI processes go unsynced)
+}
diff --git a/quest/src/comm/comm_config.hpp b/quest/src/comm/comm_config.hpp
index b2d038cd5..826ebdf1c 100644
--- a/quest/src/comm/comm_config.hpp
+++ b/quest/src/comm/comm_config.hpp
@@ -10,34 +10,28 @@
 #ifndef COMM_CONFIG_HPP
 #define COMM_CONFIG_HPP
 
-#include "quest/include/config.h"
-
-#if QUEST_COMPILE_MPI
-  #include <mpi.h>
-#endif
-
 constexpr int ROOT_RANK = 0;
 
+// queries of MPI's global/general status (when visible)
 bool comm_isMpiCompiled();
-bool comm_isMpiSubCommunicatorCompiled();
+bool comm_isMpiSubCommCompiled();
 bool comm_isMpiGpuAware();
+bool comm_isMpiInit();
 
-void comm_init(int useDistrib, bool userOwnsMpi);
-void comm_end(bool userOwnsMpi);
+// control of QuEST's (possibly more limited) MPI env
+bool comm_isActive();
+void comm_init(bool userOwnsMpi);
+void comm_end();
 void comm_sync();
 
+// queries of QuEST's (possibly more limited) MPI env
 int comm_getRank();
 int comm_getNumNodes();
-
-bool comm_isInit();
 bool comm_isRootNode();
 bool comm_isRootNode(int rank);
 
-#if QUEST_COMPILE_MPI
-  MPI_Comm comm_getMpiComm();
-  #if QUEST_COMPILE_SUBCOMM
-    void comm_setMpiComm(MPI_Comm newComm);
-  #endif
-#endif
+// Signatures containing MPI types which callers must extern:
+// extern MPI_Comm comm_getMpiComm()
+// extern bool comm_setMpiComm(MPI_Comm newComm, bool userOwnsMpi)
 
 #endif // COMM_CONFIG_HPP
diff --git a/quest/src/comm/comm_routines.cpp b/quest/src/comm/comm_routines.cpp
index 0bc90563b..cf6956454 100644
--- a/quest/src/comm/comm_routines.cpp
+++ b/quest/src/comm/comm_routines.cpp
@@ -6,7 +6,7 @@
  * 
  * @author Tyson Jones
  * @author Jakub Adamski (sped-up large comm by asynch messages)
- * @author Oliver Brown (patched max-message inference, consulted on AR and MPICH support)
+ * @author Oliver Brown (added custom communicators, patched max-message inference, consulted on AR and MPICH support)
  * @author Ania (Anna) Brown (developed QuEST v1 logic)
  */
 
@@ -24,6 +24,7 @@
 
 #if QUEST_COMPILE_MPI
     #include <mpi.h>
+    extern MPI_Comm comm_getMpiComm(); // comm_config.cpp does not leak MPI_Comm
 #endif
 
 #include <vector>
@@ -149,8 +150,7 @@ int getMaxNumMessages() {
     // messages. Beware the max is obtained via a void pointer and might be unset...
     void* tagUpperBoundPtr;
     int isAttribSet;
-    MPI_Comm mpiCommQuest = comm_getMpiComm();
-    MPI_Comm_get_attr(mpiCommQuest, MPI_TAG_UB, &tagUpperBoundPtr, &isAttribSet);
+    MPI_Comm_get_attr(comm_getMpiComm(), MPI_TAG_UB, &tagUpperBoundPtr, &isAttribSet);
 
     // if something went wrong with obtaining the tag bound, return the safe minimum
     if (!isAttribSet)
@@ -217,7 +217,7 @@ std::array<qindex,3> dividePayloadIntoMessages(qindex numAmps) {
 void exchangeArrays(qcomp* send, qcomp* recv, qindex numElems, int pairRank) {
 #if QUEST_COMPILE_MPI
 
-    MPI_Comm mpiCommQuest = comm_getMpiComm();
+    MPI_Comm mpiComm = comm_getMpiComm();
 
     // each message is asynchronously dispatched with a final wait, as per arxiv.org/abs/2308.07402
 
@@ -229,8 +229,8 @@ void exchangeArrays(qcomp* send, qcomp* recv, qindex numElems, int pairRank) {
     // so that messages are permitted to arrive out-of-order (supporting UCX adaptive-routing)
     for (qindex m=0; m<numMessages; m++) {
         int tag = static_cast<int>(m); // gauranteed int, but m*messageSize needs qindex
-        MPI_Irecv(&recv[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, mpiCommQuest, &requests[2*m]);
-        MPI_Isend(&send[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, mpiCommQuest, &requests[2*m+1]);
+        MPI_Irecv(&recv[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, mpiComm, &requests[2*m]);
+        MPI_Isend(&send[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, mpiComm, &requests[2*m+1]);
     }
 
     // wait for all exchanges to complete (MPI will automatically free the request memory)
@@ -251,7 +251,7 @@ void exchangeArrays(qcomp* send, qcomp* recv, qindex numElems, int pairRank) {
 void asynchSendArray(qcomp* send, qindex numElems, int pairRank) {
 #if QUEST_COMPILE_MPI
 
-    MPI_Comm mpiCommQuest = comm_getMpiComm();
+    MPI_Comm mpiComm = comm_getMpiComm();
 
     // we will not track nor wait for the asynch send; instead, the caller will later comm_sync()
     MPI_Request nullReq = MPI_REQUEST_NULL;
@@ -262,7 +262,7 @@ void asynchSendArray(qcomp* send, qindex numElems, int pairRank) {
     // asynchronously send the uniquely-tagged messages
     for (qindex m=0; m<numMessages; m++) {
         int tag = static_cast<int>(m); // gauranteed int, but m*messageSize needs qindex
-        MPI_Isend(&send[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, mpiCommQuest, &nullReq);
+        MPI_Isend(&send[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, mpiComm, &nullReq);
     }
 
 #else
@@ -274,7 +274,7 @@ void asynchSendArray(qcomp* send, qindex numElems, int pairRank) {
 void receiveArray(qcomp* dest, qindex numElems, int pairRank) {
 #if QUEST_COMPILE_MPI
 
-    MPI_Comm mpiCommQuest = comm_getMpiComm();
+    MPI_Comm mpiComm = comm_getMpiComm();
 
     // expect the data in multiple messages
     auto [messageSize, numMessages] = dividePow2PayloadIntoMessages(numElems);
@@ -285,7 +285,7 @@ void receiveArray(qcomp* dest, qindex numElems, int pairRank) {
     // listen to receive each uniquely-tagged message asynchronously (as per arxiv.org/abs/2308.07402)
     for (qindex m=0; m<numMessages; m++) {
         int tag = static_cast<int>(m); // gauranteed int, but m*messageSize needs qindex
-        MPI_Irecv(&dest[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, mpiCommQuest, &requests[m]);
+        MPI_Irecv(&dest[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, mpiComm, &requests[m]);
     }
 
     // receivers wait for all messages to be received (while sender asynch proceeds)
@@ -310,8 +310,7 @@ void globallyCombineNonUniformSubArrays(
 ) {
 #if QUEST_COMPILE_MPI
 
-    MPI_Comm mpiCommQuest = comm_getMpiComm();
-
+    auto mpiComm = comm_getMpiComm();
     int myRank = comm_getRank();
     int numNodes = comm_getNumNodes();
 
@@ -345,14 +344,14 @@ void globallyCombineNonUniformSubArrays(
         for (int m=0; m<numBigMsgs; m++) {
             qindex recvInd = globalRecvIndPerRank[sendRank] + (m * bigMsgSize);
             requests.push_back(MPI_REQUEST_NULL);
-            MPI_Ibcast(&recv[recvInd], bigMsgSize, MPI_QCOMP, sendRank, mpiCommQuest, &requests.back());
+            MPI_Ibcast(&recv[recvInd], bigMsgSize, MPI_QCOMP, sendRank, mpiComm, &requests.back());
         }
 
         // and potentially one remaining asynch message 
         if (remMsgSize > 0) {
             qindex recvInd = globalRecvIndPerRank[sendRank] + (numBigMsgs * bigMsgSize);
             requests.push_back(MPI_REQUEST_NULL);
-            MPI_Ibcast(&recv[recvInd], remMsgSize, MPI_QCOMP, sendRank, mpiCommQuest, &requests.back());
+            MPI_Ibcast(&recv[recvInd], remMsgSize, MPI_QCOMP, sendRank, mpiComm, &requests.back());
         }
     }
 
@@ -648,9 +647,7 @@ void comm_exchangeAmpsToBuffers(Qureg qureg, int pairRank) {
 void comm_broadcastAmp(int sendRank, qcomp* sendAmp) {
 #if QUEST_COMPILE_MPI
 
-    MPI_Comm mpiCommQuest = comm_getMpiComm();
-
-    MPI_Bcast(sendAmp, 1, MPI_QCOMP, sendRank, mpiCommQuest);
+    MPI_Bcast(sendAmp, 1, MPI_QCOMP, sendRank, comm_getMpiComm());
 
 #else
     error_commButEnvNotDistributed();
@@ -661,7 +658,7 @@ void comm_broadcastAmp(int sendRank, qcomp* sendAmp) {
 void comm_sendAmpsToRoot(int sendRank, qcomp* send, qcomp* recv, qindex numAmps) {
 #if QUEST_COMPILE_MPI
 
-    MPI_Comm mpiCommQuest = comm_getMpiComm();
+    MPI_Comm mpiComm = comm_getMpiComm();
 
     // only the sender and root nodes need to continue
     int recvRank = ROOT_RANK;
@@ -678,8 +675,8 @@ void comm_sendAmpsToRoot(int sendRank, qcomp* send, qcomp* recv, qindex numAmps)
     for (qindex m=0; m<numMessages; m++) {
         int tag = static_cast<int>(m);
         (myRank == sendRank)?
-            MPI_Isend(&send[m*messageSize], messageSize, MPI_QCOMP, recvRank, tag, mpiCommQuest, &requests[m]): // sender
-            MPI_Irecv(&recv[m*messageSize], messageSize, MPI_QCOMP, sendRank, tag, mpiCommQuest, &requests[m]); // root
+            MPI_Isend(&send[m*messageSize], messageSize, MPI_QCOMP, recvRank, tag, mpiComm, &requests[m]): // sender
+            MPI_Irecv(&recv[m*messageSize], messageSize, MPI_QCOMP, sendRank, tag, mpiComm, &requests[m]); // root
     }
 
     // wait for all exchanges to complete (MPI will automatically free the request memory)
@@ -692,13 +689,10 @@ void comm_sendAmpsToRoot(int sendRank, qcomp* send, qcomp* recv, qindex numAmps)
 
 
 void comm_broadcastIntsFromRoot(int* arr, qindex length) {
-
 #if QUEST_COMPILE_MPI
-    MPI_Comm mpiCommQuest = comm_getMpiComm();
-
 
     int sendRank = ROOT_RANK;
-    MPI_Bcast(arr, length, MPI_INT, sendRank, mpiCommQuest);
+    MPI_Bcast(arr, length, MPI_INT, sendRank, comm_getMpiComm());
 
 #else
     error_commButEnvNotDistributed();
@@ -709,10 +703,8 @@ void comm_broadcastIntsFromRoot(int* arr, qindex length) {
 void comm_broadcastUnsignedsFromRoot(unsigned* arr, qindex length) {
 #if QUEST_COMPILE_MPI
 
-    MPI_Comm mpiCommQuest = comm_getMpiComm();
-
     int sendRank = ROOT_RANK;
-    MPI_Bcast(arr, length, MPI_UNSIGNED, sendRank, mpiCommQuest);
+    MPI_Bcast(arr, length, MPI_UNSIGNED, sendRank, comm_getMpiComm());
 
 #else
     error_commButEnvNotDistributed();
@@ -739,9 +731,7 @@ void comm_combineSubArrays(qcomp* recv, vector<qindex> recvInds, vector<qindex>
 void comm_reduceAmp(qcomp* localAmp) {
 #if QUEST_COMPILE_MPI
 
-    MPI_Comm mpiCommQuest = comm_getMpiComm();
-
-    MPI_Allreduce(MPI_IN_PLACE, localAmp, 1, MPI_QCOMP, MPI_SUM, mpiCommQuest);
+    MPI_Allreduce(MPI_IN_PLACE, localAmp, 1, MPI_QCOMP, MPI_SUM, comm_getMpiComm());
 
 #else
     error_commButEnvNotDistributed();
@@ -752,9 +742,7 @@ void comm_reduceAmp(qcomp* localAmp) {
 void comm_reduceReal(qreal* localReal) {
 #if QUEST_COMPILE_MPI
 
-    MPI_Comm mpiCommQuest = comm_getMpiComm();
-
-    MPI_Allreduce(MPI_IN_PLACE, localReal, 1, MPI_QREAL, MPI_SUM, mpiCommQuest);
+    MPI_Allreduce(MPI_IN_PLACE, localReal, 1, MPI_QREAL, MPI_SUM, comm_getMpiComm());
 
 #else
     error_commButEnvNotDistributed();
@@ -765,9 +753,7 @@ void comm_reduceReal(qreal* localReal) {
 void comm_reduceReals(qreal* localReals, qindex numLocalReals) {
 #if QUEST_COMPILE_MPI
 
-    MPI_Comm mpiCommQuest = comm_getMpiComm();
-
-    MPI_Allreduce(MPI_IN_PLACE, localReals, numLocalReals, MPI_QREAL, MPI_SUM, mpiCommQuest);
+    MPI_Allreduce(MPI_IN_PLACE, localReals, numLocalReals, MPI_QREAL, MPI_SUM, comm_getMpiComm());
 
 #else
     error_commButEnvNotDistributed();
@@ -778,12 +764,10 @@ void comm_reduceReals(qreal* localReals, qindex numLocalReals) {
 bool comm_isTrueOnAllNodes(bool val) {
 #if QUEST_COMPILE_MPI
 
-    MPI_Comm mpiCommQuest = comm_getMpiComm();
-
     // perform global AND and broadcast result back to all nodes
     int local = (int) val;
     int global;
-    MPI_Allreduce(&local, &global, 1, MPI_INT, MPI_LAND, mpiCommQuest);
+    MPI_Allreduce(&local, &global, 1, MPI_INT, MPI_LAND, comm_getMpiComm());
     return (bool) global;
 
 #else
@@ -819,8 +803,6 @@ bool comm_isTrueOnRootNode(bool val) {
 vector<string> comm_gatherStringsToRoot(char* localChars, int maxNumLocalChars) {
 #if QUEST_COMPILE_MPI
 
-    MPI_Comm mpiCommQuest = comm_getMpiComm();
-
     // no need to validate array sizes and memory alloc successes;
     // these are trivial O(#nodes)-size arrays containing <20 chars
     int numNodes = comm_getNumNodes();
@@ -831,7 +813,7 @@ vector<string> comm_gatherStringsToRoot(char* localChars, int maxNumLocalChars)
     // all nodes send root all their local chars
     int recvRank = ROOT_RANK;
     MPI_Gather(localChars, maxNumLocalChars, MPI_CHAR, allChars.data(),
-        maxNumLocalChars, MPI_CHAR, recvRank, mpiCommQuest);
+        maxNumLocalChars, MPI_CHAR, recvRank, comm_getMpiComm());
 
     // divide allChars into stings, delimited by each node's terminal char
     vector<string> out(numNodes);
diff --git a/quest/src/core/errors.cpp b/quest/src/core/errors.cpp
index 8e61a2d1c..8879fc7a1 100644
--- a/quest/src/core/errors.cpp
+++ b/quest/src/core/errors.cpp
@@ -160,11 +160,6 @@ void error_commAlreadyInit() {
     raiseInternalError("The MPI communication environment was attemptedly re-initialised despite the QuEST environment already existing.");
 }
 
-void error_commInvalidMpiComm() {
-
-    raiseInternalError("The supplied MPI communicator was MPI_COMM_NULL, or duplication failed.");
-}
-
 void error_commButEnvNotDistributed() {
 
     raiseInternalError("A function attempted to invoke communication despite QuEST being compiled in non-distributed mode.");
@@ -190,9 +185,24 @@ void error_commNumMessagesExceedTagMax() {
     raiseInternalError("A function attempted to communicate via more messages than permitted (since there would be more uniquely-tagged messages than the tag upperbound).");
 }
 
-void error_commDoubleSetMpiComm() {
+void error_commAlreadyHasSetMpiComm() {
   
-    raiseInternalError("An attempt was made to set mpiCommQuest after it had already been set, as indicated by mpiCommQuest != MPI_COMM_NULL.");
+    raiseInternalError("An attempt was made to set the QuEST MPI communicator after it had already been set (and changed from MPI_COMM_NULL).");
+}
+
+void error_commMpiCommIsNull() {
+
+    raiseInternalError("The MPI communicator was queried but was unexpectedly MPI_COMM_NULL.");
+}
+
+void error_commNewMpiCommIsNull() {
+
+    raiseInternalError("The MPI communicator was attemptedly set to MPI_COMM_NULL, which validation should have prior caught.");
+}
+
+void error_commActiveButMpiNotInit() {
+
+    raiseInternalError("QuEST believed communication was active, but MPI_Init reported MPI was not initialised.");
 }
 
 void assert_commBoundsAreValid(Qureg qureg, qindex sendInd, qindex recvInd, qindex numAmps) {
diff --git a/quest/src/core/errors.hpp b/quest/src/core/errors.hpp
index f276c06ad..33cc182c7 100644
--- a/quest/src/core/errors.hpp
+++ b/quest/src/core/errors.hpp
@@ -81,8 +81,6 @@ void error_commNotInit();
 
 void error_commAlreadyInit();
 
-void error_commInvalidMpiComm();
-
 void error_commButEnvNotDistributed();
 
 void error_commOutOfBounds();
@@ -93,7 +91,13 @@ void error_commGivenInconsistentNumSubArraysANodes();
 
 void error_commNumMessagesExceedTagMax();
 
-void error_commDoubleSetMpiComm();
+void error_commAlreadyHasSetMpiComm();
+
+void error_commMpiCommIsNull();
+
+void error_commNewMpiCommIsNull();
+
+void error_commActiveButMpiNotInit();
 
 void assert_commBoundsAreValid(Qureg qureg, qindex sendInd, qindex recvInd, qindex numAmps);
 
diff --git a/quest/src/core/randomiser.cpp b/quest/src/core/randomiser.cpp
index 65c6da4eb..7b35a29fc 100644
--- a/quest/src/core/randomiser.cpp
+++ b/quest/src/core/randomiser.cpp
@@ -66,14 +66,14 @@ void rand_setSeeds(vector<unsigned> seeds) {
 
     // all nodes learn root node's #seeds
     unsigned numRootSeeds = seeds.size();
-    if (comm_isInit())
+    if (comm_isActive())
         comm_broadcastUnsignedsFromRoot(&numRootSeeds, 1);
 
     // all nodes ensure they have space to receive root node's seeds
     seeds.resize(numRootSeeds);
     
     // all nodes receive root seeds
-    if (comm_isInit())
+    if (comm_isActive())
         comm_broadcastUnsignedsFromRoot(seeds.data(), seeds.size());
 
     // all nodes remember seeds (in case user wishes to later recall them)
diff --git a/quest/src/core/validation.cpp b/quest/src/core/validation.cpp
index fc6adc58f..c727ad1c5 100644
--- a/quest/src/core/validation.cpp
+++ b/quest/src/core/validation.cpp
@@ -107,6 +107,21 @@ namespace report {
     string CUQUANTUM_DEPLOYED_ON_GPU_WITHOUT_MEM_POOLS =
         "Cannot use cuQuantum since your GPU does not support memory pools. Recompile with cuQuantum disabled to fall-back to using Thrust and custom kernels.";
 
+    string USER_OWNED_MPI_WAS_NOT_INIT =
+        "User owns MPI but did not prior initialise MPI before initialising QuEST.";
+
+    string USER_GIVEN_MPI_COMMUNICATOR_IS_NULL =
+        "The provided MPI communicator was null (MPI_COMM_NULL).";
+
+    string USER_GIVEN_MPI_COMMUNICATOR_FAILED_TO_SET =
+        "The provided MPI communicator could not be used; MPI_Comm_dup() was not successful.";
+
+    string QUEST_OWNED_MPI_WAS_PRE_INIT =
+        "MPI was already initialised prior to QuESTEnv initialisation, but the user did not declare MPI ownership.";
+
+    string QUEST_IS_NON_DISTRIBUTED_BUT_MPI_WAS_INIT =
+        "QuESTEnv was initialised to be non-distributed but MPI was externally initialised - this is presently unsupported due to a (very minor) technical limitation. If you need this facility, please raise a Github issue!";
+
     
     /*
      * EXISTING QUESTENV
@@ -1159,10 +1174,11 @@ void default_inputErrorHandler(const char* func, const char* msg) {
     // will then attempt to instantly abort all nodes, losing the error message.
     printer_sync();
 
-    // finalise MPI before error-exit to avoid scaring user with giant MPI error message
-    // we always "take ownership" of MPI here since we're about to kill the whole program
-    if (comm_isInit())
-        comm_end(0);
+    // finalise QuEST-owned MPI before error-exit to avoid scaring user with giant MPI crash
+    // message. note user-owned MPI is NOT killed because it's possible only SOME processes
+    // reach here, and attempting to sync/kill them would result in an MPI hang/crash anyway
+    if (comm_isActive())
+        comm_end(); // keeps user-owned MPI alive
 
     // simply exit, interrupting any other process (potentially leaking)
     exit(EXIT_FAILURE);
@@ -1344,7 +1360,7 @@ void assertAllNodesAgreeThat(bool valid, string msg, tokenSubs vars, const char*
     // when performing validation that may be non-uniform between nodes. For
     // example, mallocs may succeed on one node but fail on another due to
     // inhomogeneous loads.
-    if (comm_isInit())
+    if (comm_isActive())
         valid = comm_isTrueOnAllNodes(valid);
 
     // prepare error message only if validation will fail
@@ -1486,6 +1502,53 @@ void validate_gpuIsCuQuantumCompatible(const char* caller) {
     assertAllNodesAgreeThat(hasMemPools, report::CUQUANTUM_DEPLOYED_ON_GPU_WITHOUT_MEM_POOLS, caller);
 }
 
+void validate_mpiInitStatus(bool useDistrib, bool userOwnsMpi, const char* caller) {
+
+    // Earlier validation already confirms init(Custom*)QuESTEnv is only ever called once;
+    // here we additionally check that the user's handling of MPI matches what they declared.
+    // All 8 combinations of (useDistrib, userOwnsMpi, isMpiInit) are accounted for:
+    //     0, 0, 0  legal:   nobody wants MPI
+    //     0, 0, 1  illegal: user lied about ownership (A)
+    //     0, 1, 0  legal:   user owns MPI but does nothing
+    //     0, 1, 1  legal:   user owns MPI, QuEST won't use it
+    //     1, 0, 0  legal:   QuEST will init MPI
+    //     1, 0, 1  illegal: user lied about ownership (A)
+    //     1, 1, 0  illegal: user has responsibility to pre-init (B)
+    //     1, 1, 1  legal:   user fulfilled responsibility to pre-init
+
+    if (global_isValidationEnabled) {
+
+        // query the state of MPI itself, NOT whether QuEST is making use of it
+        const bool mpiAlreadyInit = comm_isMpiInit();
+
+        if (!userOwnsMpi) {
+            // (A) without declared ownership, the user is forbidden to initialise MPI,
+            //     even when QuEST itself is not distributed (useDistrib=0), for clarity
+            assertThat(!mpiAlreadyInit, report::QUEST_OWNED_MPI_WAS_PRE_INIT, caller);
+
+        } else if (useDistrib) {
+            // (B) QuEST will use the user's MPI, so they must have pre-initialised it
+            assertThat(mpiAlreadyInit, report::USER_OWNED_MPI_WAS_NOT_INIT, caller);
+        }
+    }
+}
+
+void validate_mpiSubCommIsNonNull(bool isNonNull, const char* caller) {
+
+    // the caller pre-computes whether the user-given communicator is non-null; we only
+    // assert it (attributing any failure to 'caller') while validation is enabled
+    if (global_isValidationEnabled)
+        assertThat(isNonNull, report::USER_GIVEN_MPI_COMMUNICATOR_IS_NULL, caller);
+}
+
+void validate_mpiSubCommSetSucceeded(bool success, const char* caller) {
+
+    // 'success' is decided by the caller (per the error message, presumably the outcome
+    // of MPI_Comm_dup()); we only assert it while validation is enabled
+    if (global_isValidationEnabled)
+        assertThat(success, report::USER_GIVEN_MPI_COMMUNICATOR_FAILED_TO_SET, caller);
+}
+
 
 
 /*
diff --git a/quest/src/core/validation.hpp b/quest/src/core/validation.hpp
index 66fb8f546..787316326 100644
--- a/quest/src/core/validation.hpp
+++ b/quest/src/core/validation.hpp
@@ -77,6 +77,12 @@ void validate_newEnvNodesEachHaveUniqueGpu(const char* caller);
 
 void validate_gpuIsCuQuantumCompatible(const char* caller);
 
+void validate_mpiInitStatus(bool useDistrib, bool userOwnsMpi, const char* caller);
+
+void validate_mpiSubCommIsNonNull(bool isNonNull, const char* caller);
+
+void validate_mpiSubCommSetSucceeded(bool success, const char* caller);
+
 
 
 /*
diff --git a/quest/src/gpu/gpu_config.cpp b/quest/src/gpu/gpu_config.cpp
index 5bf4b257f..4e03217e5 100644
--- a/quest/src/gpu/gpu_config.cpp
+++ b/quest/src/gpu/gpu_config.cpp
@@ -395,7 +395,7 @@ bool gpu_areAnyNodesBoundToSameGpu() {
 #if QUEST_COMPILE_CUDA
     assert_gpuHasBeenBound(hasGpuBeenBound);
 
-    if (!comm_isInit())
+    if (!comm_isActive())
         return false;
 
     // obtain bound GPU's UUID; a unique identifier 16-char identifier
diff --git a/tests/unit/environment.cpp b/tests/unit/environment.cpp
index 344ac5864..9ecf8e376 100644
--- a/tests/unit/environment.cpp
+++ b/tests/unit/environment.cpp
@@ -158,13 +158,6 @@ TEST_CASE( "getQuESTEnv", TEST_CATEGORY ) {
 
         QuESTEnv env = getQuESTEnv();
 
-        REQUIRE( (env.isMultithreaded     == 0 || env.isMultithreaded     == 1) );
-        REQUIRE( (env.isGpuAccelerated    == 0 || env.isGpuAccelerated    == 1) );
-        REQUIRE( (env.isDistributed       == 0 || env.isDistributed       == 1) );
-        REQUIRE( (env.userOwnsMpi         == 0 || env.userOwnsMpi         == 1) );
-        REQUIRE( (env.isCuQuantumEnabled  == 0 || env.isCuQuantumEnabled  == 1) );
-        REQUIRE( (env.isGpuSharingEnabled == 0 || env.isGpuSharingEnabled == 1) );
-        
         REQUIRE( env.rank     >= 0 );
         REQUIRE( env.numNodes >= 0 );