diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml
index 583749df4..0cec48613 100644
--- a/.github/workflows/audit.yml
+++ b/.github/workflows/audit.yml
@@ -50,9 +50,9 @@ jobs:
run: >
cmake -B ${{ env.build_dir }}
-DCMAKE_CXX_COMPILER=clang++
- -DENABLE_TESTING=ON
- -DENABLE_MULTITHREADING=OFF
- -DFLOAT_PRECISION=${{ matrix.precision }}
+ -DQUEST_BUILD_TESTS=ON
+ -DQUEST_ENABLE_OMP=OFF
+ -DQUEST_FLOAT_PRECISION=${{ matrix.precision }}
-DCMAKE_CXX_FLAGS="${{ env.sanitiser_flags }}"
-DCMAKE_EXE_LINKER_FLAGS="${{ env.sanitiser_flags }}"
@@ -92,9 +92,9 @@ jobs:
- name: Configure CMake
run: >
cmake -B ${{ env.build_dir }}
- -DENABLE_TESTING=ON
- -DENABLE_MULTITHREADING=OFF
- -DFLOAT_PRECISION=${{ matrix.precision }}
+ -DQUEST_BUILD_TESTS=ON
+ -DQUEST_ENABLE_OMP=OFF
+ -DQUEST_FLOAT_PRECISION=${{ matrix.precision }}
- name: Compile QuEST
run: cmake --build ${{ env.build_dir }} --parallel
@@ -147,8 +147,8 @@ jobs:
run: >
cmake -B .
-DCMAKE_BUILD_TYPE=Release
- -DENABLE_TESTING=ON
- -DENABLE_MULTITHREADING=OFF
+ -DQUEST_BUILD_TESTS=ON
+ -DQUEST_ENABLE_OMP=OFF
-DCMAKE_CXX_FLAGS="--coverage"
-DCMAKE_EXE_LINKER_FLAGS="--coverage"
diff --git a/.github/workflows/compile.yml b/.github/workflows/compile.yml
index 0950b7dbb..c86de84f1 100644
--- a/.github/workflows/compile.yml
+++ b/.github/workflows/compile.yml
@@ -239,16 +239,16 @@ jobs:
- name: Configure CMake
run: >
cmake -B ${{ env.build_dir }}
- -DBUILD_EXAMPLES=ON
- -DENABLE_TESTING=ON
- -DFLOAT_PRECISION=${{ matrix.precision }}
- -DENABLE_DEPRECATED_API=${{ matrix.deprecated }}
- -DDISABLE_DEPRECATION_WARNINGS=${{ matrix.deprecated }}
- -DENABLE_MULTITHREADING=${{ matrix.omp }}
- -DENABLE_DISTRIBUTION=${{ matrix.mpi }}
- -DENABLE_CUDA=${{ matrix.cuda }}
- -DENABLE_HIP=${{ matrix.hip }}
- -DENABLE_CUQUANTUM=${{ matrix.cuquantum }}
+ -DQUEST_BUILD_EXAMPLES=ON
+ -DQUEST_BUILD_TESTS=ON
+ -DQUEST_FLOAT_PRECISION=${{ matrix.precision }}
+ -DQUEST_ENABLE_DEPRECATED_API=${{ matrix.deprecated }}
+ -DQUEST_DISABLE_DEPRECATION_WARNINGS=${{ matrix.deprecated }}
+ -DQUEST_ENABLE_OMP=${{ matrix.omp }}
+ -DQUEST_ENABLE_MPI=${{ matrix.mpi }}
+ -DQUEST_ENABLE_CUDA=${{ matrix.cuda }}
+ -DQUEST_ENABLE_HIP=${{ matrix.hip }}
+ -DQUEST_ENABLE_CUQUANTUM=${{ matrix.cuquantum }}
-DCMAKE_CUDA_ARCHITECTURES=${{ env.cuda_arch }}
-DCMAKE_HIP_ARCHITECTURES=${{ env.hip_arch }}
-DCMAKE_CXX_COMPILER=${{ matrix.compiler }}
diff --git a/.github/workflows/test_free.yml b/.github/workflows/test_free.yml
index e0837bfde..2d332e842 100644
--- a/.github/workflows/test_free.yml
+++ b/.github/workflows/test_free.yml
@@ -63,11 +63,11 @@ jobs:
- name: Configure CMake
run: >
cmake -B ${{ env.build_dir }}
- -DENABLE_TESTING=ON
- -DENABLE_MULTITHREADING=OFF
- -DENABLE_DEPRECATED_API=${{ matrix.version == 3 && 'ON' || 'OFF' }}
- -DDISABLE_DEPRECATION_WARNINGS=${{ matrix.version == 3 && 'ON' || 'OFF' }}
- -DFLOAT_PRECISION=${{ matrix.precision }}
+ -DQUEST_BUILD_TESTS=ON
+ -DQUEST_ENABLE_OMP=OFF
+ -DQUEST_ENABLE_DEPRECATED_API=${{ matrix.version == 3 && 'ON' || 'OFF' }}
+ -DQUEST_DISABLE_DEPRECATION_WARNINGS=${{ matrix.version == 3 && 'ON' || 'OFF' }}
+ -DQUEST_FLOAT_PRECISION=${{ matrix.precision }}
# force 'Release' build (needed by MSVC to enable optimisations)
- name: Compile
@@ -80,11 +80,11 @@ jobs:
# are manually excluding each integration test by name
- name: Run v4 tests
if: ${{ matrix.version == 4 }}
- run: ctest -j2 --output-on-failure --schedule-random -E "density evolution"
+ run: ctest -j2 --output-on-failure --schedule-random -C Release -E "density evolution"
working-directory: ${{ env.build_dir }}
# run v3 unit tests in random order
- name: Run v3 tests
if: ${{ matrix.version == 3 }}
- run: ctest -j2 --output-on-failure --schedule-random
+ run: ctest -j2 --output-on-failure --schedule-random -C Release
working-directory: ${{ env.depr_dir }}
diff --git a/.github/workflows/test_paid.yml b/.github/workflows/test_paid.yml
index 070592399..63518c90a 100644
--- a/.github/workflows/test_paid.yml
+++ b/.github/workflows/test_paid.yml
@@ -136,16 +136,16 @@ jobs:
- name: Configure CMake
run: >
cmake -B ${{ env.build_dir }}
- -DENABLE_TESTING=ON
- -DFLOAT_PRECISION=${{ matrix.precision }}
- -DENABLE_DEPRECATED_API=${{ matrix.version == 3 && 'ON' || 'OFF' }}
- -DENABLE_MULTITHREADING=${{ matrix.omp }}
- -DENABLE_DISTRIBUTION=${{ matrix.mpi }}
- -DENABLE_CUDA=${{ matrix.cuda }}
- -DENABLE_CUQUANTUM=${{ matrix.cuquantum }}
+ -DQUEST_BUILD_TESTS=ON
+ -DQUEST_FLOAT_PRECISION=${{ matrix.precision }}
+ -DQUEST_ENABLE_DEPRECATED_API=${{ matrix.version == 3 && 'ON' || 'OFF' }}
+ -DQUEST_ENABLE_OMP=${{ matrix.omp }}
+ -DQUEST_ENABLE_MPI=${{ matrix.mpi }}
+ -DQUEST_ENABLE_CUDA=${{ matrix.cuda }}
+ -DQUEST_ENABLE_CUQUANTUM=${{ matrix.cuquantum }}
-DCMAKE_CUDA_ARCHITECTURES=${{ env.cuda_arch }}
- -DTEST_ALL_DEPLOYMENTS=${{ env.test_all_deploys }}
- -DTEST_MAX_NUM_QUBIT_PERMUTATIONS=${{ env.num_qubit_perms }}
+ -DQUEST_TEST_TRY_ALL_DEPLOYMENTS=${{ env.test_all_deploys }}
+ -DQUEST_TEST_MAX_NUM_QUBIT_PERMUTATIONS=${{ env.num_qubit_perms }}
- name: Compile
run: cmake --build ${{ env.build_dir }} --parallel
@@ -153,8 +153,8 @@ jobs:
# specifying only env-vars with non-default values
- name: Configure tests with environment variables
run: |
- echo "TEST_MAX_NUM_QUBIT_PERMUTATIONS=${{ env.num_qubit_perms }}" >> $GITHUB_ENV
- echo "TEST_ALL_DEPLOYMENTS=${{ env.test_all_deploys }}" >> $GITHUB_ENV
+ echo "QUEST_TEST_MAX_NUM_QUBIT_PERMUTATIONS=${{ env.num_qubit_perms }}" >> $GITHUB_ENV
+ echo "QUEST_TEST_TRY_ALL_DEPLOYMENTS=${{ env.test_all_deploys }}" >> $GITHUB_ENV
# cannot use ctests when distributed, grr!
- name: Run multithreaded + distributed v4 tests (16 nodes, 4 threads eeach)
@@ -264,13 +264,13 @@ jobs:
- name: Configure CMake
run: >
cmake -B ${{ env.build_dir }}
- -DENABLE_TESTING=ON
- -DFLOAT_PRECISION=${{ matrix.precision }}
- -DENABLE_DEPRECATED_API=${{ matrix.version == 3 && 'ON' || 'OFF' }}
- -DENABLE_MULTITHREADING=${{ matrix.omp }}
- -DENABLE_DISTRIBUTION=${{ matrix.mpi }}
- -DENABLE_CUDA=${{ matrix.cuda }}
- -DENABLE_CUQUANTUM=${{ matrix.cuquantum }}
+ -DQUEST_BUILD_TESTS=ON
+ -DQUEST_FLOAT_PRECISION=${{ matrix.precision }}
+ -DQUEST_ENABLE_DEPRECATED_API=${{ matrix.version == 3 && 'ON' || 'OFF' }}
+ -DQUEST_ENABLE_OMP=${{ matrix.omp }}
+ -DQUEST_ENABLE_MPI=${{ matrix.mpi }}
+ -DQUEST_ENABLE_CUDA=${{ matrix.cuda }}
+ -DQUEST_ENABLE_CUQUANTUM=${{ matrix.cuquantum }}
-DCMAKE_CUDA_ARCHITECTURES=${{ env.cuda_arch }}
-DCMAKE_CXX_FLAGS=${{ matrix.mpi == 'ON' && matrix.cuda == 'ON' && '-fno-lto' || '' }}
@@ -280,9 +280,9 @@ jobs:
# specify only env-vars with non-default values
- name: Configure tests with environment variables
run: |
- echo "TEST_ALL_DEPLOYMENTS=${{ env.test_all_deploys }}" >> $GITHUB_ENV
- echo "TEST_NUM_MIXED_DEPLOYMENT_REPETITIONS=${{ env.test_repetitions }}" >> $GITHUB_ENV
- echo "PERMIT_NODES_TO_SHARE_GPU=${{ env.mpi_share_gpu }}" >> $GITHUB_ENV
+ echo "QUEST_TEST_TRY_ALL_DEPLOYMENTS=${{ env.test_all_deploys }}" >> $GITHUB_ENV
+ echo "QUEST_TEST_NUM_MIXED_DEPLOYMENT_REPETITIONS=${{ env.test_repetitions }}" >> $GITHUB_ENV
+ echo "QUEST_PERMIT_NODES_TO_SHARE_GPU=${{ env.mpi_share_gpu }}" >> $GITHUB_ENV
# cannot use ctests when distributed, grr!
- name: Run GPU + distributed v4 mixed tests (4 nodes sharing 1 GPU)
diff --git a/AUTHORS.txt b/AUTHORS.txt
index 907135679..b06846df8 100644
--- a/AUTHORS.txt
+++ b/AUTHORS.txt
@@ -44,6 +44,8 @@ Dr Ian Bush [consultant]
HPC
External contributors:
+Íñigo Aréjula Aísa
+ patched validation error in the experimental user-owned MPI interface (#722)
Daniel Expósito Patiño
patched the applyMultiStateControlledSqrtSwap C++ signature (#737)
Diogo Pratas Maia
@@ -70,8 +72,8 @@ SchineCompton
patched GPU Cmake Release build
Christopher J. Anders
patched Cmake build when multhithreading defaults off
- revsied Cmake min version for GPU build
+ revised Cmake min version for GPU build
Gleb Struchalin
patched the cmake standalone build
Milos Prokop
- implemented serial prototype of initDiagonalOpFromPauliHamil
\ No newline at end of file
+ implemented serial prototype of initDiagonalOpFromPauliHamil
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f91c05f83..b5a438713 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -60,10 +60,10 @@ endif()
# Default to "Release"
# Using recipe from Kitware Blog post
# https://www.kitware.com/cmake-and-the-default-build-type/
-set(default_build_type "Release")
+set(quest_default_build_type "Release")
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
- message(STATUS "Setting build type to '${default_build_type}' as none was specified.")
- set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE
+ message(STATUS "Setting build type to '${quest_default_build_type}' as none was specified.")
+ set(CMAKE_BUILD_TYPE "${quest_default_build_type}" CACHE
STRING "Choose the type of build." FORCE)
# Set the possible values of build type for cmake-gui
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
@@ -79,50 +79,50 @@ if(PROJECT_IS_TOP_LEVEL)
endif ()
# Library naming
-set(LIB_NAME QuEST
- CACHE
+set(QUEST_OUTPUT_LIB_NAME QuEST
+ CACHE
STRING
- "Change library name. LIB_NAME is QuEST by default."
+ "Change library name. QUEST_OUTPUT_LIB_NAME is QuEST by default."
)
-message(STATUS "Library will be named lib${LIB_NAME}. Set LIB_NAME to modify.")
+message(STATUS "Library will be named lib${QUEST_OUTPUT_LIB_NAME}. Set QUEST_OUTPUT_LIB_NAME to modify.")
-option(VERBOSE_LIB_NAME "Modify library name based on compilation configuration. Turned OFF by default." OFF)
-message(STATUS "Verbose library naming is turned ${VERBOSE_LIB_NAME}. Set VERBOSE_LIB_NAME to modify.")
+option(QUEST_APPEND_CONFIG_TO_LIB_NAME "Modify library name based on compilation configuration. Turned OFF by default." OFF)
+message(STATUS "Verbose library naming is turned ${QUEST_APPEND_CONFIG_TO_LIB_NAME}. Set QUEST_APPEND_CONFIG_TO_LIB_NAME to modify.")
# Precision
-set(FLOAT_PRECISION 2
- CACHE
- STRING
+set(QUEST_FLOAT_PRECISION 2
+ CACHE
+ STRING
"Whether to use single, double, or quad floating point precision in the state vector. {1,2,4}"
)
-set_property(CACHE FLOAT_PRECISION PROPERTY STRINGS
+set_property(CACHE QUEST_FLOAT_PRECISION PROPERTY STRINGS
1
2
4
)
-message(STATUS "Precision set to ${FLOAT_PRECISION}. Set FLOAT_PRECISION to modify.")
+message(STATUS "Precision set to ${QUEST_FLOAT_PRECISION}. Set QUEST_FLOAT_PRECISION to modify.")
# Examples
option(
- BUILD_EXAMPLES
+ QUEST_BUILD_EXAMPLES
"Whether the example programs will be built alongside the QuEST library. Turned OFF by default."
OFF
)
-message(STATUS "Examples are turned ${BUILD_EXAMPLES}. Set BUILD_EXAMPLES to modify.")
+message(STATUS "Examples are turned ${QUEST_BUILD_EXAMPLES}. Set QUEST_BUILD_EXAMPLES to modify.")
# Testing
option(
- ENABLE_TESTING
+ QUEST_BUILD_TESTS
"Whether the test suite will be built alongside the QuEST library. Turned ON by default."
OFF
)
-message(STATUS "Testing is turned ${ENABLE_TESTING}. Set ENABLE_TESTING to modify.")
+message(STATUS "Testing is turned ${QUEST_BUILD_TESTS}. Set QUEST_BUILD_TESTS to modify.")
option(
- DOWNLOAD_CATCH2
+ QUEST_TESTS_DOWNLOAD_CATCH2
"Whether Catch2 v3 will be downloaded if it is not found. Turned ON by default."
ON
)
@@ -130,61 +130,97 @@ option(
# Multithreading
option(
- ENABLE_MULTITHREADING
- "Whether QuEST will be built with shared-memory parallelism support using OpenMP. Turned ON by default."
+ QUEST_ENABLE_OMP
+ "Whether QuEST will be built with shared-memory parallelism support using OpenMP. Turned ON by default."
ON
)
-message(STATUS "Multithreading is turned ${ENABLE_MULTITHREADING}. Set ENABLE_MULTITHREADING to modify.")
+message(STATUS "Multithreading is turned ${QUEST_ENABLE_OMP}. Set QUEST_ENABLE_OMP to modify.")
+
+
+# NUMA
+option(
+ QUEST_ENABLE_NUMA
+ "Whether QuEST will be built with NUMA awareness, when also using OpenMP. Turned ON by default."
+ ON
+)
+message(STATUS "NUMA awareness is turned ${QUEST_ENABLE_NUMA}. Set QUEST_ENABLE_NUMA to modify.")
# Distribution
option(
- ENABLE_DISTRIBUTION
- "Whether QuEST will be built with distributed parallelism support using MPI. Turned OFF by default."
+ QUEST_ENABLE_MPI
+ "Whether QuEST will be built with distributed parallelism support using MPI. Turned OFF by default."
OFF
)
-message(STATUS "Distribution is turned ${ENABLE_DISTRIBUTION}. Set ENABLE_DISTRIBUTION to modify.")
+message(STATUS "Distribution is turned ${QUEST_ENABLE_MPI}. Set QUEST_ENABLE_MPI to modify.")
+
+option(
+ QUEST_ENABLE_SUBCOMM
+ "Whether QuEST will be built with support for restricting it to a user-defined MPI communicator. Turned OFF by default."
+ OFF
+)
+message(STATUS "Custom communicator support is turned ${QUEST_ENABLE_SUBCOMM}. Set QUEST_ENABLE_SUBCOMM to modify.")
# GPU Acceleration
option(
- ENABLE_CUDA
+ QUEST_ENABLE_CUDA
"Whether QuEST will be built with support for NVIDIA GPU acceleration. Turned OFF by default."
OFF
)
-message(STATUS "NVIDIA GPU acceleration is turned ${ENABLE_CUDA}. Set ENABLE_CUDA to modify.")
+message(STATUS "NVIDIA GPU acceleration is turned ${QUEST_ENABLE_CUDA}. Set QUEST_ENABLE_CUDA to modify.")
option(
- ENABLE_CUQUANTUM
+ QUEST_ENABLE_CUQUANTUM
"Whether QuEST will be built with support for NVIDIA cuQuantum. Turned OFF by default."
OFF
)
-message(STATUS "CuQuantum support is turned ${ENABLE_CUQUANTUM}. Set ENABLE_CUQUANTUM to modify.")
+message(STATUS "CuQuantum support is turned ${QUEST_ENABLE_CUQUANTUM}. Set QUEST_ENABLE_CUQUANTUM to modify.")
option(
- ENABLE_HIP
+ QUEST_ENABLE_HIP
"Whether QuEST will be built with support for AMD GPU acceleration. Turned OFF by default."
OFF
)
-message(STATUS "AMD GPU acceleration is turned ${ENABLE_HIP}. Set ENABLE_HIP to modify.")
+message(STATUS "AMD GPU acceleration is turned ${QUEST_ENABLE_HIP}. Set QUEST_ENABLE_HIP to modify.")
+
+
+# GPU Performance Tuning
+# (We do not print this value when configuring CMake as it is for advanced users only)
+
+string(CONCAT quest_tpb_description # (joined into ONE string; a multi-value set() would instead build a ';'-separated list)
+ "The default number of threads per block QuEST will use when offloading to a GPU. Set to 128 by default. "
+ "Must be a multiple of 32 (on NVIDIA GPUs) or 64 (on AMD GPUs). Can be overridden at executable launch "
+ "via an environment variable of the same name, or during runtime via a corresponding API setter function."
+)
+set(QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK 128
+ CACHE STRING
+ "${quest_tpb_description}")
+mark_as_advanced(QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK)
# Deprecated API
option(
- ENABLE_DEPRECATED_API
+ QUEST_ENABLE_DEPRECATED_API
"Whether QuEST will be built with deprecated API support. Turned OFF by default."
OFF
)
-message(STATUS "Deprecated API support is turned ${ENABLE_DEPRECATED_API}. Set ENABLE_DEPRECATED_API to modify.")
+message(STATUS "Deprecated API support is turned ${QUEST_ENABLE_DEPRECATED_API}. Set QUEST_ENABLE_DEPRECATED_API to modify.")
option(
- DISABLE_DEPRECATION_WARNINGS
+ QUEST_DISABLE_DEPRECATION_WARNINGS
"Whether to disable compile-time warnings ordinarily triggered by use of the deprecated API. Turned OFF by default."
OFF
)
-message(STATUS "Disabling of deprecated API warnings is turned ${DISABLE_DEPRECATION_WARNINGS}. Set DISABLE_DEPRECATION_WARNINGS to modify.")
+message(STATUS
+ "Disabling of deprecated API warnings is turned ${QUEST_DISABLE_DEPRECATION_WARNINGS}. "
+ "Set QUEST_DISABLE_DEPRECATION_WARNINGS to modify."
+)
-option(INSTALL_BINARIES "Whether to include example and user binaries in the install." OFF)
+option(QUEST_INSTALL_BINARIES "Whether to include example and user binaries in the install." OFF)
+if (QUEST_INSTALL_BINARIES)
+ message(STATUS "Including example and user binaries in the install (if built).")
+endif()
@@ -193,38 +229,74 @@ option(INSTALL_BINARIES "Whether to include example and user binaries in the ins
# ============================
-if (ENABLE_CUDA AND ENABLE_HIP)
+if (QUEST_ENABLE_CUDA AND QUEST_ENABLE_HIP)
message(FATAL_ERROR "QuEST cannot support CUDA and HIP simultaneously.")
endif()
-if ((ENABLE_CUDA OR ENABLE_HIP) AND FLOAT_PRECISION STREQUAL 4)
+if ((QUEST_ENABLE_CUDA OR QUEST_ENABLE_HIP) AND QUEST_FLOAT_PRECISION STREQUAL 4)
message(FATAL_ERROR "Quad precision is not supported on GPU. Please disable GPU acceleration or lower precision.")
endif()
-if (ENABLE_CUQUANTUM AND NOT ENABLE_CUDA)
+if (QUEST_ENABLE_CUQUANTUM AND NOT QUEST_ENABLE_CUDA)
message(FATAL_ERROR "Use of cuQuantum requires CUDA.")
endif()
+if (QUEST_ENABLE_SUBCOMM AND NOT QUEST_ENABLE_MPI)
+ message(FATAL_ERROR "Distribution must be enabled to make use of a user-defined communicator for QuEST.")
+endif()
+
+
if(WIN32)
# Force MSVC to export all symbols in a shared library, like GCC and clang
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
- if (ENABLE_TESTING AND BUILD_SHARED_LIBS)
+ if (QUEST_BUILD_TESTS AND BUILD_SHARED_LIBS)
message(WARNING "Compiling the tests on Windows requires BUILD_SHARED_LIBS=OFF which we now force.")
set(BUILD_SHARED_LIBS OFF)
endif()
- if (ENABLE_DEPRECATED_API)
+ if (QUEST_ENABLE_DEPRECATED_API)
message(FATAL_ERROR "The deprecated API is not compatible with MSVC.")
endif()
endif()
+# validate numTPB even when GPU not compiled
+if (QUEST_ENABLE_HIP)
+ set(quest_warp_size 64)
+ set(quest_gpu_model "AMD GPUs (via HIP)")
+else()
+ set(quest_warp_size 32)
+ set(quest_gpu_model "NVIDIA GPUs (via CUDA), or when not targeting GPUs")
+endif()
+math(EXPR quest_tpb_remainder "${QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK} % ${quest_warp_size}")
+if ((NOT (quest_tpb_remainder EQUAL 0)) OR NOT (QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK GREATER 0))
+ message(FATAL_ERROR
+ "QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK was set to ${QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK}, "
+ "but it must be a positive multiple of ${quest_warp_size} when compiling for ${quest_gpu_model}."
+ )
+endif()
+
+
+# warn when numTPB will be later overridden by the current environment variable
+if(
+ DEFINED ENV{QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK}
+ AND NOT "$ENV{QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK}" STREQUAL ""
+ AND NOT "$ENV{QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK}" STREQUAL "${QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK}"
+)
+ message(WARNING
+ "The CMake option QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK=${QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK} "
+ "differs from the current environment variable (of the same name) value of $ENV{QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK}. "
+ "If not cleared before QuEST is launched, the latter will override the former."
+ )
+endif()
+
+
# Encourage high-performance Release build
# Taken from Kitware's exmaple of problematic code at
@@ -251,32 +323,32 @@ endif()
# ============================
-if (VERBOSE_LIB_NAME)
+if (QUEST_APPEND_CONFIG_TO_LIB_NAME)
- string(CONCAT LIB_NAME ${LIB_NAME} "-fp${FLOAT_PRECISION}")
+ string(CONCAT QUEST_OUTPUT_LIB_NAME ${QUEST_OUTPUT_LIB_NAME} "-fp${QUEST_FLOAT_PRECISION}")
- if (ENABLE_MULTITHREADING)
- string(CONCAT LIB_NAME ${LIB_NAME} "+mt")
+ if (QUEST_ENABLE_OMP)
+ string(CONCAT QUEST_OUTPUT_LIB_NAME ${QUEST_OUTPUT_LIB_NAME} "+mt")
endif()
- if (ENABLE_DISTRIBUTION)
- string(CONCAT LIB_NAME ${LIB_NAME} "+mpi")
+ if (QUEST_ENABLE_MPI)
+ string(CONCAT QUEST_OUTPUT_LIB_NAME ${QUEST_OUTPUT_LIB_NAME} "+mpi")
endif()
- if (ENABLE_CUDA)
- string(CONCAT LIB_NAME ${LIB_NAME} "+cuda")
+ if (QUEST_ENABLE_CUDA)
+ string(CONCAT QUEST_OUTPUT_LIB_NAME ${QUEST_OUTPUT_LIB_NAME} "+cuda")
endif()
- if (ENABLE_HIP)
- string(CONCAT LIB_NAME ${LIB_NAME} "+hip")
+ if (QUEST_ENABLE_HIP)
+ string(CONCAT QUEST_OUTPUT_LIB_NAME ${QUEST_OUTPUT_LIB_NAME} "+hip")
endif()
- if (ENABLE_CUQUANTUM)
- string(CONCAT LIB_NAME ${LIB_NAME} "+cuquantum")
+ if (QUEST_ENABLE_CUQUANTUM)
+ string(CONCAT QUEST_OUTPUT_LIB_NAME ${QUEST_OUTPUT_LIB_NAME} "+cuquantum")
endif()
- if (ENABLE_DEPRECATED_API)
- string(CONCAT LIB_NAME ${LIB_NAME} "+depr")
+ if (QUEST_ENABLE_DEPRECATED_API)
+ string(CONCAT QUEST_OUTPUT_LIB_NAME ${QUEST_OUTPUT_LIB_NAME} "+depr")
endif()
endif()
@@ -313,7 +385,7 @@ set_target_properties(QuEST PROPERTIES
# while the source code is entirely C++ and requires C++17,
# and the tests further require C++20 (handled in tests/).
# Yet, we here specify C++17 for the source, and C11 as only
-# applies to the C interface when users specify USER_SOURCE,
+# applies to the C interface when users specify USER_SOURCE_NAMES,
# to attemptedly minimise user confusion. Users wishing to
# link QuEST with C++14 should separate compilation.
target_compile_features(QuEST
@@ -344,7 +416,7 @@ target_compile_options(QuEST
# OpenMP
-if (ENABLE_MULTITHREADING)
+if (QUEST_ENABLE_OMP)
# find OpenMP, but fail gracefully...
find_package(OpenMP QUIET)
@@ -375,35 +447,38 @@ endif()
# NUMA (only relevant when multithreading)
-if (ENABLE_MULTITHREADING)
+if (QUEST_ENABLE_OMP AND QUEST_ENABLE_NUMA)
# Find NUMA - location of NUMA headers
if (WIN32)
- set(NUMA_AWARE 0)
+ set(QUEST_ENABLE_NUMA 0)
message(WARNING "Building on Windows, QuEST will not be aware of numa locality")
else()
include(FindPkgConfig)
pkg_search_module(NUMA numa IMPORTED_TARGET GLOBAL)
if (${NUMA_FOUND})
- set(NUMA_AWARE ${NUMA_FOUND})
+ set(QUEST_ENABLE_NUMA ${NUMA_FOUND})
target_link_libraries(QuEST PRIVATE PkgConfig::NUMA)
message(STATUS "NUMA awareness is enabled.")
else()
- set(NUMA_AWARE 0)
+ set(QUEST_ENABLE_NUMA 0)
message(WARNING "libnuma not found, QuEST will not be aware of numa locality")
endif()
endif()
else()
- set(NUMA_AWARE 0)
+ set(QUEST_ENABLE_NUMA 0)
endif()
# MPI
-if (ENABLE_DISTRIBUTION)
+if (QUEST_ENABLE_MPI)
find_package(MPI REQUIRED
+ # Component CXX provides the MPI C API as usable from C++,
+ # NOT the deprecated MPI C++ bindings
COMPONENTS CXX
)
+
target_link_libraries(QuEST
PRIVATE
MPI::MPI_CXX
@@ -412,7 +487,7 @@ endif()
# CUDA
-if (ENABLE_CUDA)
+if (QUEST_ENABLE_CUDA)
# make nvcc use user cxx-compiler as default host (before cuda-host is set below)
if (NOT DEFINED CMAKE_CUDA_HOST_COMPILER)
@@ -437,7 +512,7 @@ endif()
# HIP
-if (ENABLE_HIP)
+if (QUEST_ENABLE_HIP)
# if generation fails (hip::amdhip64 not found), users can try setting
# CMAKE_MODULE_PATH to '/opt/rocm/cmake' or '/opt/rocm/hip/lib/cmake/hip'
@@ -460,7 +535,7 @@ endif()
# cuQuantum
-if (ENABLE_CUQUANTUM)
+if (QUEST_ENABLE_CUQUANTUM)
find_package(CUQUANTUM REQUIRED)
target_link_libraries(QuEST PRIVATE CUQUANTUM::cuStateVec)
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON)
@@ -474,28 +549,30 @@ endif()
# set vars which will be written to config.h.in (auto-converted to 0 or 1)
-set(COMPILE_OPENMP ${ENABLE_MULTITHREADING})
-set(COMPILE_MPI ${ENABLE_DISTRIBUTION})
-set(COMPILE_CUQUANTUM ${ENABLE_CUQUANTUM})
-set(INCLUDE_DEPRECATED_FUNCTIONS ${ENABLE_DEPRECATED_API})
+set(QUEST_COMPILE_OMP ${QUEST_ENABLE_OMP})
+set(QUEST_COMPILE_MPI ${QUEST_ENABLE_MPI})
+set(QUEST_COMPILE_SUBCOMM ${QUEST_ENABLE_SUBCOMM})
+set(QUEST_COMPILE_CUQUANTUM ${QUEST_ENABLE_CUQUANTUM})
+set(QUEST_INCLUDE_DEPRECATED_FUNCTIONS ${QUEST_ENABLE_DEPRECATED_API})
# (for the love of God cmake, create a concise syntax for this)
-if (ENABLE_CUDA OR ENABLE_HIP)
- set(COMPILE_CUDA 1)
+if (QUEST_ENABLE_CUDA OR QUEST_ENABLE_HIP)
+ set(QUEST_COMPILE_CUDA 1)
else()
- set(COMPILE_CUDA 0)
+ set(QUEST_COMPILE_CUDA 0)
endif()
+set(QUEST_COMPILE_HIP ${QUEST_ENABLE_HIP})
-# these vars are already set, but repeated here for clarity
-set(FLOAT_PRECISION ${FLOAT_PRECISION})
-set(NUMA_AWARE ${NUMA_AWARE})
-set(DISABLE_DEPRECATION_WARNINGS ${DISABLE_DEPRECATION_WARNINGS})
+# non-binary set vars which will be written to config.h.in (with a differing name)
+set(QUEST_UNSPECIFIED_DEFAULT_NUM_GPU_THREADS_PER_BLOCK ${QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK})
-# these do not appear in src but are saved for record-keeping in config.h.in
-set(COMPILE_HIP ${ENABLE_HIP})
+# these vars are already set (cmake name matches the macro name), but repeated here for clarity
+set(QUEST_FLOAT_PRECISION ${QUEST_FLOAT_PRECISION})
+set(QUEST_ENABLE_NUMA ${QUEST_ENABLE_NUMA})
+set(QUEST_DISABLE_DEPRECATION_WARNINGS ${QUEST_DISABLE_DEPRECATION_WARNINGS})
@@ -511,7 +588,7 @@ endif()
# Set output name
-set_target_properties(QuEST PROPERTIES OUTPUT_NAME ${LIB_NAME})
+set_target_properties(QuEST PROPERTIES OUTPUT_NAME ${QUEST_OUTPUT_LIB_NAME})
# Add source files
@@ -530,7 +607,11 @@ add_executable(min_example
)
target_link_libraries(min_example PRIVATE QuEST::QuEST)
-if (INSTALL_BINARIES)
+if (QUEST_ENABLE_MPI AND QUEST_ENABLE_SUBCOMM)
+ target_link_libraries(min_example PRIVATE MPI::MPI_CXX)
+endif()
+
+if (QUEST_INSTALL_BINARIES)
install(TARGETS min_example
RUNTIME
DESTINATION ${CMAKE_INSTALL_BINDIR}
@@ -539,7 +620,7 @@ endif ()
# all examples optionally built
-if (BUILD_EXAMPLES)
+if (QUEST_BUILD_EXAMPLES)
add_subdirectory(examples)
endif()
@@ -576,26 +657,26 @@ setup_quest_rpath(min_example)
# validate
-if (USER_SOURCE AND NOT OUTPUT_EXE)
- message(SEND_ERROR "USER_SOURCE specified, but not OUTPUT_EXE.")
+if (USER_SOURCE_NAMES AND NOT USER_OUTPUT_EXE_NAME)
+ message(SEND_ERROR "USER_SOURCE_NAMES specified, but not USER_OUTPUT_EXE_NAME.")
endif()
-if (OUTPUT_EXE AND NOT USER_SOURCE)
- message(SEND_ERROR "OUTPUT_EXE specified, but not USER_SOURCE.")
+if (USER_OUTPUT_EXE_NAME AND NOT USER_SOURCE_NAMES)
+ message(SEND_ERROR "USER_OUTPUT_EXE_NAME specified, but not USER_SOURCE_NAMES.")
endif()
# compile user source
-if (USER_SOURCE AND OUTPUT_EXE)
- message(STATUS "Compiling ${USER_SOURCE} to executable ${OUTPUT_EXE}.")
+if (USER_SOURCE_NAMES AND USER_OUTPUT_EXE_NAME)
+ message(STATUS "Compiling ${USER_SOURCE_NAMES} to executable ${USER_OUTPUT_EXE_NAME}.")
- add_executable(${OUTPUT_EXE} ${USER_SOURCE})
- target_link_libraries(${OUTPUT_EXE} PUBLIC QuEST)
+ add_executable(${USER_OUTPUT_EXE_NAME} ${USER_SOURCE_NAMES})
+ target_link_libraries(${USER_OUTPUT_EXE_NAME} PUBLIC QuEST)
- if (INSTALL_BINARIES)
- install(TARGETS ${OUTPUT_EXE} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+ if (QUEST_INSTALL_BINARIES)
+ install(TARGETS ${USER_OUTPUT_EXE_NAME} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
endif()
- setup_quest_rpath(${OUTPUT_EXE})
+ setup_quest_rpath(${USER_OUTPUT_EXE_NAME})
endif()
@@ -605,14 +686,14 @@ endif()
# ============================
-if (ENABLE_TESTING)
+if (QUEST_BUILD_TESTS)
# try find Catch2
set(CatchVersion 3.8.0)
find_package(Catch2 ${CatchVersion} QUIET)
# else try download Catch2
- if (NOT TARGET Catch2::Catch2 AND DOWNLOAD_CATCH2)
+ if (NOT TARGET Catch2::Catch2 AND QUEST_TESTS_DOWNLOAD_CATCH2)
message(STATUS "Catch2 not found, it will be downloaded and built in the build directory.")
Include(FetchContent)
@@ -654,12 +735,12 @@ install(TARGETS QuEST
# Write CMake version file for QuEST
-set(QuEST_INSTALL_CONFIGDIR "${CMAKE_INSTALL_LIBDIR}/cmake/QuEST")
+set(quest_install_config_dir "${CMAKE_INSTALL_LIBDIR}/cmake/QuEST")
# Write QuESTConfigVersion.cmake
write_basic_package_version_file(
- "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}ConfigVersion.cmake"
+ "${CMAKE_CURRENT_BINARY_DIR}/${QUEST_OUTPUT_LIB_NAME}ConfigVersion.cmake"
VERSION ${PROJECT_VERSION}
COMPATIBILITY AnyNewerVersion
)
@@ -668,16 +749,16 @@ write_basic_package_version_file(
# Configure QuESTConfig.cmake (from template)
configure_package_config_file(
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/QuESTConfig.cmake.in"
- "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}Config.cmake"
- INSTALL_DESTINATION "${QuEST_INSTALL_CONFIGDIR}"
+ "${CMAKE_CURRENT_BINARY_DIR}/${QUEST_OUTPUT_LIB_NAME}Config.cmake"
+ INSTALL_DESTINATION "${quest_install_config_dir}"
)
# Install them
install(FILES
- "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}Config.cmake"
- "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}ConfigVersion.cmake"
- DESTINATION "${QuEST_INSTALL_CONFIGDIR}"
+ "${CMAKE_CURRENT_BINARY_DIR}/${QUEST_OUTPUT_LIB_NAME}Config.cmake"
+ "${CMAKE_CURRENT_BINARY_DIR}/${QUEST_OUTPUT_LIB_NAME}ConfigVersion.cmake"
+ DESTINATION "${quest_install_config_dir}"
)
install(FILES
@@ -699,9 +780,9 @@ install(
install(
EXPORT QuESTTargets
- FILE "${LIB_NAME}Targets.cmake"
+ FILE "${QUEST_OUTPUT_LIB_NAME}Targets.cmake"
NAMESPACE QuEST::
- DESTINATION "${QuEST_INSTALL_CONFIGDIR}"
+ DESTINATION "${quest_install_config_dir}"
)
if(PROJECT_IS_TOP_LEVEL)
diff --git a/cmake/QuESTConfig.cmake.in b/cmake/QuESTConfig.cmake.in
index 5f112d9a4..76f7ff3d6 100644
--- a/cmake/QuESTConfig.cmake.in
+++ b/cmake/QuESTConfig.cmake.in
@@ -1,5 +1,5 @@
# @author Erich Essmann
-# @author Luc Jaulmes (patched use of LIB_NAME)
+# @author Luc Jaulmes (patched use of QUEST_OUTPUT_LIB_NAME)
@PACKAGE_INIT@
-include("${CMAKE_CURRENT_LIST_DIR}/@LIB_NAME@Targets.cmake")
+include("${CMAKE_CURRENT_LIST_DIR}/@QUEST_OUTPUT_LIB_NAME@Targets.cmake")
diff --git a/docs/cmake.md b/docs/cmake.md
index d3c23ee4c..fec90d76a 100644
--- a/docs/cmake.md
+++ b/docs/cmake.md
@@ -11,7 +11,7 @@
Version 4 of QuEST includes reworked CMake to support library builds, CMake export, and installation. Here we detail useful variables to configure the compilation of QuEST. If using a Unix-like operating system, any of these variables can be set using the `-D` flag when invoking CMake, for example:
```
-cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/opt/QuEST -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DENABLE_MULTITHREADING=ON -DENABLE_DISTRIBUTION=OFF ./
+cmake -Bbuild -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/opt/QuEST -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DQUEST_ENABLE_OMP=ON -DQUEST_ENABLE_MPI=OFF ./
```
Then, as detailed in [`compile.md`](compile.md), one need only move to the build directory and compile by invoking make:
@@ -32,21 +32,23 @@ make
| Variable | (Default) Values | Notes |
| -------- | ---------------- | ----- |
-| `LIB_NAME` | (`QuEST`), String | The QuEST library will be named `lib${LIB_NAME}.so`. Can be used to differentiate multiple versions of QuEST which have been compiled. |
-| `VERBOSE_LIB_NAME` | (`OFF`), `ON` | When turned on `LIB_NAME` will be modified according to the other configuration options chosen. For example compiling QuEST with multithreading, distribution, and double precision with `VERBOSE_LIB_NAME` turned on creates `libQuEST-fp2+mt+mpi.so`. |
-| `FLOAT_PRECISION` | (`2`), `1`, `4` | Determines which floating-point precision QuEST will use: double, single, or quad. *Note: Quad precision is not supported when also compiling for GPU.* |
-| `BUILD_EXAMPLES` | (`OFF`), `ON` | Determines whether the example programs will be built alongside QuEST. Note that `min_example` is always built. |
-| `INSTALL_BINARIES` | (`OFF`), `ON` | Determines whether compiled binaries such as the examples will be installed as well as the QuEST library. |
-| `ENABLE_MULTITHREADING` | (`ON`), `OFF` | Determines whether QuEST will be built with support for parallelisation with OpenMP. |
-| `ENABLE_DISTRIBUTION` | (`OFF`), `ON` | Determines whether QuEST will be built with support for parallelisation with MPI. |
-| `ENABLE_CUDA` | (`OFF`), `ON` | Determines whether QuEST will be built with support for NVIDIA GPU acceleration. If turned on, `CMAKE_CUDA_ARCHITECTURES` should probably also be set. |
-| `ENABLE_CUQUANTUM` | (`OFF`), `ON` | Determines whether QuEST will make use of the NVIDIA CuQuantum library. Cannot be turned on if `ENABLE_CUDA` is off. |
-| `ENABLE_HIP` | (`OFF`), `ON` | Determines whether QuEST will be built with support for AMD GPU acceleration. If turned on, `CMAKE_HIP_ARCHITECTURES` should probably also be set. |
-| `ENABLE_DEPRECATED_API` | (`OFF`), `ON` | Determines whether QuEST will be built with support for the deprecated (v3) API. ***Note**: this will generate compiler warnings and is not supported by MSVC.* |
-| `DISABLE_DEPRECATION_WARNINGS` | (`OFF`), `ON` | Whether to disable the compile-time deprecation warnings when using the deprecated (v3) API. |
-| `USER_SOURCE` | (Undefined), String | The source file for a user program which will be compiled alongside QuEST. `OUTPUT_EXE` *must* also be defined. |
-| `OUTPUT_EXE` | (Undefined), String | The name of the executable which will be created from the provided `USER_SOURCE`. `USER_SOURCE` *must* also be defined. |
-
+| `QUEST_OUTPUT_LIB_NAME` | (`QuEST`), String | The QuEST library will be named `lib${QUEST_OUTPUT_LIB_NAME}.so`. Can be used to differentiate multiple versions of QuEST which have been compiled. |
+| `QUEST_APPEND_CONFIG_TO_LIB_NAME` | (`OFF`), `ON` | When turned on `QUEST_OUTPUT_LIB_NAME` will be modified according to the other configuration options chosen. For example compiling QuEST with multithreading, distribution, and double precision with `QUEST_APPEND_CONFIG_TO_LIB_NAME` turned on creates `libQuEST-fp2+mt+mpi.so`. |
+| `QUEST_FLOAT_PRECISION` | (`2`), `1`, `4` | Determines which floating-point precision QuEST will use: double, single, or quad. *Note: Quad precision is not supported when also compiling for GPU.* |
+| `QUEST_BUILD_EXAMPLES` | (`OFF`), `ON` | Determines whether the example programs will be built alongside QuEST. Note that `min_example` is always built. |
+| `QUEST_INSTALL_BINARIES` | (`OFF`), `ON` | Determines whether compiled binaries such as the examples will be installed as well as the QuEST library. |
+| `QUEST_ENABLE_OMP` | (`ON`), `OFF` | Determines whether QuEST will be built with support for parallelisation with OpenMP. |
+| `QUEST_ENABLE_NUMA` | (`ON`), `OFF` | Determines whether QuEST will attempt to build with NUMA awareness when OpenMP is also enabled. |
+| `QUEST_ENABLE_MPI` | (`OFF`), `ON` | Determines whether QuEST will be built with support for parallelisation with MPI. |
+| `QUEST_ENABLE_SUBCOMM` | (`OFF`), `ON` | Determines whether QuEST will be built with support for custom MPI communicators. _**Note**: This has the unfortunate side-effect of requiring the MPI header in the public header for QuEST, meaning MPI will become a dependency of any application or library which includes the QuEST header whether it uses MPI or not._ |
+| `QUEST_ENABLE_CUDA` | (`OFF`), `ON` | Determines whether QuEST will be built with support for NVIDIA GPU acceleration. If turned on, `CMAKE_CUDA_ARCHITECTURES` should probably also be set. |
+| `QUEST_ENABLE_CUQUANTUM` | (`OFF`), `ON` | Determines whether QuEST will make use of the NVIDIA CuQuantum library. Cannot be turned on if `QUEST_ENABLE_CUDA` is off. |
+| `QUEST_ENABLE_HIP` | (`OFF`), `ON` | Determines whether QuEST will be built with support for AMD GPU acceleration. If turned on, `CMAKE_HIP_ARCHITECTURES` should probably also be set. |
+| `QUEST_ENABLE_DEPRECATED_API` | (`OFF`), `ON` | Determines whether QuEST will be built with support for the deprecated (v3) API. ***Note**: this will generate compiler warnings and is not supported by MSVC.* |
+| `QUEST_DISABLE_DEPRECATION_WARNINGS` | (`OFF`), `ON` | Whether to disable the compile-time deprecation warnings when using the deprecated (v3) API. |
+| `USER_SOURCE_NAMES` | (Undefined), String | The source file(s) for a user program which will be compiled alongside QuEST; separate multiple files with semicolons. `USER_OUTPUT_EXE_NAME` *must* also be defined. |
+| `USER_OUTPUT_EXE_NAME` | (Undefined), String | The name of the executable which will be created from the provided `USER_SOURCE_NAMES`. `USER_SOURCE_NAMES` *must* also be defined. |
+| `QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK` | (`128`), Number | The default number of threads per block QuEST will use when offloading to a GPU. *Must* be a positive multiple of 32 (on NVIDIA GPUs) or 64 (on AMD GPUs). This CMake variable sets the default if not later overridden. The number can be overridden at process launch time using an [environment variable](https://quest-kit.github.io/QuEST/group__modes.html#gaf1b71f54d270d3353fe072c66827339b) of the same name, or during runtime using [`setQuESTNumGpuThreadsPerBlock()`](https://quest-kit.github.io/QuEST/group__experimental.html#gae35a55c6d9366ce677e6aaaf4c1ff5ef). |
@@ -56,11 +58,11 @@ make
| Variable | (Default) Values | Notes |
| -------- | ---------------- | ----- |
-| `ENABLE_TESTING` | (`OFF`), `ON` | Determines whether to additionally build QuEST's unit and integration tests. If built, tests can be run from the `build` directory with `make test`, or `ctest`, or manually launched with `./tests/tests` which enables distribution (i.e. `mpirun -np 8 ./tests/tests`) |
-| `ENABLE_DEPRECATED_API` | (`OFF`), `ON` | As described above. When enabled alongside testing, the `v3 deprecated` unit tests will additionally be compiled and can be run from within `build` via `cd tests/deprecated; ctest`, or manually launched with `./tests/deprecated/dep_tests` (enabling distribution, as above).
-| `DOWNLOAD_CATCH2` | (`ON`), `OFF` | QuEST's tests require Catch2. By default, if you don't have Catch2 installed (or CMake doesn't find it) it will be downloaded from Github and built for you. If you don't want that to happen, for example because you _do_ have Catch2 installed, set this to `OFF`. |
+| `QUEST_BUILD_TESTS` | (`OFF`), `ON` | Determines whether to additionally build QuEST's unit and integration tests. If built, tests can be run from the `build` directory with `make test`, or `ctest`, or manually launched with `./tests/tests` which enables distribution (e.g. `mpirun -np 8 ./tests/tests`). |
+| `QUEST_ENABLE_DEPRECATED_API` | (`OFF`), `ON` | As described above. When enabled alongside testing, the `v3 deprecated` unit tests will additionally be compiled and can be run from within `build` via `cd tests/deprecated; ctest`, or manually launched with `./tests/deprecated/dep_tests` (enabling distribution, as above). |
+| `QUEST_TESTS_DOWNLOAD_CATCH2` | (`ON`), `OFF` | QuEST's tests require Catch2. By default, if you don't have Catch2 installed (or CMake doesn't find it) it will be downloaded from GitHub and built for you. If you don't want that to happen, for example because you _do_ have Catch2 installed, set this to `OFF`. |
-> As of `v4.2`, macros which configure the unit tests such as `TEST_MAX_NUM_QUBIT_PERMUTATIONS` have become environment variables specified before launch. See [`launch.md`](launch.md)
+> As of `v4.2`, macros which configure the unit tests such as `QUEST_TEST_MAX_NUM_QUBIT_PERMUTATIONS` have become environment variables specified before launch. See [`launch.md`](launch.md)
---------------------------
diff --git a/docs/compile.md b/docs/compile.md
index f11677fbf..ba4306a85 100644
--- a/docs/compile.md
+++ b/docs/compile.md
@@ -9,7 +9,7 @@
Some notes about this guide:
- we will always use a build directory called 'build'
- we will use spaces around cmake argnames and values for clarity, e.g.
- cmake -B build -D ENABLE_CUDA=ON
+ cmake -B build -D QUEST_ENABLE_CUDA=ON
- we will demonstrate the simplest and visually clear (and likely sub-optimal)
use-cases before progressively more visually complicated examples
-->
@@ -183,10 +183,10 @@ int main() {
return 0;
}
```
-simply specify variables `USER_SOURCE` and `OUTPUT_EXE` at _configure time_:
+simply specify variables `USER_SOURCE_NAMES` and `USER_OUTPUT_EXE_NAME` at _configure time_:
```bash
# configure
-cmake .. -D USER_SOURCE=myfile.c -D OUTPUT_EXE=myexec
+cmake .. -D USER_SOURCE_NAMES=myfile.c -D USER_OUTPUT_EXE_NAME=myexec
```
where
- `myfile.c` is your `C` source file (or `myfile.cpp` if using `C++`).
@@ -194,7 +194,7 @@ where
> [!IMPORTANT]
-> `USER_SOURCE` can be any relative or absolute path to a file, but `OUTPUT_EXE` must be strictly a filename and cannot contain subdirectories. See Location to change the output directory.
+> `USER_SOURCE_NAMES` can be any relative or absolute path to a file, but `USER_OUTPUT_EXE_NAME` must be strictly a filename and cannot contain subdirectories. See Location to change the output directory.
To compile multiple dependent files, such as
@@ -221,10 +221,10 @@ void myfunc() {
printf("hello quworld!\n");
}
```
-simply separate them by `;` in `USER_SOURCE`, wrapped in quotations:
+simply separate them by `;` in `USER_SOURCE_NAMES`, wrapped in quotations:
```bash
# configure
-cmake .. -D USER_SOURCE="myfile.cpp;otherfile.cpp" -D OUTPUT_EXE=myexec
+cmake .. -D USER_SOURCE_NAMES="myfile.cpp;otherfile.cpp" -D USER_OUTPUT_EXE_NAME=myexec
```
@@ -297,7 +297,7 @@ This applies to _all_ built executables, including your own custom files, the ex
> [!IMPORTANT]
> Configuration will fail if any two executables have the same output name since they will not be separated into subdirectories and will collide. We do not gaurantee that all test and example filenames will remain unique in the future, such that use of `CMAKE_RUNTIME_OUTPUT_DIRECTORY` may become invalid except when also specifying
> ```
-> -D ENABLE_TESTING=OFF -D BUILD_EXAMPLES=OFF
+> -D QUEST_BUILD_TESTS=OFF -D QUEST_BUILD_EXAMPLES=OFF
> ```
@@ -311,11 +311,11 @@ This applies to _all_ built executables, including your own custom files, the ex
QuEST's numerical precision can be configured at compile-time, informing what _type_, and ergo how many _bytes_, are used to represent each `qreal` (a floating-point real number) and `qcomp` (a complex amplitude). This affects the memory used by each `Qureg`, but also the user-facing `qreal` and `qcomp` types, as detailed below. Reducing the precision accelerates QuEST at the cost of worsened numerical accuracy.
-Precision is set at configure-time using the `FLOAT_PRECISION` [cmake variable](cmake.md), taking on the values `1`, `2` (default) or `4`.
+Precision is set at configure-time using the `QUEST_FLOAT_PRECISION` [cmake variable](cmake.md), taking on the values `1`, `2` (default) or `4`.
For example
```bash
# configure
-cmake .. -D FLOAT_PRECISION=1
+cmake .. -D QUEST_FLOAT_PRECISION=1
```
The values inform types:
@@ -393,7 +393,7 @@ QuEST itself accepts a variety of its preprocessors (mostly related to testing)
To compile all of QuEST's [`examples/`](/examples/), use
```bash
# configure
-cmake .. -D BUILD_EXAMPLES=ON
+cmake .. -D QUEST_BUILD_EXAMPLES=ON
# build
cmake --build .
@@ -433,7 +433,7 @@ To compile QuEST's latest unit and integration tests, use
```bash
# configure
-cmake .. -D ENABLE_TESTING=ON
+cmake .. -D QUEST_BUILD_TESTS=ON
# build
cmake --build .
@@ -451,7 +451,7 @@ This will compile an executable `tests` in subdirectory `build/tests/`, which ca
QuEST's deprecated v3 API has its own unit tests which can be additionally compiled (_except_ on Windows) via
```bash
# configure
-cmake .. -D ENABLE_TESTING=ON -D ENABLE_DEPRECATED_API=ON
+cmake .. -D QUEST_BUILD_TESTS=ON -D QUEST_ENABLE_DEPRECATED_API=ON
# build
cmake --build .
@@ -488,7 +488,7 @@ QuEST uses [OpenMP](https://www.openmp.org/) to perform multithreading, so accel
To compile with multithreading, simply enable it during configuration:
```bash
# configure
-cmake .. -D ENABLE_MULTITHREADING=ON
+cmake .. -D QUEST_ENABLE_OMP=ON
# build
cmake --build .
@@ -533,13 +533,13 @@ nvcc --version
To compile your QuEST application with CUDA-acceleration, specify both
```bash
# configure
-cmake .. -D ENABLE_CUDA=ON -D CMAKE_CUDA_ARCHITECTURES=$CC
+cmake .. -D QUEST_ENABLE_CUDA=ON -D CMAKE_CUDA_ARCHITECTURES=$CC
```
where `$CC` is your GPU's [compute capability](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities) (excluding the full-stop) which you can look up [here](https://developer.nvidia.com/cuda-gpus).
For example, compiling for the [NVIDIA A100](https://www.nvidia.com/en-us/data-center/a100/) looks like:
```bash
# configure
-cmake .. -D ENABLE_CUDA=ON -D CMAKE_CUDA_ARCHITECTURES=80
+cmake .. -D QUEST_ENABLE_CUDA=ON -D CMAKE_CUDA_ARCHITECTURES=80
```
@@ -567,14 +567,14 @@ The compiled executable can be run like any other, though the GPU behaviour can
> TODO!
> - ROCm
-> - ENABLE_HIP
+> - QUEST_ENABLE_HIP
> - CMAKE_HIP_ARCHITECTURES
To compile your QuEST application with HIP-acceleration, specify both
```bash
# configure
-cmake .. -D ENABLE_HIP=ON -D CMAKE_HIP_ARCHITECTURES=$TN
+cmake .. -D QUEST_ENABLE_HIP=ON -D CMAKE_HIP_ARCHITECTURES=$TN
```
where `$TN` is your AMD GPU's [LLVM target name](https://rocm.docs.amd.com/en/latest/reference/gpu-arch-specs.html#glossary). You can look this up [here](https://rocm.docs.amd.com/en/latest/reference/gpu-arch-specs.html), or find the names of all of your local GPUs by running the [ROCM agent enumerator](https://rocm.docs.amd.com/projects/rocminfo/en/latest/how-to/use-rocm-agent-enumerator.html) command, i.e.
```bash
@@ -583,7 +583,7 @@ rocm_agent_enumerator -name
For example, compiling for the [AMD Instinct MI210 accelerator](https://www.amd.com/en/products/accelerators/instinct/mi200/mi210.html) looks like:
```bash
# configure
-cmake .. -D ENABLE_HIP=ON -D CMAKE_HIP_ARCHITECTURES=gfx90a
+cmake .. -D QUEST_ENABLE_HIP=ON -D CMAKE_HIP_ARCHITECTURES=gfx90a
```
@@ -626,11 +626,11 @@ After download and installation, and before compiling, you must set the `CUQUANT
export CUQUANTUM_ROOT=/path/to/cuquantum-folder
```
-Compilation is then simple; we specify `ENABLE_CUQUANTUM` in addition to the above GPU CMake variables.
+Compilation is then simple; we specify `QUEST_ENABLE_CUQUANTUM` in addition to the above GPU CMake variables.
For example
```bash
# configure
-cmake .. -D ENABLE_CUDA=ON -D CMAKE_CUDA_ARCHITECTURES=80 -D ENABLE_CUQUANTUM=ON
+cmake .. -D QUEST_ENABLE_CUDA=ON -D CMAKE_CUDA_ARCHITECTURES=80 -D QUEST_ENABLE_CUQUANTUM=ON
# build
cmake --build . --parallel
@@ -665,7 +665,7 @@ Compiling QuEST's distributed backend is as simple as
```bash
# configure
-cmake .. -D ENABLE_DISTRIBUTION=ON
+cmake .. -D QUEST_ENABLE_MPI=ON
# build
cmake --build . --parallel
diff --git a/docs/launch.md b/docs/launch.md
index a76ce612b..3eb8493ee 100644
--- a/docs/launch.md
+++ b/docs/launch.md
@@ -223,11 +223,11 @@ The `v4` unit tests make use of the below, optional environment variables to con
| Environment variable | Default | Description |
| -------- | ------- | ------- |
-| `TEST_NUM_QUBITS_IN_QUREG` | `6` | The number of qubits in the Qureg(s) undergoing unit testing. In addition to operation upon larger Quregs being exponentially slower, beware that more qubits permit more variations and permutations of input parameters like target qubits, factorially increasing the number of tests per operation. |
-| `TEST_MAX_NUM_QUBIT_PERMUTATIONS` | `0` | The maximum number of control and target qubit permutations under which to unit test each function. Set to `0` (default) to test all permutations, or to a positive integer (e.g. `50`) to accelerate the unit tests. See more info [here](https://quest-kit.github.io/QuEST/group__testutilsconfig.html#gac5adcc10bd26c56f20344f5ae3d9ba41). |
-| `TEST_MAX_NUM_SUPEROP_TARGETS` | `4` | The maximum number of superoperator targets for which to unit test functions `mixKrausMap()` and `mixSuperOp()`. These are computationally equivalent to simulating unitaries with double the number of targets upon a density matrix. Set to `0` to test all sizes which is likely prohibitively slow, or to a positive integer (e.g. the default of `4`) to accelerate the unit tests. |
-| `NUM_MIXED_DEPLOYMENT_REPETITIONS` | `10` | The number of times (minimum of `1`) to repeat each random mixed-deployment unit test for each deployment combination. |
-| `TEST_ALL_DEPLOYMENTS` | `1` | Whether unit tests will be run using all possible deployment combinations (i.e. OpenMP, CUDA, MPI) in-turn (`=1`), or only once using all available deployments simultaneously (`=0`). |
+| `QUEST_TEST_NUM_QUBITS_IN_QUREG` | `6` | The number of qubits in the Qureg(s) undergoing unit testing. In addition to operation upon larger Quregs being exponentially slower, beware that more qubits permit more variations and permutations of input parameters like target qubits, factorially increasing the number of tests per operation. |
+| `QUEST_TEST_MAX_NUM_QUBIT_PERMUTATIONS` | `0` | The maximum number of control and target qubit permutations under which to unit test each function. Set to `0` (default) to test all permutations, or to a positive integer (e.g. `50`) to accelerate the unit tests. See more info [here](https://quest-kit.github.io/QuEST/group__testutilsconfig.html#ga34b54a167498c27babfcc9b28c4ac680). |
+| `QUEST_TEST_MAX_NUM_SUPEROP_TARGETS` | `4` | The maximum number of superoperator targets for which to unit test functions `mixKrausMap()` and `mixSuperOp()`. These are computationally equivalent to simulating unitaries with double the number of targets upon a density matrix. Set to `0` to test all sizes which is likely prohibitively slow, or to a positive integer (e.g. the default of `4`) to accelerate the unit tests. |
+| `QUEST_TEST_NUM_MIXED_DEPLOYMENT_REPETITIONS` | `10` | The number of times (minimum of `1`) to repeat each random mixed-deployment unit test for each deployment combination. |
+| `QUEST_TEST_TRY_ALL_DEPLOYMENTS` | `1` | Whether unit tests will be run using all possible deployment combinations (i.e. OpenMP, CUDA, MPI) in-turn (`=1`), or only once using all available deployments simultaneously (`=0`). |
@@ -268,8 +268,9 @@ ctest
QuEST execution can be configured prior to runtime using the below [environment variables](https://en.wikipedia.org/wiki/Environment_variable).
-- [`PERMIT_NODES_TO_SHARE_GPU`](https://quest-kit.github.io/QuEST/group__modes.html#ga7e12922138caa68ddaa6221e40f62dda)
-- [`DEFAULT_VALIDATION_EPSILON`](https://quest-kit.github.io/QuEST/group__modes.html#ga55810d6f3d23de810cd9b12a2bbb8cc2)
+- [`QUEST_PERMIT_NODES_TO_SHARE_GPU`](https://quest-kit.github.io/QuEST/group__modes.html#ga84b134d552464a82d29517e1ce1309a7)
+- [`QUEST_DEFAULT_VALIDATION_EPSILON`](https://quest-kit.github.io/QuEST/group__modes.html#gac4ab30619e411c965377c910680e242c)
+- [`QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK`](https://quest-kit.github.io/QuEST/group__modes.html#gaf1b71f54d270d3353fe072c66827339b)
Note the unit tests in the preceding section accept additional environment variables.
diff --git a/docs/tutorial.md b/docs/tutorial.md
index b3e99706e..306a1f1ec 100644
--- a/docs/tutorial.md
+++ b/docs/tutorial.md
@@ -14,11 +14,7 @@ QuEST is included into a `C` or `C++` project via
> [!TIP]
-> Some of QuEST's deprecated `v3` API can be accessed by specifying `ENABLE_DEPRECATED_API` when [compiling](/docs/compile.md#v3), or defining it before import, i.e.
-> ```cpp
-> #define ENABLE_DEPRECATED_API 1
-> #include "quest.h"
-> ```
+> Some of QuEST's deprecated `v3` API can be accessed by specifying `QUEST_ENABLE_DEPRECATED_API` when [compiling](/docs/compile.md#v3).
> We recommend migrating to the latest `v4` API however as will be showcased below.
Simulation typically proceeds as:
@@ -173,29 +169,29 @@ if (env.isGpuAccelerated)
Configuring the environment is ordinarily not necessary, but convenient in certain applications.
-For example, we may wish our simulations to deterministically obtain the same measurement outcomes and random states as a previous or future run, and ergo choose to [override](https://quest-kit.github.io/QuEST/group__debug__seed.html#ga9e3a6de413901afbf50690573add1587) the default seeds.
+For example, we may wish our simulations to deterministically obtain the same measurement outcomes and random states as a previous or future run, and ergo choose to [override](https://quest-kit.github.io/QuEST/group__debug__seed.html#ga4fea21c26edfea5a64cbdab860dbf583) the default seeds.
```cpp
unsigned seeds[] = {123u, 1u << 10};
-setSeeds(seeds, 2);
+setQuESTSeeds(seeds, 2);
```
We may wish further to [adjust](https://quest-kit.github.io/QuEST/group__debug__reporting.html) how subsequent functions will display information to the screen
```cpp
int maxRows = 8;
int maxCols = 4;
-setMaxNumReportedItems(maxRows, maxCols);
-setMaxNumReportedSigFigs(3);
+setQuESTMaxNumReportedItems(maxRows, maxCols);
+setQuESTMaxNumReportedSigFigs(3);
```
-or [add](https://quest-kit.github.io/QuEST/group__debug__reporting.html#ga29413703d609254244d6b13c663e6e06) extra spacing between QuEST's printed outputs
+or [add](https://quest-kit.github.io/QuEST/group__debug__reporting.html#gac5fa20b24814c555eae1d77229959b5e) extra spacing between QuEST's printed outputs
```cpp
-setNumReportedNewlines(3);
+setQuESTNumReportedNewlines(3);
```
-Perhaps we also wish to relax the [precision](https://quest-kit.github.io/QuEST/group__debug__validation.html#gae395568df6def76045ec1881fcb4e6d1) with which our future inputs will be asserted unitary or Hermitian
+Perhaps we also wish to relax the [precision](https://quest-kit.github.io/QuEST/group__debug__validation.html#ga6be7e12fc056a751a03073ee6844b0eb) with which our future inputs will be asserted unitary or Hermitian
```cpp
-setValidationEpsilon(0.001);
+setQuESTValidationEpsilon(0.001);
```
-but when unitarity _is_ violated, or we otherwise pass an invalid input, we wish to execute a [custom function](https://quest-kit.github.io/QuEST/group__debug__validation.html#ga14b6e7ce08465e36750da3acbc41062f) before exiting.
+but when unitarity _is_ violated, or we otherwise pass an invalid input, we wish to execute a [custom function](https://quest-kit.github.io/QuEST/group__debug__validation.html#gaa02a39c21c770e06ff891e028fd1fe75) before exiting.
```cpp
#include
@@ -205,7 +201,7 @@ void myErrorHandler(const char *func, const char *msg) {
exit(1);
}
-setInputErrorHandler(myErrorHandler);
+setQuESTInputErrorHandler(myErrorHandler);
```
> [!TIP]
@@ -218,7 +214,7 @@ setInputErrorHandler(myErrorHandler);
> std::string msg(errMsg);
> throw std::runtime_error(func + ": " + msg);
> }
-> setInputErrorHandler(myErrorHandler);
+> setQuESTInputErrorHandler(myErrorHandler);
> ```
@@ -253,7 +249,7 @@ Qureg (10 qubit statevector, 1024 qcomps, 16.1 KiB):
0 |1022⟩
0 |1023⟩
```
-> This printed only `8` amplitudes as per our setting of [`setMaxNumReportedItems()`](https://quest-kit.github.io/QuEST/group__debug__reporting.html#ga093c985b1970a0fd8616c01b9825979a) above.
+> This printed only `8` amplitudes as per our setting of [`setQuESTMaxNumReportedItems()`](https://quest-kit.github.io/QuEST/group__debug__reporting.html#ga2f2d0258f4f7acd6bfe74a19f697d0c2) above.
Behind the scenes, the function `createQureg` did something clever; it consulted the compiled deployments and available hardware to decide whether to distribute `qureg`, or dedicate it persistent GPU memory, and marked whether or not to multithread its subsequent modification. It attempts to choose _optimally_, avoiding gratuitous parallelisation if the overheads outweigh the benefits, or if the hardware devices have insufficient memory.
@@ -356,7 +352,7 @@ Qureg:
globalTotal.......16 MiB
```
-> The spacing between the outputs of those two consecutive QuEST functions was determined by our earlier call to [`setNumReportedNewlines()`](https://quest-kit.github.io/QuEST/group__debug__reporting.html#ga29413703d609254244d6b13c663e6e06).
+> The spacing between the outputs of those two consecutive QuEST functions was determined by our earlier call to [`setQuESTNumReportedNewlines()`](https://quest-kit.github.io/QuEST/group__debug__reporting.html#gac5fa20b24814c555eae1d77229959b5e).
A density matrix `Qureg` can model classical uncertainty as results from [decoherence](https://quest-kit.github.io/QuEST/group__decoherence.html), and proves useful when simulating quantum operations on a noisy quantum computer.
@@ -415,7 +411,7 @@ Qureg (5 qubit density matrix, 32x32 qcomps, 16.1 KiB):
-0.00597-0.00615i -0.00207-0.00451i … 0.000509-0.00401i 0.0173+(3.12e-19)i
```
-> The number of printed significant figures above results from our earlier calling of [`setMaxNumReportedSigFigs()`](https://quest-kit.github.io/QuEST/group__debug__reporting.html#ga15d46e5d813f70b587762814964e1994).
+> The number of printed significant figures above results from our earlier calling of [`setQuESTMaxNumReportedSigFigs()`](https://quest-kit.github.io/QuEST/group__debug__reporting.html#ga3b4156994fdcf65eee0875316a9cc95f).
@@ -609,10 +605,10 @@ QuEST encountered a validation error during function 'applyCompMatr1':
The given matrix was not (approximately) unitary.
Exiting...
```
-If we're satisfied our matrix _is_ sufficiently approximately unitary, we can [adjust](https://quest-kit.github.io/QuEST/group__debug__validation.html#gae395568df6def76045ec1881fcb4e6d1) or [disable](https://quest-kit.github.io/QuEST/group__debug__validation.html#ga5999824df0785ea88fb2d5b5582f2b46) the validation.
+If we're satisfied our matrix _is_ sufficiently approximately unitary, we can [adjust](https://quest-kit.github.io/QuEST/group__debug__validation.html#ga6be7e12fc056a751a03073ee6844b0eb) or [disable](https://quest-kit.github.io/QuEST/group__debug__validation.html#ga0a20ca2bc35e22e914bc25671dabdb9b) the validation.
```cpp
// max(norm(m * dagger(m) - identity)) = 0.9025
-setValidationEpsilon(0.903);
+setQuESTValidationEpsilon(0.903);
applyCompMatr1(qureg, 0, m);
```
@@ -783,7 +779,7 @@ reportScalar("entanglement", calcPurity(reduced));
## Report the results
-We've seen above that [scalars](https://quest-kit.github.io/QuEST/group__types.html) can be reported, handling the pretty formatting of real and complex numbers, controlled by settings like [`setMaxNumReportedSigFigs()`](https://quest-kit.github.io/QuEST/group__debug__reporting.html#ga15d46e5d813f70b587762814964e1994). But we can also report every data structure in the QuEST API, such as Pauli strings
+We've seen above that [scalars](https://quest-kit.github.io/QuEST/group__types.html) can be reported, handling the pretty formatting of real and complex numbers, controlled by settings like [`setQuESTMaxNumReportedSigFigs()`](https://quest-kit.github.io/QuEST/group__debug__reporting.html#ga3b4156994fdcf65eee0875316a9cc95f). But we can also report every data structure in the QuEST API, such as Pauli strings
```cpp
reportPauliStr(
getInlinePauliStr("XXYYZZ", {5,50, 10,60, 30,40})
@@ -805,8 +801,8 @@ PauliStrSum (4 terms, 160 bytes):
```
All outputs are affected by the [reporter settings](https://quest-kit.github.io/QuEST/group__debug__reporting.html).
```cpp
-setMaxNumReportedItems(4,4);
-setMaxNumReportedSigFigs(1);
+setQuESTMaxNumReportedItems(4,4);
+setQuESTMaxNumReportedSigFigs(1);
reportCompMatr(bigmatrix);
```
```
diff --git a/docs/v4.md b/docs/v4.md
index bc8018355..42c109521 100644
--- a/docs/v4.md
+++ b/docs/v4.md
@@ -53,7 +53,7 @@ QuEST `v4` has completely overhauled the API, software architecture, algorithms,
The set of supported quantum operations has greatly expanded. _All_ unitaries can be effected with any number of control qubits (in any [state](https://quest-kit.github.io/QuEST/group__op__compmatr.html#ga2f4526fe3a4f96509040151f3d31535a)), diagonal matrices can be [raised to powers](https://quest-kit.github.io/QuEST/group__op__diagmatr.html#ga7e07c28332d7d89784166f82cdd26eb9), density matrices can undergo [partial tracing](https://quest-kit.github.io/QuEST/group__calc__partialtrace.html) and [inhomogeneous Pauli channels](https://quest-kit.github.io/QuEST/group__decoherence.html#ga51a7f8d5ba0b142c37a698deed07bc28) (in addition to general [Kraus maps](https://quest-kit.github.io/QuEST/group__decoherence.html#ga57753c0d2deac93d3395c5b20a0122f0) and [superoperatos](https://quest-kit.github.io/QuEST/group__decoherence.html#ga6afbb4f2bb3a9c382861feb8a7b70951)), and multi-qubit projectors can now be performed, [with](https://quest-kit.github.io/QuEST/group__op__measurement.html#ga6bd438f3ebd80cf017292bb68542ed8f) and [without](https://quest-kit.github.io/QuEST/group__op__projectors.html#gaa4bde7e5a344fb46cf3119d462b18745) renormalisation.
- **more control**
- Extensive new [debugging](https://quest-kit.github.io/QuEST/group__debug.html) facilities allow [disabling](https://quest-kit.github.io/QuEST/group__debug__validation.html#ga5999824df0785ea88fb2d5b5582f2b46) or [changing](https://quest-kit.github.io/QuEST/group__debug__validation.html#gae395568df6def76045ec1881fcb4e6d1) the validation precision and [error response](https://quest-kit.github.io/QuEST/group__debug__validation.html#ga14b6e7ce08465e36750da3acbc41062f) at runtime, and controlling how many [amplitudes](https://quest-kit.github.io/QuEST/group__debug__reporting.html#ga093c985b1970a0fd8616c01b9825979a) and [significant figures](https://quest-kit.github.io/QuEST/group__debug__reporting.html#ga15d46e5d813f70b587762814964e1994) of `Qureg` and matrices are printed.
+ Extensive new [debugging](https://quest-kit.github.io/QuEST/group__debug.html) facilities allow [disabling](https://quest-kit.github.io/QuEST/group__debug__validation.html#ga0a20ca2bc35e22e914bc25671dabdb9b) or [changing](https://quest-kit.github.io/QuEST/group__debug__validation.html#ga6be7e12fc056a751a03073ee6844b0eb) the validation precision and [error response](https://quest-kit.github.io/QuEST/group__debug__validation.html#gaa02a39c21c770e06ff891e028fd1fe75) at runtime, and controlling how many [amplitudes](https://quest-kit.github.io/QuEST/group__debug__reporting.html#ga2f2d0258f4f7acd6bfe74a19f697d0c2) and [significant figures](https://quest-kit.github.io/QuEST/group__debug__reporting.html#ga3b4156994fdcf65eee0875316a9cc95f) of `Qureg` and matrices are printed.
- **better documentation**
The [documentation](/docs/) has been rewritten from the ground-up, and the [API doc](https://quest-kit.github.io/QuEST/topics.html) grouped into sub-categories and aesthetically overhauled with [Doxygen Awesome](https://jothepro.github.io/doxygen-awesome-css/). It is now more consistently structured, mathematically explicit, and is a treat on the eyes!
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index afc8f85d6..10278afb6 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -20,7 +20,11 @@ function(add_example direc in_fn)
add_executable(${target} ${in_fn})
target_link_libraries(${target} PUBLIC QuEST)
- if (INSTALL_BINARIES)
+ if (QUEST_ENABLE_MPI AND QUEST_ENABLE_SUBCOMM)
+ target_link_libraries(${target} PRIVATE MPI::MPI_CXX)
+ endif()
+
+ if (QUEST_INSTALL_BINARIES)
install(
TARGETS ${target}
RUNTIME
diff --git a/examples/extended/dynamics.c b/examples/extended/dynamics.c
index 03abcc16c..8c72d71ab 100644
--- a/examples/extended/dynamics.c
+++ b/examples/extended/dynamics.c
@@ -103,16 +103,16 @@ PauliStrSum createMyObservable(int numQubits) {
void reportMyStructs(Qureg qureg, PauliStrSum hamil, PauliStrSum observ) {
- setMaxNumReportedSigFigs(6); // sig-figs in scalars
- setNumReportedNewlines(2); // spacing between reports
- setReportedPauliChars(".XYZ"); // print I as .
- setReportedPauliStrStyle(0); // print XYZ (0) or Z3 Y2 X1 (1)
- setMaxNumReportedItems(8, 8); // show max 8 qureg amplitudes
+ setQuESTMaxNumReportedSigFigs(6); // sig-figs in scalars
+ setQuESTNumReportedNewlines(2); // spacing between reports
+ setQuESTReportedPauliChars(".XYZ"); // print I as .
+ setQuESTReportedPauliStrStyle(0); // print XYZ (0) or Z3 Y2 X1 (1)
+ setQuESTMaxNumReportedItems(8, 8); // show max 8 qureg amplitudes
reportStr("[Initial state]");
reportQureg(qureg);
- setMaxNumReportedItems(0, 0); // show 0=all Pauli operators
+ setQuESTMaxNumReportedItems(0, 0); // show 0=all Pauli operators
reportStr("[Hamiltonian]");
reportPauliStrSum(hamil);
@@ -144,8 +144,8 @@ int main() {
reportMyStructs(qureg, hamil, observ);
// tidy reporting of below expectation values
- setMaxNumReportedSigFigs(3);
- setNumReportedNewlines(1);
+ setQuESTMaxNumReportedSigFigs(3);
+ setQuESTNumReportedNewlines(1);
// evolve by repeatedly (each is a "step") Trotterising
// exp(-i dt H) with the specified order and repetitions.
@@ -172,8 +172,8 @@ int main() {
reportStr("");
// preview the final state...
- setNumReportedNewlines(2);
- setMaxNumReportedItems(25, 25);
+ setQuESTNumReportedNewlines(2);
+ setQuESTMaxNumReportedItems(25, 25);
reportStr("[Final state]");
reportQureg(qureg);
diff --git a/examples/extended/dynamics.cpp b/examples/extended/dynamics.cpp
index 636145387..da4fd9223 100644
--- a/examples/extended/dynamics.cpp
+++ b/examples/extended/dynamics.cpp
@@ -100,16 +100,16 @@ PauliStrSum createMyObservable(int numQubits) {
void reportMyStructs(Qureg qureg, PauliStrSum hamil, PauliStrSum observ) {
- setMaxNumReportedSigFigs(6); // sig-figs in scalars
- setNumReportedNewlines(2); // spacing between reports
- setReportedPauliChars(".XYZ"); // print I as .
- setReportedPauliStrStyle(0); // print XYZ (0) or Z3 Y2 X1 (1)
- setMaxNumReportedItems(8, 8); // show max 8 qureg amplitudes
+ setQuESTMaxNumReportedSigFigs(6); // sig-figs in scalars
+ setQuESTNumReportedNewlines(2); // spacing between reports
+ setQuESTReportedPauliChars(".XYZ"); // print I as .
+ setQuESTReportedPauliStrStyle(0); // print XYZ (0) or Z3 Y2 X1 (1)
+ setQuESTMaxNumReportedItems(8, 8); // show max 8 qureg amplitudes
reportStr("[Initial state]");
reportQureg(qureg);
- setMaxNumReportedItems(0, 0); // show 0=all Pauli operators
+ setQuESTMaxNumReportedItems(0, 0); // show 0=all Pauli operators
reportStr("[Hamiltonian]");
reportPauliStrSum(hamil);
@@ -141,8 +141,8 @@ int main() {
reportMyStructs(qureg, hamil, observ);
// tidy reporting of below expectation values
- setMaxNumReportedSigFigs(3);
- setNumReportedNewlines(1);
+ setQuESTMaxNumReportedSigFigs(3);
+ setQuESTNumReportedNewlines(1);
// evolve by repeatedly (each is a "step") Trotterising
// exp(-i dt H) with the specified order and repetitions.
@@ -166,8 +166,8 @@ int main() {
reportStr("");
// preview the final state...
- setNumReportedNewlines(2);
- setMaxNumReportedItems(25, 25);
+ setQuESTNumReportedNewlines(2);
+ setQuESTMaxNumReportedItems(25, 25);
reportStr("[Final state]");
reportQureg(qureg);
diff --git a/examples/extended/set_num_gpu_threads.c b/examples/extended/set_num_gpu_threads.c
new file mode 100644
index 000000000..1b3dc175f
--- /dev/null
+++ b/examples/extended/set_num_gpu_threads.c
@@ -0,0 +1,91 @@
+/** @file
+ *
+ * An example of using QuEST's experimental
+ * setQuESTNumGpuThreadsPerBlock() function
+ * to change the parallelisation granularity
+ * of GPU simulation
+ *
+ * @author Tyson Jones
+ */
+
+#include "quest.h"
+#include
+#include
+
+
+const int NUM_REPS = 10;
+const int NUM_QUBITS = 25; // 512 MiB (at double precision)
+
+
+void simulation(Qureg qureg)
+{
+ // put your favourite QuEST simulation here
+ initRandomPureState(qureg);
+ applyFullQuantumFourierTransform(qureg, /*inverse=*/false);
+ calcTotalProb(qureg);
+}
+
+
+void benchmark(Qureg qureg, int numThreadsPerBlock)
+{
+ printf("Using %d threads per block... ", numThreadsPerBlock);
+ fflush(stdout);
+
+ setQuESTNumGpuThreadsPerBlock(numThreadsPerBlock);
+
+ // warmup
+ for (int r=0; r
+#include
+
+
+const int NUM_REPS = 10;
+const int NUM_QUBITS = 25; // 512 MiB (at double precision)
+
+
+void simulation(Qureg qureg)
+{
+ // put your favourite QuEST simulation here
+ initRandomPureState(qureg);
+ applyFullQuantumFourierTransform(qureg, /*inverse=*/false);
+ calcTotalProb(qureg);
+}
+
+
+void benchmark(Qureg qureg, int numThreadsPerBlock)
+{
+ std::cout << "Using " << numThreadsPerBlock << " threads per block... " << std::flush;
+
+ setQuESTNumGpuThreadsPerBlock(numThreadsPerBlock);
+
+ // warmup
+ for (int r=0; r(end - start).count();
+ auto av = dur / NUM_REPS;
+
+ std::cout << " took " << av << "s" << std::endl;
+}
+
+
+int main()
+{
+ initQuESTEnv();
+
+ // This example is pointless without a GPU!
+ if (!getQuESTEnv().isGpuAccelerated) {
+ std::cout
+ << "GPU acceleration is not enabled, and so changing the number "
+ << "of threads per block has no effect. Exiting..."
+ << std::endl;
+ finalizeQuESTEnv();
+ return 0;
+ }
+
+ // The initial number of threads per block is informed by the optional environment
+ // variable QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK. If not specified, QuEST will
+ // use the value of the CMake option of the same name passed during compilation,
+ // which itself has a default of 128
+ auto initNumTPB = getQuESTNumGpuThreadsPerBlock();
+ std::cout << "Initial numThreadsPerBlock: " << initNumTPB << "\n\n";
+
+ // Create a statevector parallelised only by the GPU
+ Qureg qureg = createCustomQureg(NUM_QUBITS, 0, 0, 1, 0);
+ reportQuregParams(qureg);
+
+ // Benchmark QuEST with sensible numbers of threads per block (multiples of warp size)
+ for (auto numTPB : {64, 128, 256, 512, 1024})
+ benchmark(qureg, numTPB);
+
+ // Try silly parameters ¯\_(ツ)_/¯
+ setQuESTValidationOff();
+ for (auto numTPB : {31, 15, 5, 1})
+ benchmark(qureg, numTPB);
+
+ finalizeQuESTEnv();
+ return 0;
+}
diff --git a/examples/extended/user_owned_mpi.c b/examples/extended/user_owned_mpi.c
new file mode 100644
index 000000000..4e3c766f4
--- /dev/null
+++ b/examples/extended/user_owned_mpi.c
@@ -0,0 +1,49 @@
+/** @file
+ *
+ * An example of using QuEST's experimental
+ * initCustomMpiQuESTEnv() function, to
+ * initialise QuEST in an environment where
+ * MPI is owned and controlled by the user.
+ *
+ * @author Oliver Brown
+ * @author Tyson Jones (doc)
+ */
+
+#include "quest.h"
+#include
+
+
+// This example requires linking with MPI, which the CMake
+// build only enables when QUEST_ENABLE_SUBCOMM is ON, which
+// results in quest.h defining QUEST_COMPILE_SUBCOMM. To
+// enable this example to always be compilable (like during
+// our CI), we guard against when QUEST_ENABLE_SUBCOMM is OFF.
+#if ! QUEST_COMPILE_SUBCOMM
+int main(void)
+{
+    // Fallback entry point used when the build has no MPI to link against.
+    fputs("Example skipped since MPI is not linked.\n", stdout);
+    return 0;
+}
+#else
+
+
+#include
+
+int main(void)
+{
+    // Deployment flags handed to initCustomMpiQuESTEnv() below. USER_MPI
+    // declares that this program, not QuEST, initialises and finalises MPI.
+    const bool USER_MPI = true;
+    const int USE_DISTRIB = 1;
+    const int USE_GPU = 0;
+    const int USE_OPENMP = 1;
+
+    // MPI is started by us before QuEST is initialised...
+    MPI_Init(NULL, NULL);
+
+    initCustomMpiQuESTEnv(USE_DISTRIB, USER_MPI, USE_GPU, USE_OPENMP);
+    reportQuESTEnv();
+    finalizeQuESTEnv();
+
+    // ...and shut down by us after QuEST is finalised
+    MPI_Finalize();
+
+    return 0;
+}
+
+
+#endif // QUEST_COMPILE_SUBCOMM
diff --git a/examples/extended/user_owned_mpi.cpp b/examples/extended/user_owned_mpi.cpp
new file mode 100644
index 000000000..54345d576
--- /dev/null
+++ b/examples/extended/user_owned_mpi.cpp
@@ -0,0 +1,49 @@
+/** @file
+ *
+ * An example of using QuEST's experimental
+ * initCustomMpiQuESTEnv() function to
+ * initialise QuEST in an environment where
+ * MPI is owned and controlled by the user.
+ *
+ * @author Oliver Brown
+ * @author Tyson Jones (doc)
+ */
+
+#include "quest.h"
+#include
+
+
+// This example requires linking with MPI, which the CMake
+// build only enables when QUEST_ENABLE_SUBCOMM is ON, which
+// results in quest.h defining QUEST_COMPILE_SUBCOMM. To
+// enable this example to always be compilable (like during
+// our CI), we guard against when QUEST_ENABLE_SUBCOMM is OFF.
+#if ! QUEST_COMPILE_SUBCOMM
+int main(void)
+{
+    // Fallback entry point used when the build has no MPI to link against.
+    std::fputs("Example skipped since MPI is not linked.\n", stdout);
+    return 0;
+}
+#else
+
+
+#include
+
+int main(void)
+{
+    // Deployment flags handed to initCustomMpiQuESTEnv() below. USER_MPI
+    // declares that this program, not QuEST, initialises and finalises MPI.
+    const bool USER_MPI = true;
+    const int USE_DISTRIB = 1;
+    const int USE_GPU = 0;
+    const int USE_OPENMP = 1;
+
+    // MPI is started by us before QuEST is initialised...
+    MPI_Init(NULL, NULL);
+
+    initCustomMpiQuESTEnv(USE_DISTRIB, USER_MPI, USE_GPU, USE_OPENMP);
+    reportQuESTEnv();
+    finalizeQuESTEnv();
+
+    // ...and shut down by us after QuEST is finalised
+    MPI_Finalize();
+
+    return 0;
+}
+
+
+#endif // QUEST_COMPILE_SUBCOMM
diff --git a/examples/extended/user_owned_submpi.c b/examples/extended/user_owned_submpi.c
new file mode 100644
index 000000000..6f2ea6290
--- /dev/null
+++ b/examples/extended/user_owned_submpi.c
@@ -0,0 +1,84 @@
+/** @file
+ *
+ * An example of using QuEST's experimental
+ * initCustomMpiCommQuESTEnv() function to
+ * dedicate only some user-owned MPI processes
+ * to QuEST, and dedicate the remainder to
+ * other tasks.
+ *
+ * @author Oliver Brown
+ * @author Tyson Jones (doc)
+ */
+
+#include "quest.h"
+#include
+
+
+// This example requires linking with MPI, which the CMake
+// build only enables when QUEST_ENABLE_SUBCOMM is ON, which
+// results in quest.h defining QUEST_COMPILE_SUBCOMM. To
+// enable this example to always be compilable (like during
+// our CI), we guard against when QUEST_ENABLE_SUBCOMM is OFF.
+#if ! QUEST_COMPILE_SUBCOMM
+int main()
+{
+    // Fallback entry point used when the build has no MPI to link against.
+    fputs("Example skipped since MPI is not linked.\n", stdout);
+    return 0;
+}
+#else
+
+
+#include
+
+int main (void)
+{
+    // MPI is owned by this program, so it is initialised here rather than by QuEST
+    MPI_Init(NULL, NULL);
+
+    int nprocs, world_rank;
+    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+
+    // odd world ranks are dedicated to QuEST; even ranks are kept for other tasks
+    const int I_AM_QUANTUM = world_rank % 2;
+
+    printf("[%d] Hello from rank %d of %d in MPI_COMM_WORLD.\n", world_rank, world_rank, nprocs);
+
+    // partition the world by role, using the world rank as the ordering key
+    MPI_Comm comm_split, comm_quantum, comm_classical;
+    MPI_Comm_split(MPI_COMM_WORLD, I_AM_QUANTUM, world_rank, &comm_split);
+
+    // sentinels which persist for processes outside the quantum communicator
+    int quest_nprocs = -1;
+    int quest_rank = -1;
+
+    if (!I_AM_QUANTUM) {
+        MPI_Comm_dup(comm_split, &comm_classical);
+    } else {
+        MPI_Comm_dup(comm_split, &comm_quantum);
+        MPI_Comm_size(comm_quantum, &quest_nprocs);
+        MPI_Comm_rank(comm_quantum, &quest_rank);
+        printf("[%d] Hello from rank %d of %d in comm_quantum.\n", world_rank, quest_rank, quest_nprocs);
+
+        // only procs in quantum comm initialise QuEST
+        printf("[%d] Initialising QuEST.\n", world_rank);
+        initCustomMpiCommQuESTEnv(comm_quantum, -1, -1); // -1 = auto-deployments
+
+        reportQuESTEnv();
+
+        printf("[%d] Finalising QuEST.\n", world_rank);
+        finalizeQuESTEnv();
+    }
+
+    // free every communicator this process created before shutting down MPI
+    MPI_Comm_free(&comm_split);
+    MPI_Comm_free(I_AM_QUANTUM ? &comm_quantum : &comm_classical);
+
+    MPI_Finalize();
+
+    return 0;
+}
+
+
+#endif // QUEST_COMPILE_SUBCOMM
diff --git a/examples/extended/user_owned_submpi.cpp b/examples/extended/user_owned_submpi.cpp
new file mode 100644
index 000000000..ea82a4f9d
--- /dev/null
+++ b/examples/extended/user_owned_submpi.cpp
@@ -0,0 +1,84 @@
+/** @file
+ *
+ * An example of using QuEST's experimental
+ * initCustomMpiCommQuESTEnv() function to
+ * dedicate only some user-owned MPI processes
+ * to QuEST, and dedicate the remainder to
+ * other tasks.
+ *
+ * @author Oliver Brown
+ * @author Tyson Jones (doc)
+ */
+
+#include "quest.h"
+#include
+
+
+// This example requires linking with MPI, which the CMake
+// build only enables when QUEST_ENABLE_SUBCOMM is ON, which
+// results in quest.h defining QUEST_COMPILE_SUBCOMM. To
+// enable this example to always be compilable (like during
+// our CI), we guard against when QUEST_ENABLE_SUBCOMM is OFF.
+#if ! QUEST_COMPILE_SUBCOMM
+int main()
+{
+    // Fallback entry point used when the build has no MPI to link against.
+    std::fputs("Example skipped since MPI is not linked.\n", stdout);
+    return 0;
+}
+#else
+
+
+#include
+
+int main (void)
+{
+    // MPI is owned by this program, so it is initialised here rather than by QuEST
+    MPI_Init(NULL, NULL);
+
+    int nprocs, world_rank;
+    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
+
+    // odd world ranks are dedicated to QuEST; even ranks are kept for other tasks
+    const int I_AM_QUANTUM = world_rank % 2;
+
+    std::printf("[%d] Hello from rank %d of %d in MPI_COMM_WORLD.\n", world_rank, world_rank, nprocs);
+
+    // partition the world by role, using the world rank as the ordering key
+    MPI_Comm comm_split, comm_quantum, comm_classical;
+    MPI_Comm_split(MPI_COMM_WORLD, I_AM_QUANTUM, world_rank, &comm_split);
+
+    // sentinels which persist for processes outside the quantum communicator
+    int quest_nprocs = -1;
+    int quest_rank = -1;
+
+    if (!I_AM_QUANTUM) {
+        MPI_Comm_dup(comm_split, &comm_classical);
+    } else {
+        MPI_Comm_dup(comm_split, &comm_quantum);
+        MPI_Comm_size(comm_quantum, &quest_nprocs);
+        MPI_Comm_rank(comm_quantum, &quest_rank);
+        std::printf("[%d] Hello from rank %d of %d in comm_quantum.\n", world_rank, quest_rank, quest_nprocs);
+
+        // only procs in quantum comm initialise QuEST
+        std::printf("[%d] Initialising QuEST.\n", world_rank);
+        initCustomMpiCommQuESTEnv(comm_quantum, modeflag::USE_AUTO, modeflag::USE_AUTO);
+
+        reportQuESTEnv();
+
+        std::printf("[%d] Finalising QuEST.\n", world_rank);
+        finalizeQuESTEnv();
+    }
+
+    // free every communicator this process created before shutting down MPI
+    MPI_Comm_free(&comm_split);
+    MPI_Comm_free(I_AM_QUANTUM ? &comm_quantum : &comm_classical);
+
+    MPI_Finalize();
+
+    return 0;
+}
+
+
+#endif // QUEST_COMPILE_SUBCOMM
diff --git a/examples/isolated/reporting_matrices.c b/examples/isolated/reporting_matrices.c
index 319c758cb..cb497593e 100644
--- a/examples/isolated/reporting_matrices.c
+++ b/examples/isolated/reporting_matrices.c
@@ -49,7 +49,7 @@ void demo_CompMatr() {
for (int i=0; i [!CAUTION]
* > Unlike other functions (including calcExpecFullStateDiagMatr()), this function will _NOT_
* > consult the imaginary components of the elements of @p matrix, since a non-complex exponentiation
* > function is used. That is, while validation permits the imaginary components to be small, they
* > will be internally treated as precisely zero. This is true even when Hermiticity validation
- * > is disabled using setValidationOff(). To consult the imaginary components of @p matrix, use
+ * > is disabled using setQuESTValidationOff(). To consult the imaginary components of @p matrix, use
* > calcExpecNonHermitianFullStateDiagMatrPower().
*
* - Hermiticity of @p matrix when raised to @p exponent further requires that, when @p exponent is
@@ -298,7 +298,7 @@ qreal calcExpecFullStateDiagMatr(Qureg qureg, FullStateDiagMatr matr);
* zero elements which would otherwise create divergences in @f$\hat{D}^x@f$. Validation ergo
* checks that when @p exponent is (strictly) negative, @p matrix contains no elements within
* distance @f$\valeps@f$ to zero (regardless of the magnitude of @p exponent). Adjust
- * @f$\valeps@f$ using setValidationEpsilon().
+ * @f$\valeps@f$ using setQuESTValidationEpsilon().
* - The passed @p exponent is always real, but can be relaxed to a general complex scalar via
* calcExpecNonHermitianFullStateDiagMatrPower().
* - The returned value is always real, and the imaginary component is neglected even when
@@ -890,7 +890,7 @@ qreal calcPurity(Qureg qureg);
* - The output of this function is always real, which validation will check after computing the
* fidelity as a complex scalar. Specifically, validation will assert that the result has an
* absolute imaginary component less than the validation epsilon, which can be adjusted with
- * setValidationEpsilon().
+ * setQuESTValidationEpsilon().
*
* - This function does not yet support both @p qureg and @p other being density matrices, for
* which the fidelity calculation is more substantial.
@@ -1004,7 +1004,7 @@ qreal calcFidelity(Qureg qureg, Qureg other);
\left| \, \im{ \brapsi \dmrho \svpsi } \, \right| \le \valeps, \\
\re{ \brapsi \dmrho \svpsi } \le 1 + \valeps,
* @f]
- * where @f$\valeps@f$ is the validation epsilon, adjustable via setValidationEpsilon().
+ * where @f$\valeps@f$ is the validation epsilon, adjustable via setQuESTValidationEpsilon().
*
* - Even when the above postcondition validation is disabled, the Bures and purified distance
* calculations will respectively replace @f$\left| \braket{\phi}{\psi} \right|@f$ and
diff --git a/quest/include/config.h.in b/quest/include/config.h.in
index 2cb12fa90..1bb8a0470 100644
--- a/quest/include/config.h.in
+++ b/quest/include/config.h.in
@@ -7,9 +7,9 @@
* defined in one central place (right here) rather than being
* passed to each source file as compiler flags. It further
* ensures that when QuEST is installed, critical user-facing
- * macros such as FLOAT_PRECISION cannot ever be changed from
+ * macros such as QUEST_FLOAT_PRECISION cannot ever be changed from
* their value during source compilation. Finally, it enables
- * users to access macros such as COMPILE_OPENMP at pre-build
+ * users to access macros such as QUEST_COMPILE_OMP at pre-build
* time of their own source code, which could prove necessary
* when interfacing with external libraries.
*
@@ -34,15 +34,16 @@
*/
-#if defined(FLOAT_PRECISION) || \
- defined(COMPILE_OPENMP) || \
- defined(COMPILE_MPI) || \
- defined(COMPILE_CUDA) || \
- defined(COMPILE_HIP) || \
- defined(COMPILE_CUQUANTUM) || \
- defined(NUMA_AWARE) || \
- defined(INCLUDE_DEPRECATED_FUNCTIONS) || \
- defined(DISABLE_DEPRECATION_WARNINGS)
+#if defined(QUEST_FLOAT_PRECISION) || \
+ defined(QUEST_COMPILE_OMP) || \
+ defined(QUEST_COMPILE_MPI) || \
+ defined(QUEST_COMPILE_SUBCOMM) || \
+ defined(QUEST_COMPILE_CUDA) || \
+ defined(QUEST_COMPILE_HIP) || \
+ defined(QUEST_COMPILE_CUQUANTUM) || \
+ defined(QUEST_ENABLE_NUMA) || \
+ defined(QUEST_INCLUDE_DEPRECATED_FUNCTIONS) || \
+ defined(QUEST_DISABLE_DEPRECATION_WARNINGS)
#error "Pre-config macros were erroneously passed directly to the source rather than through the CMake config file."
@@ -71,24 +72,26 @@
// crucial to user source (informs API)
-#cmakedefine FLOAT_PRECISION @FLOAT_PRECISION@
-#cmakedefine01 INCLUDE_DEPRECATED_FUNCTIONS
-#cmakedefine01 DISABLE_DEPRECATION_WARNINGS
+#cmakedefine QUEST_FLOAT_PRECISION @QUEST_FLOAT_PRECISION@
+#cmakedefine01 QUEST_INCLUDE_DEPRECATED_FUNCTIONS
+#cmakedefine01 QUEST_DISABLE_DEPRECATION_WARNINGS
// crucial to QuEST source (informs external library usage)
-#cmakedefine01 COMPILE_OPENMP
-#cmakedefine01 COMPILE_MPI
-#cmakedefine01 COMPILE_CUDA
-#cmakedefine01 COMPILE_CUQUANTUM
+#cmakedefine01 QUEST_COMPILE_OMP
+#cmakedefine01 QUEST_COMPILE_MPI
+#cmakedefine01 QUEST_COMPILE_SUBCOMM
+#cmakedefine01 QUEST_COMPILE_CUDA
+#cmakedefine01 QUEST_COMPILE_CUQUANTUM
+#cmakedefine01 QUEST_COMPILE_HIP
-// not actually a CMake option (user cannot disable) but nonetheless crucial
-#cmakedefine01 NUMA_AWARE
+// crucial to QuEST source (informs optional NUMA usage)
+#cmakedefine01 QUEST_ENABLE_NUMA
-// not consulted by src (included for book-keeping)
-#cmakedefine01 COMPILE_HIP
+// default parameters which may have been tuned for performance when building the library
+#cmakedefine QUEST_UNSPECIFIED_DEFAULT_NUM_GPU_THREADS_PER_BLOCK @QUEST_UNSPECIFIED_DEFAULT_NUM_GPU_THREADS_PER_BLOCK@
@@ -115,15 +118,16 @@
*/
-#if ! defined(FLOAT_PRECISION) || \
- ! defined(COMPILE_OPENMP) || \
- ! defined(COMPILE_MPI) || \
- ! defined(COMPILE_CUDA) || \
- ! defined(COMPILE_HIP) || \
- ! defined(COMPILE_CUQUANTUM) || \
- ! defined(NUMA_AWARE) || \
- ! defined(INCLUDE_DEPRECATED_FUNCTIONS) || \
- ! defined(DISABLE_DEPRECATION_WARNINGS)
+#if ! defined(QUEST_FLOAT_PRECISION) || \
+ ! defined(QUEST_COMPILE_OMP) || \
+ ! defined(QUEST_COMPILE_MPI) || \
+ ! defined(QUEST_COMPILE_SUBCOMM) || \
+ ! defined(QUEST_COMPILE_CUDA) || \
+ ! defined(QUEST_COMPILE_HIP) || \
+ ! defined(QUEST_COMPILE_CUQUANTUM) || \
+ ! defined(QUEST_ENABLE_NUMA) || \
+ ! defined(QUEST_INCLUDE_DEPRECATED_FUNCTIONS) || \
+ ! defined(QUEST_DISABLE_DEPRECATION_WARNINGS)
#error "Expected macros were not defined by the config.h header, possibly because their corresponding CMake variables were not substituted."
@@ -142,14 +146,15 @@
*/
-#if ! (COMPILE_OPENMP == 0 || COMPILE_OPENMP == 1) || \
- ! (COMPILE_MPI == 0 || COMPILE_MPI == 1) || \
- ! (COMPILE_CUDA == 0 || COMPILE_CUDA == 1) || \
- ! (COMPILE_HIP == 0 || COMPILE_HIP == 1) || \
- ! (COMPILE_CUQUANTUM == 0 || COMPILE_CUQUANTUM == 1) || \
- ! (NUMA_AWARE == 0 || NUMA_AWARE == 1) || \
- ! (INCLUDE_DEPRECATED_FUNCTIONS == 0 || INCLUDE_DEPRECATED_FUNCTIONS == 1) || \
- ! (DISABLE_DEPRECATION_WARNINGS == 0 || DISABLE_DEPRECATION_WARNINGS == 1)
+#if ! (QUEST_COMPILE_OMP == 0 || QUEST_COMPILE_OMP == 1) || \
+ ! (QUEST_COMPILE_MPI == 0 || QUEST_COMPILE_MPI == 1) || \
+ ! (QUEST_COMPILE_SUBCOMM == 0 || QUEST_COMPILE_SUBCOMM == 1) || \
+ ! (QUEST_COMPILE_CUDA == 0 || QUEST_COMPILE_CUDA == 1) || \
+ ! (QUEST_COMPILE_HIP == 0 || QUEST_COMPILE_HIP == 1) || \
+ ! (QUEST_COMPILE_CUQUANTUM == 0 || QUEST_COMPILE_CUQUANTUM == 1) || \
+ ! (QUEST_ENABLE_NUMA == 0 || QUEST_ENABLE_NUMA == 1) || \
+ ! (QUEST_INCLUDE_DEPRECATED_FUNCTIONS == 0 || QUEST_INCLUDE_DEPRECATED_FUNCTIONS == 1) || \
+ ! (QUEST_DISABLE_DEPRECATION_WARNINGS == 0 || QUEST_DISABLE_DEPRECATION_WARNINGS == 1)
#error "A macro defined by the config.h header (as inferred from a CMake variable) had an illegal value."
@@ -166,4 +171,4 @@
-#endif // CONFIG_H
\ No newline at end of file
+#endif // CONFIG_H
diff --git a/quest/include/debug.h b/quest/include/debug.h
index 48ef22527..a51236141 100644
--- a/quest/include/debug.h
+++ b/quest/include/debug.h
@@ -43,19 +43,19 @@ extern "C" {
/// @notyetdoced
-void setSeeds(unsigned* seeds, int numSeeds);
+void setQuESTSeeds(unsigned* seeds, int numSeeds);
/// @notyetdoced
-void setSeedsToDefault();
+void setQuESTSeedsToDefault();
/// @notyetdoced
-void getSeeds(unsigned* seeds);
+void getQuESTSeeds(unsigned* seeds);
/// @notyetdoced
-int getNumSeeds();
+int getQuESTNumSeeds();
/** @} */
@@ -79,27 +79,27 @@ int getNumSeeds();
* - [C](https://github.com/QuEST-Kit/QuEST/blob/devel/examples/isolated/setting_errorhandler.c) and
* [C++](https://github.com/QuEST-Kit/QuEST/blob/devel/examples/isolated/setting_errorhandler.cpp) examples
*/
-void setInputErrorHandler(void (*callback)(const char* func, const char* msg));
+void setQuESTInputErrorHandler(void (*callback)(const char* func, const char* msg));
/// @notyetdoced
-void setValidationOn();
+void setQuESTValidationOn();
/// @notyetdoced
-void setValidationOff();
+void setQuESTValidationOff();
/// @notyetdoced
-void setValidationEpsilonToDefault();
+void setQuESTValidationEpsilonToDefault();
/// @notyetdoced
-void setValidationEpsilon(qreal eps);
+void setQuESTValidationEpsilon(qreal eps);
/// @notyetdoced
-qreal getValidationEpsilon();
+qreal getQuESTValidationEpsilon();
/** @} */
@@ -115,7 +115,7 @@ qreal getValidationEpsilon();
/// @notyetdoced
/// @notyettested
-void setMaxNumReportedItems(qindex numRows, qindex numCols);
+void setQuESTMaxNumReportedItems(qindex numRows, qindex numCols);
/** @notyetdoced
@@ -123,11 +123,11 @@ void setMaxNumReportedItems(qindex numRows, qindex numCols);
* > (e.g. `5.32 KiB`) which is always shown with three significant figures
* > (or four when in bytes, e.g. `1023 bytes`).
*/
-void setMaxNumReportedSigFigs(int numSigFigs);
+void setQuESTMaxNumReportedSigFigs(int numSigFigs);
/// @notyetdoced
-void setNumReportedNewlines(int numNewlines);
+void setQuESTNumReportedNewlines(int numNewlines);
/**
@@ -138,11 +138,11 @@ void setNumReportedNewlines(int numNewlines);
PauliStr str = getInlinePauliStr("XYZ", {0,10,20});
reportPauliStr(str);
- setReportedPauliChars(".xyz");
+ setQuESTReportedPauliChars(".xyz");
reportPauliStr(str);
* ```
*/
-void setReportedPauliChars(const char* paulis);
+void setQuESTReportedPauliChars(const char* paulis);
/**
@@ -152,14 +152,14 @@ void setReportedPauliChars(const char* paulis);
* ```
PauliStr str = getInlinePauliStr("XYZ", {0,10,20});
- setReportedPauliStrStyle(0);
+ setQuESTReportedPauliStrStyle(0);
reportPauliStr(str);
- setReportedPauliStrStyle(1);
+ setQuESTReportedPauliStrStyle(1);
reportPauliStr(str);
* ```
*/
-void setReportedPauliStrStyle(int style);
+void setQuESTReportedPauliStrStyle(int style);
/** @} */
@@ -174,11 +174,11 @@ void setReportedPauliStrStyle(int style);
/// @notyetdoced
-qindex getGpuCacheSize();
+qindex getQuESTGpuCacheSize();
/// @notyetdoced
-void clearGpuCache();
+void clearQuESTGpuCache();
/** @} */
@@ -194,7 +194,7 @@ void clearGpuCache();
/// @notyetdoced
/// @notyettested
-void getEnvironmentString(char str[200]);
+void getQuESTEnvironmentString(char str[200]);
/** @} */
@@ -225,16 +225,16 @@ void getEnvironmentString(char str[200]);
/// @notyettested
/// @notyetdoced
/// @cppvectoroverload
-/// @see setSeeds()
-void setSeeds(std::vector seeds);
+/// @see setQuESTSeeds()
+void setQuESTSeeds(std::vector seeds);
/// @ingroup debug_seed
/// @notyettested
/// @notyetdoced
/// @cpponly
-/// @see getSeeds()
-std::vector getSeeds();
+/// @see getQuESTSeeds()
+std::vector getQuESTSeeds();
#endif // __cplusplus
diff --git a/quest/include/deprecated.h b/quest/include/deprecated.h
index 1a63b2044..92032efb8 100644
--- a/quest/include/deprecated.h
+++ b/quest/include/deprecated.h
@@ -29,7 +29,7 @@
* INITIAL WARNING
*/
-#if !defined(DISABLE_DEPRECATION_WARNINGS) || DISABLE_DEPRECATION_WARNINGS == 0
+#if !defined(QUEST_DISABLE_DEPRECATION_WARNINGS) || QUEST_DISABLE_DEPRECATION_WARNINGS == 0
// #warning command is always recognised (deprecated API is not MSVC-compatible)
#warning "\
@@ -49,7 +49,7 @@ refactor your code to v4, and should absolutely not continue to use the old v3 A
/*
* TOGGLEABLE WARNING MESSAGES
*
- * users can define precompiler constant DISABLE_DEPRECATION_WARNINGS=1
+ * users can define precompiler constant QUEST_DISABLE_DEPRECATION_WARNINGS=1
* in order to disable compile-time deprecation warnings. This will
* make most of the QuEST v3 API silently work by casting to the
* v4 API at compile-time. Note that _Pragma() are resolved at
@@ -62,7 +62,7 @@ refactor your code to v4, and should absolutely not continue to use the old v3 A
#define _EFFECT_PRAGMA(cmd) _Pragma(#cmd)
-#if DISABLE_DEPRECATION_WARNINGS
+#if QUEST_DISABLE_DEPRECATION_WARNINGS
#define _WARN_TYPE_RENAMED(oldname, newname)
@@ -449,13 +449,6 @@ typedef enum pauliOpType _NoWarnPauliOpType;
"setDensityQuregAmps(Qureg, qindex startRow, qindex startCol, qcomp** amps, qindex numRows, qindex numCols)")
-#define getQuESTSeeds(...) \
- _ERROR_GENERAL_MSG( \
- "The QuEST function 'getQuESTSeeds(QuESTEnv env, unsigned long int* out, int numOut)' has been deprecated. " \
- "Please instead use 'getSeeds(unsigned* out)' which accepts a pointer to pre-allocated memory of length " \
- "equal to that returned by 'getNumSeeds()'. We cannot automatically invoke this replacement routine." )
-
-
#define applyPhaseFunc(...) \
_ERROR_PHASE_FUNC_REMOVED("applyPhaseFunc")
@@ -548,17 +541,44 @@ typedef enum pauliOpType _NoWarnPauliOpType;
#define _GET_ENVIRONMENT_STRING_1(str) \
- getEnvironmentString(str)
+ getQuESTEnvironmentString(str)
#define _GET_ENVIRONMENT_STRING_2(str) \
- _WARN_FUNC_NOW_HAS_FEWER_ARGS("getEnvironmentString(QuESTEnv, char[200])", "getEnvironmentString(char[200])") \
+ _WARN_FUNC_NOW_HAS_FEWER_ARGS("getQuESTEnvironmentString(QuESTEnv, char[200])", "getQuESTEnvironmentString(char[200])") \
_GET_ENVIRONMENT_STRING_1(str)
-#define getEnvironmentString(...) \
+#define getQuESTEnvironmentString(...) \
_CALL_MACRO_WITH_1_OR_2_ARGS(_GET_ENVIRONMENT_STRING, __VA_ARGS__)
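+/*
+ * FUNCTIONS WITH THE SAME NAME BUT 1 INSTEAD OF 3 ARGS
+ *
+ * which are handled similarly to the above; the number of
+ * arguments passed selects either the _1 (current API) or
+ * the _3 (deprecated API) variant of the prefixed macro
+ */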
+
+
+#define _GET_MACRO_WITH_1_OR_3_ARGS(_1, _2, _3, macroname, ...) macroname
+
+#define _CALL_MACRO_WITH_1_OR_3_ARGS(prefix, ...) \
+ _GET_MACRO_WITH_1_OR_3_ARGS(__VA_ARGS__, prefix##_3, prefix##_2, prefix##_1)(__VA_ARGS__)
+
+
+#define _GET_QUEST_SEEDS_1(out) \
+ getQuESTSeeds(out)
+
+#define _GET_QUEST_SEEDS_3(env, out, numOut) \
+ _WARN_FUNC_NOW_HAS_FEWER_ARGS( \
+ "getQuESTSeeds(QuESTEnv env, unsigned long int* out, int numOut)", \
+ "getQuESTSeeds(unsigned* out)") \
+ _GET_QUEST_SEEDS_1(out)
+
+#define getQuESTSeeds(...) \
+ _CALL_MACRO_WITH_1_OR_3_ARGS(_GET_QUEST_SEEDS, __VA_ARGS__)
+
+
+
/*
* FUNCTIONS WITH THE SAME NAME BUT 0 INSTEAD OF 1 ARGS
*
@@ -657,10 +677,10 @@ static inline void v3_mixKrausMap(Qureg qureg, int targ, _NoWarnComplexMatrix2 *
static inline void _mixNonTPKrausMap(Qureg qureg, int targ, _NoWarnComplexMatrix2 *ops, int numOps) {
- qreal eps = getValidationEpsilon();
- setValidationEpsilon(0);
+ qreal eps = getQuESTValidationEpsilon();
+ setQuESTValidationEpsilon(0);
_MIX_KRAUS_MAP_INNER(qureg, ops, numOps, &targ, 1);
- setValidationEpsilon(eps);
+ setQuESTValidationEpsilon(eps);
}
#define mixNonTPKrausMap(...) \
@@ -673,10 +693,10 @@ static inline void _mixNonTPKrausMap(Qureg qureg, int targ, _NoWarnComplexMatrix
static inline void _mixTwoQubitKrausMap(Qureg qureg, int targ1, int targ2, _NoWarnComplexMatrix4 *ops, int numOps, int isNonCPTP) {
int targs[] = {targ1, targ2};
- qreal eps = getValidationEpsilon();
- if (isNonCPTP) setValidationEpsilon(0);
+ qreal eps = getQuESTValidationEpsilon();
+ if (isNonCPTP) setQuESTValidationEpsilon(0);
_MIX_KRAUS_MAP_INNER(qureg, ops, numOps, targs, 2);
- setValidationEpsilon(eps);
+ setQuESTValidationEpsilon(eps);
}
#define mixTwoQubitKrausMap(...) \
@@ -703,11 +723,11 @@ static inline void _mixMultiQubitKrausMap(Qureg qureg, int* targs, int numTargs,
setKrausMap(map, ptrs);
free(ptrs);
- qreal eps = getValidationEpsilon();
- if (isNonCPTP) setValidationEpsilon(0);
+ qreal eps = getQuESTValidationEpsilon();
+ if (isNonCPTP) setQuESTValidationEpsilon(0);
(mixKrausMap)(qureg, targs, numTargs, map); // calls above macro, wrapped to avoid warning */
destroyKrausMap(map);
- setValidationEpsilon(eps);
+ setQuESTValidationEpsilon(eps);
}
#define mixMultiQubitKrausMap(...) \
@@ -827,16 +847,16 @@ static inline QuESTEnv _createQuESTEnv() {
leftapplyDiagMatr(__VA_ARGS__)
static inline void _applyGateSubDiagonalOp(Qureg qureg, int* targets, int numTargets, DiagMatr op) {
- qreal eps = getValidationEpsilon();
- setValidationEpsilon(0);
+ qreal eps = getQuESTValidationEpsilon();
+ setQuESTValidationEpsilon(0);
applyDiagMatr(qureg, targets, numTargets, op);
- setValidationEpsilon(eps);
+ setQuESTValidationEpsilon(eps);
}
#define applyGateSubDiagonalOp(...) \
_WARN_GENERAL_MSG( \
"The QuEST function 'applyGateSubDiagonalOp()' is deprecated. To achieve the same thing, disable " \
- "numerical validation via 'setValidationEpsilon(0)' before calling 'applyDiagMatr()'. You can " \
- "save the existing epsilon via 'getValidationEpsilon()' to thereafter restore. This procedure " \
+ "numerical validation via 'setQuESTValidationEpsilon(0)' before calling 'applyDiagMatr()'. You can " \
+ "save the existing epsilon via 'getQuESTValidationEpsilon()' to thereafter restore. This procedure " \
"has been performed here automatically.") \
_applyGateSubDiagonalOp(__VA_ARGS__)
@@ -1131,32 +1151,32 @@ static inline void _applyPauliHamil(Qureg inQureg, PauliStrSum hamil, Qureg outQ
static inline void _applyGateMatrixN(Qureg qureg, int* targs, int numTargs, CompMatr u) {
- qreal eps = getValidationEpsilon();
- setValidationEpsilon(0);
+ qreal eps = getQuESTValidationEpsilon();
+ setQuESTValidationEpsilon(0);
applyCompMatr(qureg, targs, numTargs, u);
- setValidationEpsilon(eps);
+ setQuESTValidationEpsilon(eps);
}
#define applyGateMatrixN(...) \
_WARN_GENERAL_MSG( \
"The QuEST function 'applyGateMatrixN()' is deprecated. To achieve the same thing, disable " \
- "numerical validation via 'setValidationEpsilon(0)' before calling 'applyCompMatr()'. You can " \
- "save the existing epsilon via 'getValidationEpsilon()' to thereafter restore. This procedure " \
+ "numerical validation via 'setQuESTValidationEpsilon(0)' before calling 'applyCompMatr()'. You can " \
+ "save the existing epsilon via 'getQuESTValidationEpsilon()' to thereafter restore. This procedure " \
"has been performed here automatically.") \
_applyGateMatrixN(__VA_ARGS__)
static inline void _applyMultiControlledGateMatrixN(Qureg qureg, int* ctrls, int numCtrls, int* targs, int numTargs, CompMatr u) {
- qreal eps = getValidationEpsilon();
- setValidationEpsilon(0);
+ qreal eps = getQuESTValidationEpsilon();
+ setQuESTValidationEpsilon(0);
applyMultiControlledCompMatr(qureg, ctrls, numCtrls, targs, numTargs, u);
- setValidationEpsilon(eps);
+ setQuESTValidationEpsilon(eps);
}
#define applyMultiControlledGateMatrixN(...) \
_WARN_GENERAL_MSG( \
"The QuEST function 'applyMultiControlledGateMatrixN()' is deprecated. To achieve the same thing, disable " \
- "numerical validation via 'setValidationEpsilon(0)' before calling 'applyMultiControlledCompMatr()'. You can " \
- "save the existing epsilon via 'getValidationEpsilon()' to thereafter restore. This procedure has been " \
+ "numerical validation via 'setQuESTValidationEpsilon(0)' before calling 'applyMultiControlledCompMatr()'. You can " \
+ "save the existing epsilon via 'getQuESTValidationEpsilon()' to thereafter restore. This procedure has been " \
"performed here automatically.") \
_applyMultiControlledGateMatrixN(__VA_ARGS__)
@@ -1331,12 +1351,12 @@ static inline void _multiControlledMultiRotatePauli(Qureg qureg, int* ctrls, int
#define seedQuESTDefault(...) \
- _WARN_FUNC_RENAMED("seedQuESTDefault(QuESTEnv)", "setSeedsToDefault()") \
- setSeedsToDefault()
+ _WARN_FUNC_RENAMED("seedQuESTDefault(QuESTEnv)", "setQuESTSeedsToDefault()") \
+ setQuESTSeedsToDefault()
#define seedQuEST(env, seeds, numSeeds) \
- _WARN_FUNC_RENAMED("seedQuEST(QuESTEnv, unsigned long int*, int)", "setSeeds(unsigned*, int)") \
- setSeeds(seeds, numSeeds)
+ _WARN_FUNC_RENAMED("seedQuEST(QuESTEnv, unsigned long int*, int)", "setQuESTSeeds(unsigned*, int)") \
+ setQuESTSeeds(seeds, numSeeds)
diff --git a/quest/include/environment.h b/quest/include/environment.h
index 04f24bfe2..cdefa7d7d 100644
--- a/quest/include/environment.h
+++ b/quest/include/environment.h
@@ -14,6 +14,8 @@
#ifndef ENVIRONMENT_H
#define ENVIRONMENT_H
+#include
+
// enable invocation by both C and C++ binaries
#ifdef __cplusplus
extern "C" {
@@ -33,15 +35,17 @@ extern "C" {
typedef struct {
// deployment modes which can be runtime disabled
- int isMultithreaded;
- int isGpuAccelerated;
- int isDistributed;
+ bool isMultithreaded;
+ bool isGpuAccelerated;
+ bool isDistributed;
+ bool isMpiUserOwned;
// deployment modes which cannot be directly changed after compilation
- int isCuQuantumEnabled;
+ bool isCuQuantumEnabled;
// deployment configurations which can be changed via environment variables
int isGpuSharingEnabled;
+ int isMpiGpuAware;
// distributed configuration
int rank;
diff --git a/quest/include/experimental.h b/quest/include/experimental.h
new file mode 100644
index 000000000..8c2cc4e0a
--- /dev/null
+++ b/quest/include/experimental.h
@@ -0,0 +1,110 @@
+/** @file
+ * Experimental functions which are liable to
+ * API breaks within QuEST minor version releases.
+ * Some optional functions require compiling this
+ * file against MPI, despite being outside of /comm/,
+ * and so require opt-in macros (QUEST_COMPILE_SUBCOMM)
+ *
+ * @author Oliver Brown
+ * @author Tyson Jones (formatting)
+ *
+ * @defgroup experimental Experimental
+ * @ingroup api
+ * @brief Experimental functions with tentative APIs
+ * @{
+ */
+
+#ifndef EXPERIMENTAL_H
+#define EXPERIMENTAL_H
+
+#include "quest/include/config.h"
+
+#if QUEST_COMPILE_SUBCOMM && ! QUEST_COMPILE_MPI
+ #error "Macro QUEST_COMPILE_SUBCOMM was true, but QUEST_COMPILE_MPI was illegally false."
+#endif
+
+#if QUEST_COMPILE_SUBCOMM
+ #include
+#endif
+
+// enable invocation by both C and C++ binaries
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** @notyetdoced
+ *
+ * Advanced initialiser which lets the user positively declare that they take responsibility for MPI.
+ * This means we assume they have called MPI_Init, and that they will call MPI_Finalize.
+ *
+ * @author Oliver Brown
+ */
+void initCustomMpiQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuAccel, int useMultithread);
+
+
+#if QUEST_COMPILE_SUBCOMM
+/** @notyetdoced
+ *
+ * Advanced initialiser which allows the user to provide an MPI communicator for QuEST to use.
+ * Use of this initialiser implies userOwnsMpi = true, (exposed by initCustomMpiQuESTEnv) and
+ * therefore that they have already initialised MPI, and they will call MPI_Finalize at the
+ * appropriate time.
+ *
+ * The user-provided MPI communicator undergoes the same validation procedure as any that QuEST
+ * would use, and so must contain a power-of-2 number of processes.
+ *
+ * This function is only compiled and exposed when macro QUEST_COMPILE_SUBCOMM is 1, as is
+ * defined when providing CMake option QUEST_ENABLE_SUBCOMM during building.
+ *
+ * @author Oliver Brown
+ */
+void initCustomMpiCommQuESTEnv(MPI_Comm questComm, int useGpuAccel, int useMultithread);
+#endif // QUEST_COMPILE_SUBCOMM
+
+
+/** @notyetdoced
+ *
+ * @author Oliver Brown
+ */
+int getQuESTNumGpuThreadsPerBlock();
+
+
+/** Overrides the number of CUDA threads per block (or @p blockDim) used by QuEST's GPU-accelerated backend.
+ *
+ * This changes the GPU parallelisation granularity and can affect performance, and is useful
+ * for performance tuning or diagnostics. Before this function is called, QuEST will use the
+ * number as specified by the environment variable @p QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK,
+ * if defined. Otherwise, it will use the value specified by the CMake/compile option of the
+ * same name, which itself presently defaults to @p 128. After this function is called, QuEST
+ * will adopt @p numThreadsPerBlock for the remainder of execution, or until this function is
+ * called again.
+ *
+ * Practical values of @p numThreadsPerBlock can vary with the simulation size, the user's GPU hardware,
+ * and whether it is NVIDIA or AMD, which have respective warp sizes of @p 32 and @p 64.
+ *
+ * @note
+ * This function has no effect when QuEST is not deployed with GPU-acceleration enabled.
+ *
+ * @param[in] numThreadsPerBlock the new block size.
+ * @throws @validationerror
+ * - if the @p QuESTEnv is not initialised.
+ * - if @p numThreadsPerBlock is negative.
+ * - if @p numThreadsPerBlock is not a multiple of the GPU warp size.
+ * - if @p numThreadsPerBlock exceeds the maximum @p blockDim imposed by the GPU hardware.
+ * @see
+ * - QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK
+ * @author Oliver Brown
+ * @author Tyson Jones
+ */
+void setQuESTNumGpuThreadsPerBlock(int numThreadsPerBlock);
+
+
+// end de-mangler
+#ifdef __cplusplus
+}
+#endif
+
+#endif // EXPERIMENTAL_H
+
+/** @} */ // (end file-wide doxygen defgroup)
diff --git a/quest/include/modes.h b/quest/include/modes.h
index f8fc52a1c..25ad8bb54 100644
--- a/quest/include/modes.h
+++ b/quest/include/modes.h
@@ -43,39 +43,77 @@
* - forbid sharing: @p 0, @p '0', @p '', @p , (unspecified)
* - permit sharing: @p 1, @p '1'
*
+ * @constraints
+ * The function initQuESTEnv() will throw a validation error if any of the below are not satisfied.
+ * - The specified string, if non-empty, must evaluate to the integer @p 0 or @p 1.
+ *
* @author Tyson Jones
*/
- const int PERMIT_NODES_TO_SHARE_GPU = 0;
+ const int QUEST_PERMIT_NODES_TO_SHARE_GPU = 0;
/** @envvardoc
*
* Specifies the default validation epsilon.
*
- * Specifying `DEFAULT_VALIDATION_EPSILON` to a positive, real number overrides the
+ * Specifying `QUEST_DEFAULT_VALIDATION_EPSILON` to a positive, real number overrides the
* precision-specific default (`1E-5`, `1E-12`, `1E-13` for single, double and quadruple
* precision respectively). The specified epsilon is used by QuEST for numerical validation
- * unless overriden at runtime via setValidationEpsilon(), in which case it can be
- * restored to that specified by this environment variable using setValidationEpsilonToDefault().
+ * unless overridden at runtime via setQuESTValidationEpsilon(), in which case it can be
+ * restored to that specified by this environment variable using setQuESTValidationEpsilonToDefault().
*
* @envvarvalues
- * - setting @p DEFAULT_VALIDATION_EPSILON=0 disables numerical validation, as if the value
+ * - setting @p QUEST_DEFAULT_VALIDATION_EPSILON=0 disables numerical validation, as if the value
* were instead infinity.
- * - setting @p DEFAULT_VALIDATION_EPSILON='' is equivalent to _not_ specifying the variable,
+ * - setting @p QUEST_DEFAULT_VALIDATION_EPSILON='' is equivalent to _not_ specifying the variable,
* adopting instead the precision-specific default above.
- * - setting @p DEFAULT_VALIDATION_EPSILON=x where `x` is a positive, valid `qreal` in any
+ * - setting @p QUEST_DEFAULT_VALIDATION_EPSILON=x where `x` is a positive, valid `qreal` in any
* format accepted by `C` or `C++` (e.g. `0.01`, `1E-2`, `+1e-2`) will use `x` as the
* default validation epsilon.
*
* @constraints
- * The function initQuESTEnv() will throw a validation error if:
+ * The function initQuESTEnv() will throw a validation error if any of the below are not satisfied.
* - The specified epsilon must be `0` or positive.
* - The specified epsilon must not exceed that maximum or minimum value which can be stored
* in a `qreal`, which is specific to its precision.
*
* @author Tyson Jones
*/
- const qreal DEFAULT_VALIDATION_EPSILON = 0;
+ const qreal QUEST_DEFAULT_VALIDATION_EPSILON = 0;
+
+
+ /** @envvardoc
+ *
+ * Specifies the default number of threads per block (or "block dimension") used by GPU acceleration.
+ *
+ * The number of dispatched CUDA threads per block controls the parallelisation granularity of
+ * QuEST's GPU backend, affecting performance.
+ * Specifying `QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK` to a valid, positive integer overrides
+ * QuEST's default otherwise set during compilation via a CMake option of the same name. If
+ * that CMake option was not set, the default is assumed to be @p 128.
+ *
+ * The number specified by this environment variable will be used as the block dimension by all of
+ * QuEST's GPU backend functions, unless overridden at runtime via setQuESTNumGpuThreadsPerBlock().
+ * The actual number of threads per block used at any time can be queried via
+ * getQuESTNumGpuThreadsPerBlock(), or reported by reportQuESTEnv().
+ *
+ * @envvarvalues
+ * - use the compile-time default (`128` unless changed via the CMake option): @p '', @p , (unspecified)
+ * - use number `x`: @p x, @p 'x', @p '+x'
+ *
+ * @constraints
+ * The function initQuESTEnv() will throw a validation error if any of the below are not satisfied.
+ * - The specified number must be a positive integer.
+ * - The specified number must not exceed the minimum or maximum value which can be stored in an @p int.
+ * - The specified number must be divisible by the GPU warp size, which is 32 or 64, depending on
+ * whether deployed to an NVIDIA or AMD GPU. This restriction is imposed even when QuEST is not
+ * deployed with GPU-acceleration.
+ * - The specified number must not exceed the maximum imposed by the available GPU hardware.
+ *
+ * @author Oliver Brown
+ * @author Tyson Jones
+ */
+ const int QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK = 0;
#endif
diff --git a/quest/include/operations.h b/quest/include/operations.h
index ea4a316ae..3c97d2c61 100644
--- a/quest/include/operations.h
+++ b/quest/include/operations.h
@@ -95,7 +95,7 @@ digraph {
* @f[
\max\limits_{ij} \Big|\left(\hat{U} \hat{U}^\dagger - \id\right)_{ij}\Big|^2 \le \valeps
* @f]
- * where the validation epsilon @f$ \valeps @f$ can be adjusted with setValidationEpsilon().
+ * where the validation epsilon @f$ \valeps @f$ can be adjusted with setQuESTValidationEpsilon().
*
* @myexample
* ```
@@ -194,7 +194,7 @@ digraph {
* @f[
\max\limits_{ij} \Big|\left(\hat{U} \hat{U}^\dagger - \id\right)_{ij}\Big|^2 \le \valeps
* @f]
- * where the validation epsilon @f$ \valeps @f$ can be adjusted with setValidationEpsilon().
+ * where the validation epsilon @f$ \valeps @f$ can be adjusted with setQuESTValidationEpsilon().
*
* @equivalences
*
@@ -573,7 +573,7 @@ void applyMultiControlledCompMatr2(Qureg qureg, std::vector controls, int t
/// @notyetdoced
/// @cppvectoroverload
/// @see applyMultiStateControlledCompMatr2()
-void applyMultiStateControlledCompMatr2(Qureg qureg, std::vector<int> controls, std::vector<int> states, int numControls, int target1, int target2, CompMatr2 matr);
+void applyMultiStateControlledCompMatr2(Qureg qureg, std::vector<int> controls, std::vector<int> states, int target1, int target2, CompMatr2 matr);
#endif // __cplusplus
diff --git a/quest/include/precision.h b/quest/include/precision.h
index d37b9a2d3..7b932e678 100644
--- a/quest/include/precision.h
+++ b/quest/include/precision.h
@@ -77,16 +77,16 @@
*/
// validate precision is 1 (float), 2 (double) or 4 (long double)
-#if ! (FLOAT_PRECISION == 1 || FLOAT_PRECISION == 2 || FLOAT_PRECISION == 4)
- #error "FLOAT_PRECISION must be 1 (float), 2 (double) or 4 (long double)"
+#if ! (QUEST_FLOAT_PRECISION == 1 || QUEST_FLOAT_PRECISION == 2 || QUEST_FLOAT_PRECISION == 4)
+ #error "QUEST_FLOAT_PRECISION must be 1 (float), 2 (double) or 4 (long double)"
#endif
// infer floating-point type from precision
-#if FLOAT_PRECISION == 1
+#if QUEST_FLOAT_PRECISION == 1
#define FLOAT_TYPE float
-#elif FLOAT_PRECISION == 2
+#elif QUEST_FLOAT_PRECISION == 2
#define FLOAT_TYPE double
-#elif FLOAT_PRECISION == 4
+#elif QUEST_FLOAT_PRECISION == 4
#define FLOAT_TYPE long double
#endif
@@ -96,13 +96,13 @@
/// @notyetdoced
/// @macrodoc
///
- /// (note this macro is informed by the FLOAT_PRECISION CMake variable)
- const int FLOAT_PRECISION = 2;
+ /// (note this macro is informed by the QUEST_FLOAT_PRECISION CMake variable)
+ const int QUEST_FLOAT_PRECISION = 2;
/// @notyetdoced
/// @macrodoc
///
- /// (note this macro is informed by the FLOAT_PRECISION CMake variable)
+ /// (note this macro is informed by the QUEST_FLOAT_PRECISION CMake variable)
typedef double int FLOAT_TYPE;
#endif
@@ -113,8 +113,8 @@
* CHECK PRECISION TYPES ARE COMPATIBLE WITH DEPLOYMENT
*/
-#if COMPILE_CUDA && (FLOAT_PRECISION == 4)
- #error "A quad floating-point precision (FLOAT_PRECISION=4, i.e. long double) is not supported by GPU deployment"
+#if QUEST_COMPILE_CUDA && (QUEST_FLOAT_PRECISION == 4)
+ #error "A quad floating-point precision (QUEST_FLOAT_PRECISION=4, i.e. long double) is not supported by GPU deployment"
#endif
@@ -125,14 +125,14 @@
* which is pre-run-time overridable by specifying the corresponding environment variable.
*/
-#if FLOAT_PRECISION == 1
- #define UNSPECIFIED_DEFAULT_VALIDATION_EPSILON 1E-5
+#if QUEST_FLOAT_PRECISION == 1
+ #define QUEST_UNSPECIFIED_DEFAULT_VALIDATION_EPSILON 1E-5
-#elif FLOAT_PRECISION == 2
- #define UNSPECIFIED_DEFAULT_VALIDATION_EPSILON 1E-12
+#elif QUEST_FLOAT_PRECISION == 2
+ #define QUEST_UNSPECIFIED_DEFAULT_VALIDATION_EPSILON 1E-12
-#elif FLOAT_PRECISION == 4
- #define UNSPECIFIED_DEFAULT_VALIDATION_EPSILON 1E-13
+#elif QUEST_FLOAT_PRECISION == 4
+ #define QUEST_UNSPECIFIED_DEFAULT_VALIDATION_EPSILON 1E-13
#endif
@@ -142,13 +142,13 @@
* PRECISION-AGNOSTIC CONVENIENCE MACROS
*/
-#if FLOAT_PRECISION == 1
+#if QUEST_FLOAT_PRECISION == 1
#define QREAL_FORMAT_SPECIFIER "%.8g"
-#elif FLOAT_PRECISION == 2
+#elif QUEST_FLOAT_PRECISION == 2
#define QREAL_FORMAT_SPECIFIER "%.14g"
-#elif FLOAT_PRECISION == 4
+#elif QUEST_FLOAT_PRECISION == 4
#define QREAL_FORMAT_SPECIFIER "%.17Lg"
#endif
diff --git a/quest/include/quest.h b/quest/include/quest.h
index 409253ff8..da1c778e2 100644
--- a/quest/include/quest.h
+++ b/quest/include/quest.h
@@ -38,6 +38,7 @@
#include "quest/include/debug.h"
#include "quest/include/decoherence.h"
#include "quest/include/environment.h"
+#include "quest/include/experimental.h"
#include "quest/include/trotterisation.h"
#include "quest/include/initialisations.h"
#include "quest/include/channels.h"
@@ -49,7 +50,7 @@
#include "quest/include/wrappers.h"
-#if INCLUDE_DEPRECATED_FUNCTIONS
+#if QUEST_INCLUDE_DEPRECATED_FUNCTIONS
#include "quest/include/deprecated.h"
#endif
diff --git a/quest/include/qureg.h b/quest/include/qureg.h
index f3284fa14..4ff4c5627 100644
--- a/quest/include/qureg.h
+++ b/quest/include/qureg.h
@@ -281,10 +281,10 @@ Qureg createForcedDensityQureg(int numQubits);
* @par Memory
* The total allocated memory depends on all parameters (_except_
* @p useMultithread), and the size of the variable-precision @c qcomp used to represent each
- * amplitude. This is determined by preprocessor @c FLOAT_PRECISION via
+ * amplitude. This is determined by preprocessor @c QUEST_FLOAT_PRECISION via
*
*
- * | @c FLOAT_PRECISION | @c qcomp size (bytes) |
+ * | @c QUEST_FLOAT_PRECISION | @c qcomp size (bytes) |
* | --- | --- |
* | 1 | 8 |
* | 2 | 16 |
@@ -310,7 +310,7 @@ Qureg createForcedDensityQureg(int numQubits);
* | 1 | 1 | @f$ 2 \, B \, D \, / \, W @f$ | @f$ 2 \, B \, D @f$ | @f$ 2 \, B \, D \, / \, W @f$ | @f$ 2 \, B \, D @f$ | @f$ 4 \, B \, D @f$ |
*
*
- * For illustration, using the default @c FLOAT_PRECISION=2 whereby @f$ B = 16 @f$ bytes, the RAM _per node_
+ * For illustration, using the default @c QUEST_FLOAT_PRECISION=2 whereby @f$ B = 16 @f$ bytes, the RAM _per node_
* over varying distributions is:
*
*
diff --git a/quest/include/trotterisation.h b/quest/include/trotterisation.h
index 6fd493264..59600c9d9 100644
--- a/quest/include/trotterisation.h
+++ b/quest/include/trotterisation.h
@@ -138,7 +138,7 @@ extern "C" {
* @f[
\max\limits_{i} |c_i| \le \valeps
* @f]
- * where the validation epsilon @f$ \valeps @f$ can be adjusted with setValidationEpsilon().
+ * where the validation epsilon @f$ \valeps @f$ can be adjusted with setQuESTValidationEpsilon().
* Otherwise, use applyTrotterizedNonUnitaryPauliStrSumGadget() to permit non-Hermitian @p sum
* and ergo effect a non-unitary exponential(s).
*
@@ -352,7 +352,7 @@ extern "C" {
* @f[
\max\limits_{i} |c_i| \le \valeps
* @f]
- * where the validation epsilon @f$ \valeps @f$ can be adjusted with setValidationEpsilon(). The imaginary components
+ * where the validation epsilon @f$ \valeps @f$ can be adjusted with setQuESTValidationEpsilon(). The imaginary components
* of the Hamiltonian _are_ considered during simulation.
*
* - The @p time parameter is necessarily real to retain unitarity. It can be substituted for a strictly imaginary
@@ -488,7 +488,7 @@ void applyTrotterizedUnitaryTimeEvolution(Qureg qureg, PauliStrSum hamil, qreal
* @f[
\max\limits_{i} |c_i| \le \valeps
* @f]
- * where the validation epsilon @f$ \valeps @f$ can be adjusted with setValidationEpsilon(). Beware however that
+ * where the validation epsilon @f$ \valeps @f$ can be adjusted with setQuESTValidationEpsilon(). Beware however that
* imaginary-time evolution under a non-Hermitian Hamiltonian will _not_ necessarily approach the lowest lying eigenstate
* (the eigenvalues may be non-real) so is likely of limited utility.
*
@@ -604,8 +604,8 @@ void applyTrotterizedImaginaryTimeEvolution(Qureg qureg, PauliStrSum hamil, qrea
* @f[
\min\limits_{i} \gamma_i \ge - \valeps
* @f]
- * where the validation epsilon @f$ \valeps @f$ can be adjusted with setValidationEpsilon(). Non-trace-preserving,
- * negative damping rates can be simulated by disabling numerical validation via `setValidationEpsilon(0)`.
+ * where the validation epsilon @f$ \valeps @f$ can be adjusted with setQuESTValidationEpsilon(). Non-trace-preserving,
+ * negative damping rates can be simulated by disabling numerical validation via `setQuESTValidationEpsilon(0)`.
*
* - The @p time parameter is necessarily real, and cannot be generalised to imaginary or complex like in other
* functions. Generalisation is trivially numerically possible, but has no established physical meaning and so
diff --git a/quest/include/types.h b/quest/include/types.h
index f1f49315d..ac0ef36c1 100644
--- a/quest/include/types.h
+++ b/quest/include/types.h
@@ -53,13 +53,13 @@ typedef INDEX_TYPE qindex;
// which is either MSVC's custom C complex...
#ifdef _MSC_VER
- #if (FLOAT_PRECISION == 1)
+ #if (QUEST_FLOAT_PRECISION == 1)
typedef _Fcomplex qcomp;
- #elif (FLOAT_PRECISION == 2)
+ #elif (QUEST_FLOAT_PRECISION == 2)
typedef _Dcomplex qcomp;
- #elif (FLOAT_PRECISION == 4)
+ #elif (QUEST_FLOAT_PRECISION == 4)
typedef _Lcomplex qcomp;
#endif
diff --git a/quest/src/api/CMakeLists.txt b/quest/src/api/CMakeLists.txt
index 0979f2f6c..7f90dcf17 100644
--- a/quest/src/api/CMakeLists.txt
+++ b/quest/src/api/CMakeLists.txt
@@ -5,6 +5,7 @@ target_sources(QuEST
debug.cpp
decoherence.cpp
environment.cpp
+ experimental.cpp
initialisations.cpp
matrices.cpp
modes.cpp
@@ -14,4 +15,4 @@ target_sources(QuEST
qureg.cpp
trotterisation.cpp
types.cpp
-)
\ No newline at end of file
+)
diff --git a/quest/src/api/calculations.cpp b/quest/src/api/calculations.cpp
index 1143a4ecd..47e5d8a63 100644
--- a/quest/src/api/calculations.cpp
+++ b/quest/src/api/calculations.cpp
@@ -12,6 +12,7 @@
#include "quest/include/calculations.h"
#include "quest/src/core/validation.hpp"
+#include "quest/src/core/lists.hpp"
#include "quest/src/core/utilities.hpp"
#include "quest/src/core/localiser.hpp"
#include "quest/src/core/bitwise.hpp"
@@ -253,12 +254,12 @@ qreal calcProbOfMultiQubitOutcome(Qureg qureg, int* qubits, int* outcomes, int n
validate_targets(qureg, qubits, numQubits, __func__);
validate_measurementOutcomesAreValid(outcomes, numQubits, __func__);
- auto qubitVec = util_getVector(qubits, numQubits);
- auto outcomeVec = util_getVector(outcomes, numQubits);
+ auto qubitList = lists_getList64(qubits, numQubits);
+ auto outcomeList = lists_getList64(outcomes, numQubits);
return (qureg.isDensityMatrix)?
- localiser_densmatr_calcProbOfMultiQubitOutcome(qureg, qubitVec, outcomeVec):
- localiser_statevec_calcProbOfMultiQubitOutcome(qureg, qubitVec, outcomeVec);
+ localiser_densmatr_calcProbOfMultiQubitOutcome(qureg, qubitList, outcomeList):
+ localiser_statevec_calcProbOfMultiQubitOutcome(qureg, qubitList, outcomeList);
}
@@ -267,11 +268,11 @@ void calcProbsOfAllMultiQubitOutcomes(qreal* outcomeProbs, Qureg qureg, int* qub
validate_targets(qureg, qubits, numQubits, __func__);
validate_measurementOutcomesFitInGpuMem(qureg, numQubits, __func__);
- auto qubitVec = util_getVector(qubits, numQubits);
+ auto qubitList = lists_getList64(qubits, numQubits);
(qureg.isDensityMatrix)?
- localiser_densmatr_calcProbsOfAllMultiQubitOutcomes(outcomeProbs, qureg, qubitVec):
- localiser_statevec_calcProbsOfAllMultiQubitOutcomes(outcomeProbs, qureg, qubitVec);
+ localiser_densmatr_calcProbsOfAllMultiQubitOutcomes(outcomeProbs, qureg, qubitList):
+ localiser_statevec_calcProbsOfAllMultiQubitOutcomes(outcomeProbs, qureg, qubitList);
}
@@ -383,7 +384,7 @@ Qureg calcPartialTrace(Qureg qureg, int* traceOutQubits, int numTraceQubits) {
qureg.isGpuAccelerated, qureg.isMultithreaded, __func__);
// set it to reduced density matrix
- auto targets = util_getVector(traceOutQubits, numTraceQubits);
+ auto targets = lists_getList64(traceOutQubits, numTraceQubits);
localiser_densmatr_partialTrace(qureg, out, targets);
return out;
@@ -396,7 +397,7 @@ Qureg calcReducedDensityMatrix(Qureg qureg, int* retainQubits, int numRetainQubi
validate_targets(qureg, retainQubits, numRetainQubits, __func__);
validate_quregCanBeReduced(qureg, qureg.numQubits - numRetainQubits, __func__);
- auto traceQubits = util_getNonTargetedQubits(retainQubits, numRetainQubits, qureg.numQubits);
+ auto traceQubits = util_getNonTargetedQubits(lists_getList64(retainQubits, numRetainQubits), qureg.numQubits);
// harmlessly re-validates
return calcPartialTrace(qureg, traceQubits.data(), traceQubits.size());
diff --git a/quest/src/api/channels.cpp b/quest/src/api/channels.cpp
index d6e3ac4fb..c6702438a 100644
--- a/quest/src/api/channels.cpp
+++ b/quest/src/api/channels.cpp
@@ -107,7 +107,7 @@ void freeAllMemoryIfAnyAllocsFailed(T& obj) {
// determine whether any node experienced a failure
bool anyFail = didAnyLocalAllocsFail(obj);
- if (comm_isInit())
+ if (comm_isActive())
anyFail = comm_isTrueOnAllNodes(anyFail);
// if so, free all memory before subsequent validation
@@ -456,11 +456,15 @@ extern "C" void reportSuperOp(SuperOp op) {
size_t elemMem = mem_getLocalSuperOpMemoryRequired(op.numQubits);
size_t structMem = sizeof(op);
+ printer_sync();
+
print_header(op, elemMem + structMem);
print_elems(op);
// exclude mandatory newline above
print_oneFewerNewlines();
+
+ printer_sync();
}
@@ -479,6 +483,8 @@ extern "C" void reportKrausMap(KrausMap map) {
size_t superMem = mem_getLocalSuperOpMemoryRequired(map.superop.numQubits);
size_t strucMem = sizeof(map);
+ printer_sync();
+
// gauranteed not to overflow
size_t totalMem = krausMem + superMem + strucMem;
print_header(map, totalMem);
@@ -486,4 +492,6 @@ extern "C" void reportKrausMap(KrausMap map) {
// exclude mandatory newline above
print_oneFewerNewlines();
+
+ printer_sync();
}
diff --git a/quest/src/api/debug.cpp b/quest/src/api/debug.cpp
index 82146da2a..e6c6b9f2a 100644
--- a/quest/src/api/debug.cpp
+++ b/quest/src/api/debug.cpp
@@ -34,7 +34,7 @@ extern "C" {
*/
-void setSeeds(unsigned* seeds, int numSeeds) {
+void setQuESTSeeds(unsigned* seeds, int numSeeds) {
validate_envIsInit(__func__);
validate_randomSeeds(seeds, numSeeds, __func__);
@@ -42,20 +42,20 @@ void setSeeds(unsigned* seeds, int numSeeds) {
rand_setSeeds(vector(seeds, seeds+numSeeds));
}
-void setSeedsToDefault() {
+void setQuESTSeedsToDefault() {
validate_envIsInit(__func__);
rand_setSeedsToDefault();
}
-int getNumSeeds() {
+int getQuESTNumSeeds() {
validate_envIsInit(__func__);
return rand_getNumSeeds();
}
-void getSeeds(unsigned* seeds) {
+void getQuESTSeeds(unsigned* seeds) {
validate_envIsInit(__func__);
auto vec = rand_getSeeds();
@@ -71,19 +71,19 @@ void getSeeds(unsigned* seeds) {
* VALIDATION
*/
-void setInputErrorHandler(void (*callback)(const char*, const char*)) {
+void setQuESTInputErrorHandler(void (*callback)(const char*, const char*)) {
validate_envIsInit(__func__);
validateconfig_setErrorHandler(callback);
}
-void setValidationOn() {
+void setQuESTValidationOn() {
validate_envIsInit(__func__);
validateconfig_enable();
}
-void setValidationOff() {
+void setQuESTValidationOff() {
validate_envIsInit(__func__);
// disables all validation and computation
@@ -97,7 +97,7 @@ void setValidationOff() {
}
-void setValidationEpsilon(qreal eps) {
+void setQuESTValidationEpsilon(qreal eps) {
validate_envIsInit(__func__);
validate_newEpsilonValue(eps, __func__);
@@ -105,14 +105,14 @@ void setValidationEpsilon(qreal eps) {
util_setEpsilonSensitiveHeapFlagsToUnknown();
}
-void setValidationEpsilonToDefault() {
+void setQuESTValidationEpsilonToDefault() {
validate_envIsInit(__func__);
validateconfig_setEpsilonToDefault();
util_setEpsilonSensitiveHeapFlagsToUnknown();
}
-qreal getValidationEpsilon() {
+qreal getQuESTValidationEpsilon() {
validate_envIsInit(__func__);
return validateconfig_getEpsilon();
@@ -125,7 +125,7 @@ qreal getValidationEpsilon() {
*/
-void setMaxNumReportedItems(qindex numRows, qindex numCols) {
+void setQuESTMaxNumReportedItems(qindex numRows, qindex numCols) {
validate_envIsInit(__func__);
validate_newMaxNumReportedScalars(numRows, numCols, __func__);
@@ -139,7 +139,7 @@ void setMaxNumReportedItems(qindex numRows, qindex numCols) {
}
-void setMaxNumReportedSigFigs(int numSigFigs) {
+void setQuESTMaxNumReportedSigFigs(int numSigFigs) {
validate_envIsInit(__func__);
validate_newMaxNumReportedSigFigs(numSigFigs, __func__);
@@ -147,7 +147,7 @@ void setMaxNumReportedSigFigs(int numSigFigs) {
}
-void setNumReportedNewlines(int numNewlines) {
+void setQuESTNumReportedNewlines(int numNewlines) {
validate_envIsInit(__func__);
validate_newNumReportedNewlines(numNewlines, __func__);
@@ -155,7 +155,7 @@ void setNumReportedNewlines(int numNewlines) {
}
-void setReportedPauliChars(const char* paulis) {
+void setQuESTReportedPauliChars(const char* paulis) {
validate_envIsInit(__func__);
validate_numPauliChars(paulis, __func__);
@@ -163,7 +163,7 @@ void setReportedPauliChars(const char* paulis) {
}
-void setReportedPauliStrStyle(int flag) {
+void setQuESTReportedPauliStrStyle(int flag) {
validate_envIsInit(__func__);
validate_reportedPauliStrStyleFlag(flag, __func__);
@@ -177,7 +177,7 @@ void setReportedPauliStrStyle(int flag) {
*/
-qindex getGpuCacheSize() {
+qindex getQuESTGpuCacheSize() {
validate_envIsInit(__func__);
if (getQuESTEnv().isGpuAccelerated)
@@ -188,7 +188,7 @@ qindex getGpuCacheSize() {
}
-void clearGpuCache() {
+void clearQuESTGpuCache() {
validate_envIsInit(__func__);
// safely do nothing if not GPU accelerated
@@ -206,19 +206,19 @@ void clearGpuCache() {
*/
-void setSeeds(vector<unsigned> seeds) {
- setSeeds(seeds.data(), seeds.size());
+void setQuESTSeeds(vector<unsigned> seeds) {
+ setQuESTSeeds(seeds.data(), seeds.size());
}
-vector<unsigned> getSeeds() {
+vector<unsigned> getQuESTSeeds() {
validate_envIsInit(__func__);
// allocate temp vector, and pedantically validate successful
vector<unsigned> out;
- int numSeeds = getNumSeeds();
+ int numSeeds = rand_getNumSeeds();
auto callback = [&]() { validate_tempListAllocSucceeded(false, numSeeds, sizeof(unsigned), __func__); };
util_tryAllocVector(out, numSeeds, callback);
- getSeeds(out.data());
+ getQuESTSeeds(out.data());
return out;
}
diff --git a/quest/src/api/decoherence.cpp b/quest/src/api/decoherence.cpp
index d2fadf621..4e1901f25 100644
--- a/quest/src/api/decoherence.cpp
+++ b/quest/src/api/decoherence.cpp
@@ -126,7 +126,7 @@ void mixKrausMap(Qureg qureg, int* qubits, int numQubits, KrausMap map) {
validate_krausMapIsCPTP(map, __func__); // also checks fields and is-sync
validate_krausMapMatchesTargets(map, numQubits, __func__);
- localiser_densmatr_krausMap(qureg, map, util_getVector(qubits, numQubits));
+ localiser_densmatr_krausMap(qureg, map, lists_getList64(qubits, numQubits));
}
@@ -149,7 +149,7 @@ void mixSuperOp(Qureg qureg, int* targets, int numTargets, SuperOp superop) {
validate_superOpDimMatchesTargs(superop, numTargets, __func__);
validate_mixedAmpsFitInNode(qureg, 2*numTargets, __func__); // superop acts on 2x
- localiser_densmatr_superoperator(qureg, superop, util_getVector(targets, numTargets));
+ localiser_densmatr_superoperator(qureg, superop, lists_getList64(targets, numTargets));
}
diff --git a/quest/src/api/environment.cpp b/quest/src/api/environment.cpp
index 541491899..c59334b55 100644
--- a/quest/src/api/environment.cpp
+++ b/quest/src/api/environment.cpp
@@ -48,7 +48,7 @@ using std::string;
*/
-static QuESTEnv* globalEnvPtr = nullptr;
+static QuESTEnv* global_envPtr = nullptr;
@@ -62,7 +62,7 @@ static QuESTEnv* globalEnvPtr = nullptr;
*/
-static bool hasEnvBeenFinalized = false;
+static bool global_hasEnvBeenFinalized = false;
@@ -71,12 +71,18 @@ static bool hasEnvBeenFinalized = false;
*/
-void validateAndInitCustomQuESTEnv(int useDistrib, int useGpuAccel, int useMultithread, const char* caller) {
+void validateAndInitCustomQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuAccel, int useMultithread, const char* caller) {
// ensure that we are never re-initialising QuEST (even after finalize) because
- // this leads to undefined behaviour in distributed mode, as per the MPI
- validate_envNeverInit(globalEnvPtr != nullptr, hasEnvBeenFinalized, caller);
-
+ // this leads to undefined behaviour in distributed mode, as per the MPI std,
+ // regardless of whether the user owns MPI
+ validate_envNeverInit(global_envPtr != nullptr, global_hasEnvBeenFinalized, caller);
+
+ // load env-vars before validating deployment mode, because some env vars can
+ // affect validation (such as QUEST_PERMIT_NODES_TO_SHARE_GPU). note that
+ // some env-vars (like QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK) will be here
+ // validated to have a correct format (like an int), but the validity of its
+ // actual value will be checked later (since it requires deciding GPU-accel).
envvars_validateAndLoadEnvVars(caller);
validateconfig_setEpsilonToDefault();
@@ -86,15 +92,19 @@ void validateAndInitCustomQuESTEnv(int useDistrib, int useGpuAccel, int useMulti
// by mpirun believe they are each the main rank. This seems unavoidable.
validate_newEnvDeploymentMode(useDistrib, useGpuAccel, useMultithread, caller);
- // overwrite deployments left as modeflag::USE_AUTO
+ // overwrite deployments (left as modeflag::USE_AUTO=-1) with 0,1 (a bool),
+ // which crucially, resolves useDistrib, permitting its consultation below
autodep_chooseQuESTEnvDeployment(useDistrib, useGpuAccel, useMultithread);
+ // ensure that current state of MPI is valid
+ validate_mpiInitStatus(useDistrib, userOwnsMpi, caller);
+
// optionally initialise MPI; necessary before completing validation,
// and before any GPU initialisation and validation, since we will
// perform that specifically upon the MPI-process-bound GPU(s). Further,
// we can make sure validation errors are reported only by the root node.
if (useDistrib)
- comm_init();
+ comm_init(userOwnsMpi);
validate_newEnvDistributedBetweenPower2Nodes(caller);
@@ -124,6 +134,11 @@ void validateAndInitCustomQuESTEnv(int useDistrib, int useGpuAccel, int useMulti
/// should we warn here if each machine contains
/// more GPUs than deployed MPI-processes (some GPUs idle)?
+ // validate the initial numTPB env-var (if specified) is valid
+ int initNumThreadsPerBlock = envvars_getDefaultNumGpuThreadsPerBlock();
+ validate_numGpuThreadsPerBlock(initNumThreadsPerBlock, useGpuAccel, caller);
+ gpu_setNumThreadsPerBlock(initNumThreadsPerBlock);
+
// cuQuantum is always used in GPU-accelerated envs when available
bool useCuQuantum = useGpuAccel && gpu_isCuQuantumCompiled();
if (useCuQuantum) {
@@ -131,26 +146,32 @@ void validateAndInitCustomQuESTEnv(int useDistrib, int useGpuAccel, int useMulti
gpu_initCuQuantum();
}
+ // MPI GPU-awareness detection is platform specific; sometimes it is
+ // known at compile-time, other times according to env-vars
+ bool isMpiGpuAware = comm_isMpiGpuAware();
+
// initialise RNG, used by measurements and random-state generation
rand_setSeedsToDefault();
// allocate space for the global QuESTEnv singleton (overwriting nullptr, unless malloc fails)
- globalEnvPtr = (QuESTEnv*) malloc(sizeof(QuESTEnv));
+ global_envPtr = (QuESTEnv*) malloc(sizeof(QuESTEnv));
// pedantically check that teeny tiny malloc just succeeded
- if (globalEnvPtr == nullptr)
+ if (global_envPtr == nullptr)
error_allocOfQuESTEnvFailed();
- // bind deployment info to global instance
- globalEnvPtr->isMultithreaded = useMultithread;
- globalEnvPtr->isGpuAccelerated = useGpuAccel;
- globalEnvPtr->isDistributed = useDistrib;
- globalEnvPtr->isCuQuantumEnabled = useCuQuantum;
- globalEnvPtr->isGpuSharingEnabled = permitGpuSharing;
+ // bind deployment info to global instance (autocasting int to bool)
+ global_envPtr->isMultithreaded = useMultithread;
+ global_envPtr->isGpuAccelerated = useGpuAccel;
+ global_envPtr->isDistributed = useDistrib;
+ global_envPtr->isMpiUserOwned = userOwnsMpi;
+ global_envPtr->isMpiGpuAware = isMpiGpuAware;
+ global_envPtr->isCuQuantumEnabled = useCuQuantum;
+ global_envPtr->isGpuSharingEnabled = permitGpuSharing;
// bind distributed info
- globalEnvPtr->rank = (useDistrib)? comm_getRank() : 0;
- globalEnvPtr->numNodes = (useDistrib)? comm_getNumNodes() : 1;
+ global_envPtr->rank = (useDistrib)? comm_getRank() : 0;
+ global_envPtr->numNodes = (useDistrib)? comm_getNumNodes() : 1;
}
@@ -187,10 +208,12 @@ void printCompilationInfo() {
print_table(
"compilation", {
- {"isMpiCompiled", comm_isMpiCompiled()},
- {"isGpuCompiled", gpu_isGpuCompiled()},
- {"isOmpCompiled", cpu_isOpenmpCompiled()},
- {"isCuQuantumCompiled", gpu_isCuQuantumCompiled()},
+ {"isOmpCompiled", cpu_isOpenmpCompiled()},
+ {"isMpiCompiled", comm_isMpiCompiled()},
+ {"isMpiSubCommCompiled", comm_isMpiSubCommCompiled()},
+ {"isGpuCompiled", gpu_isGpuCompiled()},
+ {"isHipCompiled", gpu_isHipCompiled()},
+ {"isCuQuantumCompiled", gpu_isCuQuantumCompiled()},
});
}
@@ -199,11 +222,10 @@ void printDeploymentInfo() {
print_table(
"deployment", {
- {"isMpiEnabled", globalEnvPtr->isDistributed},
- {"isGpuEnabled", globalEnvPtr->isGpuAccelerated},
- {"isOmpEnabled", globalEnvPtr->isMultithreaded},
- {"isCuQuantumEnabled", globalEnvPtr->isCuQuantumEnabled},
- {"isGpuSharingEnabled", globalEnvPtr->isGpuSharingEnabled},
+ {"isOmpEnabled", global_envPtr->isMultithreaded},
+ {"isMpiEnabled", global_envPtr->isDistributed},
+ {"isGpuEnabled", global_envPtr->isGpuAccelerated},
+ {"isCuQuantumEnabled", global_envPtr->isCuQuantumEnabled},
});
}
@@ -252,6 +274,7 @@ void printGpuInfo() {
{"gpuMemory", isGpu? printer_getMemoryWithUnitStr(gpu_getTotalMemoryInBytes()) + pg : na},
{"gpuMemoryFree", isGpu? printer_getMemoryWithUnitStr(gpu_getCurrentAvailableMemoryInBytes()) + pg : na},
{"gpuCache", isGpu? printer_getMemoryWithUnitStr(gpu_getCacheMemoryInBytes()) + pg : na},
+ {"numThreadsPerBlock", isGpu? printer_toStr(gpu_getNumThreadsPerBlock()) : na},
});
}
@@ -260,10 +283,16 @@ void printDistributionInfo() {
using namespace printer_substrings;
+ bool comm = global_envPtr->isDistributed;
+ bool gpu = global_envPtr->isGpuAccelerated;
+ bool both = comm && gpu;
+
print_table(
"distribution", {
- {"isMpiGpuAware", (comm_isMpiCompiled())? printer_toStr(comm_isMpiGpuAware()) : na},
- {"numMpiNodes", printer_toStr(globalEnvPtr->numNodes)},
+ {"isMpiUserOwned", comm? printer_toStr(global_envPtr->isMpiUserOwned) : na},
+ {"isMpiGpuAware", comm? printer_toStr(global_envPtr->isMpiGpuAware ) : na},
+ {"isGpuSharingEnabled", both? printer_toStr(global_envPtr->isGpuSharingEnabled) : na},
+ {"numMpiNodes", printer_toStr(global_envPtr->numNodes)},
});
}
@@ -273,7 +302,7 @@ void printQuregSizeLimits(bool isDensMatr) {
using namespace printer_substrings;
// for brevity
- int numNodes = globalEnvPtr->numNodes;
+ int numNodes = global_envPtr->numNodes;
// by default, CPU limits are unknown (because memory query might fail)
string maxQbForCpu = un;
@@ -285,7 +314,7 @@ void printQuregSizeLimits(bool isDensMatr) {
maxQbForCpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, 1, cpuMem));
// and the max MPI sizes are only relevant when env is distributed
- if (globalEnvPtr->isDistributed)
+ if (global_envPtr->isDistributed)
maxQbForMpiCpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, numNodes, cpuMem));
// when MPI irrelevant, change their status from "unknown" to "N/A"
@@ -300,12 +329,12 @@ void printQuregSizeLimits(bool isDensMatr) {
string maxQbForMpiGpu = na;
// max GPU registers only relevant if env is GPU-accelerated
- if (globalEnvPtr->isGpuAccelerated) {
+ if (global_envPtr->isGpuAccelerated) {
qindex gpuMem = gpu_getCurrentAvailableMemoryInBytes();
maxQbForGpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, 1, gpuMem));
// and the max MPI sizes are further only relevant when env is distributed
- if (globalEnvPtr->isDistributed)
+ if (global_envPtr->isDistributed)
maxQbForMpiGpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, numNodes, gpuMem));
}
@@ -342,7 +371,7 @@ void printQuregAutoDeployments(bool isDensMatr) {
// test to theoretically max #qubits, surpassing max that can fit in RAM and GPUs, because
// auto-deploy will still try to deploy there to (then subsequent validation will fail)
- int maxQubits = mem_getMaxNumQuregQubitsBeforeGlobalMemSizeofOverflow(isDensMatr, globalEnvPtr->numNodes);
+ int maxQubits = mem_getMaxNumQuregQubitsBeforeGlobalMemSizeofOverflow(isDensMatr, global_envPtr->numNodes);
for (int numQubits=1; numQubitsisGpuAccelerated)
+ if (global_envPtr->isGpuAccelerated)
gpu_clearCache(); // syncs first
- if (globalEnvPtr->isGpuAccelerated && gpu_isCuQuantumCompiled())
+ if (global_envPtr->isGpuAccelerated && gpu_isCuQuantumCompiled())
gpu_finalizeCuQuantum();
- if (globalEnvPtr->isDistributed) {
+ if (global_envPtr->isDistributed) {
comm_sync();
comm_end();
}
// free global env's heap memory and flag it as unallocated
- free(globalEnvPtr);
- globalEnvPtr = nullptr;
+ free(global_envPtr);
+ global_envPtr = nullptr;
// flag that the environment was finalised, to ensure it is never re-initialised
- hasEnvBeenFinalized = true;
+ global_hasEnvBeenFinalized = true;
}
void syncQuESTEnv() {
validate_envIsInit(__func__);
- if (globalEnvPtr->isGpuAccelerated)
+ if (global_envPtr->isGpuAccelerated)
gpu_sync();
- if (globalEnvPtr->isDistributed)
+ if (global_envPtr->isDistributed)
comm_sync();
}
@@ -465,6 +496,8 @@ void reportQuESTEnv() {
/// @todo add function to write this output to file (useful for HPC debugging)
+ printer_sync();
+
print_label("QuEST execution environment");
bool statevec = false;
@@ -486,24 +519,25 @@ void reportQuESTEnv() {
// exclude mandatory newline above
print_oneFewerNewlines();
+
+ printer_sync();
}
-void getEnvironmentString(char str[200]) {
+void getQuESTEnvironmentString(char str[200]) {
validate_envIsInit(__func__);
- QuESTEnv env = getQuESTEnv();
-
int numThreads = cpu_isOpenmpCompiled()? cpu_getAvailableNumThreads() : 1;
- int cuQuantum = env.isGpuAccelerated && gpu_isCuQuantumCompiled();
- int gpuDirect = env.isGpuAccelerated && gpu_isDirectGpuCommPossible();
-
- snprintf(str, 200, "CUDA=%d OpenMP=%d MPI=%d threads=%d ranks=%d cuQuantum=%d gpuDirect=%d",
- env.isGpuAccelerated,
- env.isMultithreaded,
- env.isDistributed,
+ int cuQuantum = global_envPtr->isGpuAccelerated && gpu_isCuQuantumCompiled();
+ int gpuDirect = global_envPtr->isGpuAccelerated && gpu_isDirectGpuCommPossible();
+
+ snprintf(str, 200, "CUDA=%d OpenMP=%d MPI=%d userOwnsMPI=%d threads=%d ranks=%d cuQuantum=%d gpuDirect=%d",
+ global_envPtr->isGpuAccelerated,
+ global_envPtr->isMultithreaded,
+ global_envPtr->isDistributed,
+ global_envPtr->isMpiUserOwned,
numThreads,
- env.numNodes,
+ global_envPtr->numNodes,
cuQuantum,
gpuDirect);
}
diff --git a/quest/src/api/experimental.cpp b/quest/src/api/experimental.cpp
new file mode 100644
index 000000000..a6f883656
--- /dev/null
+++ b/quest/src/api/experimental.cpp
@@ -0,0 +1,107 @@
+/** @file
+ * Experimental functions which are liable to
+ * API breaks within QuEST minor version releases.
+ * Some optional functions require compiling this
+ * file against MPI, despite being outside of /comm/,
+ * and so require opt-in macros (QUEST_COMPILE_SUBCOMM)
+ *
+ * @author Oliver Brown
+ */
+
+#include "quest/include/config.h"
+#include "quest/include/environment.h"
+
+#include "quest/src/core/validation.hpp"
+#include "quest/src/comm/comm_config.hpp"
+#include "quest/src/gpu/gpu_config.hpp"
+
+#if QUEST_COMPILE_SUBCOMM && ! QUEST_COMPILE_MPI
+ #error "Macro QUEST_COMPILE_SUBCOMM was true, but QUEST_COMPILE_MPI was illegally false."
+#endif
+
+#if QUEST_COMPILE_SUBCOMM
+ #include <mpi.h>
+#endif
+
+
+
+/*
+ * EXTERNAL FUNCTIONS
+ *
+ * which we here regretfully 'extern' because we are either
+ * unsure which header should expose them, or because they
+ * contain deployment-specific types (like MPI_Comm) which
+ * we do not wish to expose within internal headers
+ */
+
+
+extern void validateAndInitCustomQuESTEnv(
+ int useDistrib, bool userOwnsMpi, int useGpuAccel, int useMultithread, const char* caller);
+
+
+#if QUEST_COMPILE_SUBCOMM // hide MPI_Comm
+ extern bool comm_setMpiComm(MPI_Comm newComm, bool userOwnsMpi);
+#endif
+
+
+
+/*
+ * API FUNCTIONS
+ */
+
+
+// enable invocation by both C and C++ binaries
+extern "C" {
+
+// Forwards every flag, plus this function's name (as the caller), to validateAndInitCustomQuESTEnv()
+void initCustomMpiQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuAccel, int useMultithread) {
+ validateAndInitCustomQuESTEnv(useDistrib, userOwnsMpi, useGpuAccel, useMultithread, __func__);
+}
+
+
+#if QUEST_COMPILE_SUBCOMM // hide MPI_Comm
+// Sets the given MPI communicator (only when comm is not yet active), then validates and initialises the QuEST env
+void initCustomMpiCommQuESTEnv(MPI_Comm userQuestComm, int useGpuAccel, int useMultithread) {
+
+ // useDistrib and userOwnsMpi are implied by the use of this initialiser
+ const int useDistrib = 1;
+ const bool userOwnsMpi = true;
+
+ // pre-validate that we are able to set the MPI communicator
+ validate_mpiInitStatus(useDistrib, userOwnsMpi, __func__);
+ validate_mpiSubCommIsNonNull(userQuestComm != MPI_COMM_NULL, __func__);
+
+ // avoid re-setting the MPI comm (to avoid an internal error), which happens
+ // if a user illegally re-calls this function, which will be subsequently
+ // caught by the validation in validateAndInitCustomQuESTEnv() below
+ if (!comm_isActive()) {
+ bool success = comm_setMpiComm(userQuestComm, userOwnsMpi);
+ validate_mpiSubCommSetSucceeded(success, __func__);
+ }
+
+ // perform remaining validation (some is harmlessly repeated) and init QuEST env
+ validateAndInitCustomQuESTEnv(useDistrib, userOwnsMpi, useGpuAccel, useMultithread, __func__);
+}
+#endif // QUEST_COMPILE_SUBCOMM
+
+// Returns gpu_getNumThreadsPerBlock(), after validating that the QuEST env is initialised
+int getQuESTNumGpuThreadsPerBlock() {
+ validate_envIsInit(__func__);
+
+ return gpu_getNumThreadsPerBlock();
+}
+
+// Validates numTPB (given whether the env is GPU-accelerated), then passes it to gpu_setNumThreadsPerBlock()
+void setQuESTNumGpuThreadsPerBlock(int numTPB) {
+ validate_envIsInit(__func__);
+
+ // validation messages and queries depend upon GPU usage
+ bool gpuIsActive = getQuESTEnv().isGpuAccelerated;
+ validate_numGpuThreadsPerBlock(numTPB, gpuIsActive, __func__);
+
+ gpu_setNumThreadsPerBlock(numTPB);
+}
+
+
+// end de-mangler
+}
diff --git a/quest/src/api/initialisations.cpp b/quest/src/api/initialisations.cpp
index aba838e0f..36f63910c 100644
--- a/quest/src/api/initialisations.cpp
+++ b/quest/src/api/initialisations.cpp
@@ -13,6 +13,7 @@
#include "quest/src/core/validation.hpp"
#include "quest/src/core/localiser.hpp"
+#include "quest/src/core/lists.hpp"
#include "quest/src/core/utilities.hpp"
#include "quest/src/core/bitwise.hpp"
#include "quest/src/gpu/gpu_config.hpp"
@@ -220,7 +221,7 @@ void setQuregToPartialTrace(Qureg out, Qureg in, int* traceOutQubits, int numTra
validate_targets(in, traceOutQubits, numTraceQubits, __func__);
validate_quregCanBeSetToReducedDensMatr(out, in, numTraceQubits, __func__);
- auto targets = util_getVector(traceOutQubits, numTraceQubits);
+ auto targets = lists_getList64(traceOutQubits, numTraceQubits);
localiser_densmatr_partialTrace(in, out, targets);
}
@@ -233,7 +234,7 @@ void setQuregToReducedDensityMatrix(Qureg out, Qureg in, int* retainQubits, int
validate_targets(in, retainQubits, numRetainQubits, __func__);
validate_quregCanBeSetToReducedDensMatr(out, in, in.numQubits - numRetainQubits, __func__);
- auto traceQubits = util_getNonTargetedQubits(retainQubits, numRetainQubits, in.numQubits);
+ auto traceQubits = util_getNonTargetedQubits(lists_getList64(retainQubits, numRetainQubits), in.numQubits);
localiser_densmatr_partialTrace(in, out, traceQubits);
}
diff --git a/quest/src/api/matrices.cpp b/quest/src/api/matrices.cpp
index b17987eb4..07e37025c 100644
--- a/quest/src/api/matrices.cpp
+++ b/quest/src/api/matrices.cpp
@@ -165,7 +165,7 @@ void freeAllMemoryIfAnyAllocsFailed(T matr) {
// ascertain whether any allocs failed on any node
bool anyFail = didAnyLocalAllocsFail(matr);
- if (comm_isInit())
+ if (comm_isActive())
anyFail = comm_isTrueOnAllNodes(anyFail);
// if so, free all heap fields
@@ -763,11 +763,16 @@ void validateAndPrintMatrix(T matr, const char* caller) {
structMem -= elemMem;
size_t numBytesPerNode = elemMem + structMem;
+
+ printer_sync();
+
print_header(matr, numBytesPerNode);
print_elems(matr);
// exclude mandatory newline above
print_oneFewerNewlines();
+
+ printer_sync();
}
diff --git a/quest/src/api/multiplication.cpp b/quest/src/api/multiplication.cpp
index 9761735a5..a4b72e6da 100644
--- a/quest/src/api/multiplication.cpp
+++ b/quest/src/api/multiplication.cpp
@@ -12,6 +12,7 @@
#include "quest/include/multiplication.h"
#include "quest/src/core/validation.hpp"
+#include "quest/src/core/lists.hpp"
#include "quest/src/core/utilities.hpp"
#include "quest/src/core/localiser.hpp"
#include "quest/src/core/paulilogic.hpp"
@@ -22,6 +23,14 @@ using std::vector;
+// The multiplication API doesn't accept control qubits
+// (which don't have much relevance to non-unitaries),
+// so passes ctrls={} to most internal functions; this
+// file-local (static) shortcut spares some keystrokes
+static List64 none = lists_getEmptyList64();
+
+
+
/*
* CompMatr1
*/
@@ -35,7 +44,7 @@ void leftapplyCompMatr1(Qureg qureg, int target, CompMatr1 matrix) {
bool conj = false;
bool transp = false;
- localiser_statevec_anyCtrlOneTargDenseMatr(qureg, {}, {}, target, matrix, conj, transp);
+ localiser_statevec_anyCtrlOneTargDenseMatr(qureg, none, none, target, matrix, conj, transp);
}
void rightapplyCompMatr1(Qureg qureg, int target, CompMatr1 matrix) {
@@ -48,7 +57,7 @@ void rightapplyCompMatr1(Qureg qureg, int target, CompMatr1 matrix) {
bool conj = false;
bool transp = true;
int qubit = util_getBraQubit(target, qureg);
- localiser_statevec_anyCtrlOneTargDenseMatr(qureg, {}, {}, qubit, matrix, conj, transp);
+ localiser_statevec_anyCtrlOneTargDenseMatr(qureg, none, none, qubit, matrix, conj, transp);
}
} // end de-mangler
@@ -69,7 +78,7 @@ void leftapplyCompMatr2(Qureg qureg, int target1, int target2, CompMatr2 matrix)
bool conj = false;
bool transp = false;
- localiser_statevec_anyCtrlTwoTargDenseMatr(qureg, {}, {}, target1, target2, matrix, conj, transp);
+ localiser_statevec_anyCtrlTwoTargDenseMatr(qureg, none, none, target1, target2, matrix, conj, transp);
}
void rightapplyCompMatr2(Qureg qureg, int target1, int target2, CompMatr2 matrix) {
@@ -84,7 +93,7 @@ void rightapplyCompMatr2(Qureg qureg, int target1, int target2, CompMatr2 matrix
bool transp = true;
int qubit1 = util_getBraQubit(target1, qureg);
int qubit2 = util_getBraQubit(target2, qureg);
- localiser_statevec_anyCtrlTwoTargDenseMatr(qureg, {}, {}, qubit1, qubit2, matrix, conj, transp);
+ localiser_statevec_anyCtrlTwoTargDenseMatr(qureg, none, none, qubit1, qubit2, matrix, conj, transp);
}
} // end de-mangler
@@ -105,7 +114,7 @@ void leftapplyCompMatr(Qureg qureg, int* targets, int numTargets, CompMatr matri
bool conj = false;
bool transp = false;
- localiser_statevec_anyCtrlAnyTargDenseMatr(qureg, {}, {}, util_getVector(targets, numTargets), matrix, conj, transp);
+ localiser_statevec_anyCtrlAnyTargDenseMatr(qureg, none, none, lists_getList64(targets, numTargets), matrix, conj, transp);
}
void rightapplyCompMatr(Qureg qureg, int* targets, int numTargets, CompMatr matrix) {
@@ -118,8 +127,8 @@ void rightapplyCompMatr(Qureg qureg, int* targets, int numTargets, CompMatr matr
// rho matrix ~ transpose(rho) (x) I ||rho>>
bool conj = false;
bool transp = true;
- auto qubits = util_getBraQubits(util_getVector(targets, numTargets), qureg);
- localiser_statevec_anyCtrlAnyTargDenseMatr(qureg, {}, {}, qubits, matrix, conj, transp);
+ auto qubits = util_getBraQubits(lists_getList64(targets, numTargets), qureg);
+ localiser_statevec_anyCtrlAnyTargDenseMatr(qureg, none, none, qubits, matrix, conj, transp);
}
} // end de-mangler
@@ -148,7 +157,7 @@ void leftapplyDiagMatr1(Qureg qureg, int target, DiagMatr1 matrix) {
validate_matrixFields(matrix, __func__);
bool conj = false;
- localiser_statevec_anyCtrlOneTargDiagMatr(qureg, {}, {}, target, matrix, conj);
+ localiser_statevec_anyCtrlOneTargDiagMatr(qureg, none, none, target, matrix, conj);
}
void rightapplyDiagMatr1(Qureg qureg, int target, DiagMatr1 matrix) {
@@ -159,7 +168,7 @@ void rightapplyDiagMatr1(Qureg qureg, int target, DiagMatr1 matrix) {
bool conj = false;
int qubit = util_getBraQubit(target, qureg);
- localiser_statevec_anyCtrlOneTargDiagMatr(qureg, {}, {}, qubit, matrix, conj);
+ localiser_statevec_anyCtrlOneTargDiagMatr(qureg, none, none, qubit, matrix, conj);
}
} // end de-mangler
@@ -178,7 +187,7 @@ void leftapplyDiagMatr2(Qureg qureg, int target1, int target2, DiagMatr2 matrix)
validate_matrixFields(matrix, __func__);
bool conj = false;
- localiser_statevec_anyCtrlTwoTargDiagMatr(qureg, {}, {}, target1, target2, matrix, conj);
+ localiser_statevec_anyCtrlTwoTargDiagMatr(qureg, none, none, target1, target2, matrix, conj);
}
void rightapplyDiagMatr2(Qureg qureg, int target1, int target2, DiagMatr2 matrix) {
@@ -190,7 +199,7 @@ void rightapplyDiagMatr2(Qureg qureg, int target1, int target2, DiagMatr2 matrix
bool conj = false;
int qubit1 = util_getBraQubit(target1, qureg);
int qubit2 = util_getBraQubit(target2, qureg);
- localiser_statevec_anyCtrlTwoTargDiagMatr(qureg, {}, {}, qubit1, qubit2, matrix, conj);
+ localiser_statevec_anyCtrlTwoTargDiagMatr(qureg, none, none, qubit1, qubit2, matrix, conj);
}
} // end de-mangler
@@ -210,8 +219,8 @@ void leftapplyDiagMatr(Qureg qureg, int* targets, int numTargets, DiagMatr matri
bool conj = false;
qcomp exponent = 1;
- auto qubits = util_getVector(targets, numTargets);
- localiser_statevec_anyCtrlAnyTargDiagMatr(qureg, {}, {}, qubits, matrix, exponent, conj);
+ auto qubits = lists_getList64(targets, numTargets);
+ localiser_statevec_anyCtrlAnyTargDiagMatr(qureg, none, none, qubits, matrix, exponent, conj);
}
void rightapplyDiagMatr(Qureg qureg, int* targets, int numTargets, DiagMatr matrix) {
@@ -222,8 +231,8 @@ void rightapplyDiagMatr(Qureg qureg, int* targets, int numTargets, DiagMatr matr
bool conj = false;
qcomp exponent = 1;
- auto qubits = util_getBraQubits(util_getVector(targets, numTargets), qureg);
- localiser_statevec_anyCtrlAnyTargDiagMatr(qureg, {}, {}, qubits, matrix, exponent, conj);
+ auto qubits = util_getBraQubits(lists_getList64(targets, numTargets), qureg);
+ localiser_statevec_anyCtrlAnyTargDiagMatr(qureg, none, none, qubits, matrix, exponent, conj);
}
} // end de-mangler
@@ -253,8 +262,8 @@ void leftapplyDiagMatrPower(Qureg qureg, int* targets, int numTargets, DiagMatr
validate_matrixExpIsNonDiverging(matrix, exponent, __func__); // harmlessly re-validates fields and is-sync
bool conj = false;
- auto qubits = util_getVector(targets, numTargets);
- localiser_statevec_anyCtrlAnyTargDiagMatr(qureg, {}, {}, qubits, matrix, exponent, conj);
+ auto qubits = lists_getList64(targets, numTargets);
+ localiser_statevec_anyCtrlAnyTargDiagMatr(qureg, none, none, qubits, matrix, exponent, conj);
}
void rightapplyDiagMatrPower(Qureg qureg, int* targets, int numTargets, DiagMatr matrix, qcomp exponent) {
@@ -265,8 +274,8 @@ void rightapplyDiagMatrPower(Qureg qureg, int* targets, int numTargets, DiagMatr
validate_matrixExpIsNonDiverging(matrix, exponent, __func__); // harmlessly re-validates fields and is-sync
bool conj = false;
- auto qubits = util_getBraQubits(util_getVector(targets, numTargets), qureg);
- localiser_statevec_anyCtrlAnyTargDiagMatr(qureg, {}, {}, qubits, matrix, exponent, conj);
+ auto qubits = util_getBraQubits(lists_getList64(targets, numTargets), qureg);
+ localiser_statevec_anyCtrlAnyTargDiagMatr(qureg, none, none, qubits, matrix, exponent, conj);
}
} // end de-mangler
@@ -350,7 +359,7 @@ void leftapplySwap(Qureg qureg, int qubit1, int qubit2) {
validate_quregFields(qureg, __func__);
validate_twoTargets(qureg, qubit1, qubit2, __func__);
- localiser_statevec_anyCtrlSwap(qureg, {}, {}, qubit1, qubit2);
+ localiser_statevec_anyCtrlSwap(qureg, none, none, qubit1, qubit2);
}
void rightapplySwap(Qureg qureg, int qubit1, int qubit2) {
@@ -360,7 +369,7 @@ void rightapplySwap(Qureg qureg, int qubit1, int qubit2) {
qubit1 = util_getBraQubit(qubit1, qureg);
qubit2 = util_getBraQubit(qubit2, qureg);
- localiser_statevec_anyCtrlSwap(qureg, {}, {}, qubit1, qubit2);
+ localiser_statevec_anyCtrlSwap(qureg, none, none, qubit1, qubit2);
}
} // end de-mangler
@@ -378,7 +387,7 @@ void leftapplyPauliX(Qureg qureg, int target) {
validate_target(qureg, target, __func__);
PauliStr str = getPauliStr("X", {target});
- localiser_statevec_anyCtrlPauliTensor(qureg, {}, {}, str);
+ localiser_statevec_anyCtrlPauliTensor(qureg, none, none, str);
}
void leftapplyPauliY(Qureg qureg, int target) {
@@ -386,7 +395,7 @@ void leftapplyPauliY(Qureg qureg, int target) {
validate_target(qureg, target, __func__);
PauliStr str = getPauliStr("Y", {target});
- localiser_statevec_anyCtrlPauliTensor(qureg, {}, {}, str);
+ localiser_statevec_anyCtrlPauliTensor(qureg, none, none, str);
}
void leftapplyPauliZ(Qureg qureg, int target) {
@@ -394,7 +403,7 @@ void leftapplyPauliZ(Qureg qureg, int target) {
validate_target(qureg, target, __func__);
PauliStr str = getPauliStr("Z", {target});
- localiser_statevec_anyCtrlPauliTensor(qureg, {}, {}, str);
+ localiser_statevec_anyCtrlPauliTensor(qureg, none, none, str);
}
void rightapplyPauliX(Qureg qureg, int target) {
@@ -404,7 +413,7 @@ void rightapplyPauliX(Qureg qureg, int target) {
PauliStr str = getPauliStr("X", {target});
str = paulis_getShiftedPauliStr(str, qureg.numQubits);
- localiser_statevec_anyCtrlPauliTensor(qureg, {}, {}, str);
+ localiser_statevec_anyCtrlPauliTensor(qureg, none, none, str);
}
void rightapplyPauliY(Qureg qureg, int target) {
@@ -415,7 +424,7 @@ void rightapplyPauliY(Qureg qureg, int target) {
qcomp factor = -1; // undo transpose
PauliStr str = getPauliStr("Y", {target});
str = paulis_getShiftedPauliStr(str, qureg.numQubits);
- localiser_statevec_anyCtrlPauliTensor(qureg, {}, {}, str, factor);
+ localiser_statevec_anyCtrlPauliTensor(qureg, none, none, str, factor);
}
void rightapplyPauliZ(Qureg qureg, int target) {
@@ -425,7 +434,7 @@ void rightapplyPauliZ(Qureg qureg, int target) {
PauliStr str = getPauliStr("Z", {target});
str = paulis_getShiftedPauliStr(str, qureg.numQubits);
- localiser_statevec_anyCtrlPauliTensor(qureg, {}, {}, str);
+ localiser_statevec_anyCtrlPauliTensor(qureg, none, none, str);
}
} // end de-mangler
@@ -442,7 +451,7 @@ void leftapplyPauliStr(Qureg qureg, PauliStr str) {
validate_quregFields(qureg, __func__);
validate_pauliStrTargets(qureg, str, __func__);
- localiser_statevec_anyCtrlPauliTensor(qureg, {}, {}, str);
+ localiser_statevec_anyCtrlPauliTensor(qureg, none, none, str);
}
void rightapplyPauliStr(Qureg qureg, PauliStr str) {
@@ -452,7 +461,7 @@ void rightapplyPauliStr(Qureg qureg, PauliStr str) {
qcomp factor = paulis_getSignOfPauliStrConj(str); // undo transpose
str = paulis_getShiftedPauliStr(str, qureg.numQubits);
- localiser_statevec_anyCtrlPauliTensor(qureg, {}, {}, str, factor);
+ localiser_statevec_anyCtrlPauliTensor(qureg, none, none, str, factor);
}
} // end de-mangler
@@ -470,7 +479,7 @@ void leftapplyPauliGadget(Qureg qureg, PauliStr str, qreal angle) {
validate_pauliStrTargets(qureg, str, __func__);
qreal phase = util_getPhaseFromGateAngle(angle);
- localiser_statevec_anyCtrlPauliGadget(qureg, {}, {}, str, phase);
+ localiser_statevec_anyCtrlPauliGadget(qureg, none, none, str, phase);
}
void rightapplyPauliGadget(Qureg qureg, PauliStr str, qreal angle) {
@@ -481,7 +490,7 @@ void rightapplyPauliGadget(Qureg qureg, PauliStr str, qreal angle) {
qreal factor = paulis_getSignOfPauliStrConj(str);
qreal phase = factor * util_getPhaseFromGateAngle(angle);
str = paulis_getShiftedPauliStr(str, qureg.numQubits);
- localiser_statevec_anyCtrlPauliGadget(qureg, {}, {}, str, phase);
+ localiser_statevec_anyCtrlPauliGadget(qureg, none, none, str, phase);
}
} // end de-mangler
@@ -499,8 +508,8 @@ void leftapplyPhaseGadget(Qureg qureg, int* targets, int numTargets, qreal angle
validate_targets(qureg, targets, numTargets, __func__);
qreal phase = util_getPhaseFromGateAngle(angle);
- auto qubits = util_getVector(targets, numTargets);
- localiser_statevec_anyCtrlPhaseGadget(qureg, {}, {}, qubits, phase);
+ auto qubits = lists_getList64(targets, numTargets);
+ localiser_statevec_anyCtrlPhaseGadget(qureg, none, none, qubits, phase);
}
void rightapplyPhaseGadget(Qureg qureg, int* targets, int numTargets, qreal angle) {
@@ -509,8 +518,8 @@ void rightapplyPhaseGadget(Qureg qureg, int* targets, int numTargets, qreal angl
validate_targets(qureg, targets, numTargets, __func__);
qreal phase = util_getPhaseFromGateAngle(angle);
- auto qubits = util_getBraQubits(util_getVector(targets, numTargets), qureg);
- localiser_statevec_anyCtrlPhaseGadget(qureg, {}, {}, qubits, phase);
+ auto qubits = util_getBraQubits(lists_getList64(targets, numTargets), qureg);
+ localiser_statevec_anyCtrlPhaseGadget(qureg, none, none, qubits, phase);
}
} // end de-mangler
@@ -578,7 +587,7 @@ void leftapplyQubitProjector(Qureg qureg, int qubit, int outcome) {
validate_measurementOutcomeIsValid(outcome, __func__);
qreal prob = 1;
- localiser_statevec_multiQubitProjector(qureg, {qubit}, {outcome}, prob);
+ localiser_statevec_multiQubitProjector(qureg, lists_getList64({qubit}), lists_getList64({outcome}), prob);
}
void leftapplyMultiQubitProjector(Qureg qureg, int* qubits, int* outcomes, int numQubits) {
@@ -587,8 +596,8 @@ void leftapplyMultiQubitProjector(Qureg qureg, int* qubits, int* outcomes, int n
validate_measurementOutcomesAreValid(outcomes, numQubits, __func__);
qreal prob = 1;
- auto qubitVec = util_getVector(qubits, numQubits);
- auto outcomeVec = util_getVector(outcomes, numQubits);
+ auto qubitVec = lists_getList64(qubits, numQubits);
+ auto outcomeVec = lists_getList64(outcomes, numQubits);
localiser_statevec_multiQubitProjector(qureg, qubitVec, outcomeVec, prob);
}
@@ -599,7 +608,8 @@ void rightapplyQubitProjector(Qureg qureg, int qubit, int outcome) {
validate_measurementOutcomeIsValid(outcome, __func__);
qreal prob = 1;
- localiser_statevec_multiQubitProjector(qureg, {util_getBraQubit(qubit,qureg)}, {outcome}, prob);
+ auto qubitList = lists_getList64({util_getBraQubit(qubit,qureg)});
+ localiser_statevec_multiQubitProjector(qureg, qubitList, lists_getList64({outcome}), prob);
}
void rightapplyMultiQubitProjector(Qureg qureg, int* qubits, int* outcomes, int numQubits) {
@@ -609,8 +619,8 @@ void rightapplyMultiQubitProjector(Qureg qureg, int* qubits, int* outcomes, int
validate_measurementOutcomesAreValid(outcomes, numQubits, __func__);
qreal prob = 1;
- auto qubitVec = util_getBraQubits(util_getVector(qubits, numQubits), qureg);
- auto outcomeVec = util_getVector(outcomes, numQubits);
+ auto qubitVec = util_getBraQubits(lists_getList64(qubits, numQubits), qureg);
+ auto outcomeVec = lists_getList64(outcomes, numQubits);
localiser_statevec_multiQubitProjector(qureg, qubitVec, outcomeVec, prob);
}
@@ -649,9 +659,9 @@ void leftapplyPauliStrSum(Qureg qureg, PauliStrSum sum, Qureg workspace) {
// left-multiply each term in-turn, mixing into output qureg, then undo using idempotency
for (qindex i=0; i qureg, and qureg -> sum * qureg
@@ -674,9 +684,9 @@ void rightapplyPauliStrSum(Qureg qureg, PauliStrSum sum, Qureg workspace) {
PauliStr str = paulis_getShiftedPauliStr(sum.strings[i], qureg.numQubits);
qcomp factor = paulis_getSignOfPauliStrConj(str); // undoes transpose
- localiser_statevec_anyCtrlPauliTensor(workspace, {}, {}, str, factor);
+ localiser_statevec_anyCtrlPauliTensor(workspace, none, none, str, factor);
localiser_statevec_setQuregToWeightedSum(qureg, {1, sum.coeffs[i]}, {qureg, workspace});
- localiser_statevec_anyCtrlPauliTensor(workspace, {}, {}, str, factor);
+ localiser_statevec_anyCtrlPauliTensor(workspace, none, none, str, factor);
}
// workspace -> qureg, and qureg -> sum * qureg
diff --git a/quest/src/api/operations.cpp b/quest/src/api/operations.cpp
index c15f9f1bd..15574b281 100644
--- a/quest/src/api/operations.cpp
+++ b/quest/src/api/operations.cpp
@@ -42,20 +42,20 @@ void validateAndApplyAnyCtrlAnyTargUnitaryMatrix(Qureg qureg, int* ctrls, int* s
if (util_isDenseMatrixType<T>())
validate_mixedAmpsFitInNode(qureg, numTargs, caller);
- auto ctrlVec = util_getVector(ctrls, numCtrls);
- auto stateVec = util_getVector(states, numCtrls);
- auto targVec = util_getVector(targs, numTargs);
+ List64 ctrlList = lists_getList64(ctrls, numCtrls);
+ List64 stateList = util_getList64OrAllOnes(states, numCtrls);
+ List64 targList = lists_getList64(targs, numTargs);
bool conj = false;
- localiser_statevec_anyCtrlAnyTargAnyMatr(qureg, ctrlVec, stateVec, targVec, matr, conj);
+ localiser_statevec_anyCtrlAnyTargAnyMatr(qureg, ctrlList, stateList, targList, matr, conj);
if (!qureg.isDensityMatrix)
return;
conj = true;
- ctrlVec = util_getBraQubits(ctrlVec, qureg);
- targVec = util_getBraQubits(targVec, qureg);
- localiser_statevec_anyCtrlAnyTargAnyMatr(qureg, ctrlVec, stateVec, targVec, matr, conj);
+ ctrlList = util_getBraQubits(ctrlList, qureg);
+ targList = util_getBraQubits(targList, qureg);
+ localiser_statevec_anyCtrlAnyTargAnyMatr(qureg, ctrlList, stateList, targList, matr, conj);
/// @todo
/// the above logic always performs two in-turn operations upon density matrices,
@@ -144,7 +144,7 @@ void applyMultiControlledCompMatr2(Qureg qureg, vector controls, int target
applyMultiControlledCompMatr2(qureg, controls.data(), controls.size(), target1, target2, matr);
}
-void applyMultiStateControlledCompMatr2(Qureg qureg, vector<int> controls, vector<int> states, int numControls, int target1, int target2, CompMatr2 matr) {
+void applyMultiStateControlledCompMatr2(Qureg qureg, vector<int> controls, vector<int> states, int target1, int target2, CompMatr2 matr) {
validate_controlsMatchStates(controls.size(), states.size(), __func__);
applyMultiStateControlledCompMatr2(qureg, controls.data(), states.data(), controls.size(), target1, target2, matr);
@@ -410,18 +410,18 @@ void applyMultiStateControlledDiagMatrPower(Qureg qureg, int* controls, int* sta
// when numerical validation is disabled without a separate func.
bool conj = false;
- auto ctrlVec = util_getVector(controls, numControls);
- auto stateVec = util_getVector(states, numControls); // empty if states==nullptr
- auto targVec = util_getVector(targets, numTargets);
- localiser_statevec_anyCtrlAnyTargDiagMatr(qureg, ctrlVec, stateVec, targVec, matrix, exponent, conj);
+ auto ctrlList = lists_getList64(controls, numControls);
+ auto stateList = util_getList64OrAllOnes(states, numControls);
+ auto targList = lists_getList64(targets, numTargets);
+ localiser_statevec_anyCtrlAnyTargDiagMatr(qureg, ctrlList, stateList, targList, matrix, exponent, conj);
if (!qureg.isDensityMatrix)
return;
conj = true;
- ctrlVec = util_getBraQubits(ctrlVec, qureg);
- targVec = util_getBraQubits(targVec, qureg);
- localiser_statevec_anyCtrlAnyTargDiagMatr(qureg, ctrlVec, stateVec, targVec, matrix, exponent, conj);
+ ctrlList = util_getBraQubits(ctrlList, qureg);
+ targList = util_getBraQubits(targList, qureg);
+ localiser_statevec_anyCtrlAnyTargDiagMatr(qureg, ctrlList, stateList, targList, matrix, exponent, conj);
}
} // end de-mangler
@@ -518,7 +518,7 @@ void applyMultiControlledS(Qureg qureg, int* controls, int numControls, int targ
void applyMultiStateControlledS(Qureg qureg, int* controls, int* states, int numControls, int target) {
- DiagMatr1 matr = getDiagMatr1({1, 1_i});
+ static const DiagMatr1 matr = getDiagMatr1({1, 1_i});
validateAndApplyAnyCtrlAnyTargUnitaryMatrix(qureg, controls, states, numControls, &target, 1, matr, __func__);
}
@@ -569,7 +569,7 @@ void applyMultiControlledT(Qureg qureg, int* controls, int numControls, int targ
void applyMultiStateControlledT(Qureg qureg, int* controls, int* states, int numControls, int target) {
- DiagMatr1 matr = getDiagMatr1({1, 1/std::sqrt(2) + 1_i/std::sqrt(2)});
+ static const DiagMatr1 matr = getDiagMatr1({1, (1 + 1_i)/std::sqrt(2)});
validateAndApplyAnyCtrlAnyTargUnitaryMatrix(qureg, controls, states, numControls, &target, 1, matr, __func__);
}
@@ -620,11 +620,11 @@ void applyMultiControlledHadamard(Qureg qureg, int* controls, int numControls, i
void applyMultiStateControlledHadamard(Qureg qureg, int* controls, int* states, int numControls, int target) {
- qcomp a = 1/std::sqrt(2);
- CompMatr1 matr = getCompMatr1({
- {a, a},
- {a,-a}});
-
+ static const qcomp a = 1 / std::sqrt(2);
+ static const CompMatr1 matr = getCompMatr1({
+ {a, a},
+ {a, -a}
+ });
validateAndApplyAnyCtrlAnyTargUnitaryMatrix(qureg, controls, states, numControls, &target, 1, matr, __func__);
}
@@ -678,17 +678,17 @@ void applyMultiStateControlledSwap(Qureg qureg, int* controls, int* states, int
validate_controlsAndTwoTargets(qureg, controls, numControls, qubit1, qubit2, __func__);
validate_controlStates(states, numControls, __func__); // permits states==nullptr
- auto ctrlVec = util_getVector(controls, numControls);
- auto stateVec = util_getVector(states, numControls); // empty if states==nullptr
- localiser_statevec_anyCtrlSwap(qureg, ctrlVec, stateVec, qubit1, qubit2);
+ auto ctrlList = lists_getList64(controls, numControls);
+ auto stateList = util_getList64OrAllOnes(states, numControls);
+ localiser_statevec_anyCtrlSwap(qureg, ctrlList, stateList, qubit1, qubit2);
if (!qureg.isDensityMatrix)
return;
- ctrlVec = util_getBraQubits(ctrlVec, qureg);
+ ctrlList = util_getBraQubits(ctrlList, qureg);
qubit1 = util_getBraQubit(qubit1, qureg);
qubit2 = util_getBraQubit(qubit2, qureg);
- localiser_statevec_anyCtrlSwap(qureg, ctrlVec, stateVec, qubit1, qubit2);
+ localiser_statevec_anyCtrlSwap(qureg, ctrlList, stateList, qubit1, qubit2);
}
} // end de-mangler
@@ -749,7 +749,7 @@ void applyMultiStateControlledSqrtSwap(Qureg qureg, int* controls, int* states,
validate_mixedAmpsFitInNode(qureg, 2, __func__); // to throw SqrtSwap error, not generic CompMatr2 error
- CompMatr2 matr = getCompMatr2({
+ static const CompMatr2 matr = getCompMatr2({
{1, 0, 0, 0},
{0, .5+.5_i, .5-.5_i, 0},
{0, .5-.5_i, .5+.5_i, 0},
@@ -869,7 +869,7 @@ void applyMultiStateControlledPauliX(Qureg qureg, int* controls, int* states, in
/// since it avoids all superfluous flops; check worthwhile for multi-qubit
// harmlessly re-validates, including hardcoded matrix unitarity
- CompMatr1 matrix = util_getPauliX();
+ static const CompMatr1 matrix = util_getPauliX();
validateAndApplyAnyCtrlAnyTargUnitaryMatrix(qureg, controls, states, numControls, &target, 1, matrix, __func__);
}
@@ -879,7 +879,7 @@ void applyMultiStateControlledPauliY(Qureg qureg, int* controls, int* states, in
validate_controlStates(states, numControls, __func__); // permits states==nullptr
// harmlessly re-validates, including hardcoded matrix unitarity
- CompMatr1 matrix = util_getPauliY();
+ static const CompMatr1 matrix = util_getPauliY();
validateAndApplyAnyCtrlAnyTargUnitaryMatrix(qureg, controls, states, numControls, &target, 1, matrix, __func__);
}
@@ -889,7 +889,7 @@ void applyMultiStateControlledPauliZ(Qureg qureg, int* controls, int* states, in
validate_controlStates(states, numControls, __func__); // permits states==nullptr
// harmlessly re-validates, including hardcoded matrix unitarity
- DiagMatr1 matrix = util_getPauliZ();
+ static const DiagMatr1 matrix = util_getPauliZ();
validateAndApplyAnyCtrlAnyTargUnitaryMatrix(qureg, controls, states, numControls, &target, 1, matrix, __func__);
}
@@ -966,27 +966,27 @@ void applyMultiStateControlledPauliStr(Qureg qureg, int* controls, int* states,
validate_controlStates(states, numControls, __func__); // permits states==nullptr
qcomp factor = 1;
- auto ctrlVec = util_getVector(controls, numControls);
- auto stateVec = util_getVector(states, numControls); // empty if states==nullptr
+ auto ctrlList = lists_getList64(controls, numControls);
+ auto stateList = util_getList64OrAllOnes(states, numControls);
// when there are no control qubits, we can merge the density matrix's
// operation sinto a single tensor, i.e. +- (shift(str) (x) str), to
// avoid superfluous re-enumeration of the state
if (qureg.isDensityMatrix && numControls == 0) {
factor = paulis_getSignOfPauliStrConj(str);
- ctrlVec = util_getConcatenated(ctrlVec, util_getBraQubits(ctrlVec, qureg));
- stateVec = util_getConcatenated(stateVec, stateVec);
+ ctrlList = util_getConcatenated(ctrlList, util_getBraQubits(ctrlList, qureg));
+ stateList = util_getConcatenated(stateList, stateList);
str = paulis_getKetAndBraPauliStr(str, qureg);
}
- localiser_statevec_anyCtrlPauliTensor(qureg, ctrlVec, stateVec, str, factor);
+ localiser_statevec_anyCtrlPauliTensor(qureg, ctrlList, stateList, str, factor);
// but density-matrix control qubits require two distinct operations
if (qureg.isDensityMatrix && numControls > 0) {
factor = paulis_getSignOfPauliStrConj(str);
- ctrlVec = util_getBraQubits(ctrlVec, qureg);
+ ctrlList = util_getBraQubits(ctrlList, qureg);
str = paulis_getShiftedPauliStr(str, qureg.numQubits);
- localiser_statevec_anyCtrlPauliTensor(qureg, ctrlVec, stateVec, str, factor);
+ localiser_statevec_anyCtrlPauliTensor(qureg, ctrlList, stateList, str, factor);
}
}
@@ -1250,7 +1250,8 @@ void applyNonUnitaryPauliGadget(Qureg qureg, PauliStr str, qcomp angle) {
validate_pauliStrTargets(qureg, str, __func__);
qcomp phase = util_getPhaseFromGateAngle(angle);
- localiser_statevec_anyCtrlPauliGadget(qureg, {}, {}, str, phase);
+ auto none = lists_getEmptyList64();
+ localiser_statevec_anyCtrlPauliGadget(qureg, none, none, str, phase);
if (!qureg.isDensityMatrix)
return;
@@ -1258,7 +1259,7 @@ void applyNonUnitaryPauliGadget(Qureg qureg, PauliStr str, qcomp angle) {
// conj(e^i(a)P) = e^(-i s conj(a) P)
phase = - std::conj(phase) * paulis_getSignOfPauliStrConj(str);
str = paulis_getShiftedPauliStr(str, qureg.numQubits);
- localiser_statevec_anyCtrlPauliGadget(qureg, {}, {}, str, phase);
+ localiser_statevec_anyCtrlPauliGadget(qureg, none, none, str, phase);
}
void applyControlledPauliGadget(Qureg qureg, int control, PauliStr str, qreal angle) {
@@ -1291,18 +1292,18 @@ void applyMultiStateControlledPauliGadget(Qureg qureg, int* controls, int* state
// which is sufficiently efficient using the existing gadget backend function
qreal phase = util_getPhaseFromGateAngle(angle);
- auto ctrlVec = util_getVector(controls, numControls);
- auto stateVec = util_getVector(states, numControls); // empty if states==nullptr
- localiser_statevec_anyCtrlPauliGadget(qureg, ctrlVec, stateVec, str, phase);
+ auto ctrlList = lists_getList64(controls, numControls);
+ auto stateList = util_getList64OrAllOnes(states, numControls);
+ localiser_statevec_anyCtrlPauliGadget(qureg, ctrlList, stateList, str, phase);
if (!qureg.isDensityMatrix)
return;
// conj(e^(i a P)) = e^(-i s a P)
phase *= - paulis_getSignOfPauliStrConj(str);
- ctrlVec = util_getBraQubits(ctrlVec, qureg);
+ ctrlList = util_getBraQubits(ctrlList, qureg);
str = paulis_getShiftedPauliStr(str, qureg.numQubits);
- localiser_statevec_anyCtrlPauliGadget(qureg, ctrlVec, stateVec, str, phase);
+ localiser_statevec_anyCtrlPauliGadget(qureg, ctrlList, stateList, str, phase);
}
} // end de-mangler
@@ -1356,18 +1357,18 @@ void applyMultiStateControlledPhaseGadget(Qureg qureg, int* controls, int* state
validate_controlStates(states, numControls, __func__);
qreal phase = util_getPhaseFromGateAngle(angle);
- auto ctrlVec = util_getVector(controls, numControls);
- auto targVec = util_getVector(targets, numTargets);
- auto stateVec = util_getVector(states, numControls); // empty if states==nullptr
- localiser_statevec_anyCtrlPhaseGadget(qureg, ctrlVec, stateVec, targVec, phase);
+ auto ctrlList = lists_getList64(controls, numControls);
+ auto stateList = util_getList64OrAllOnes(states, numControls);
+ auto targList = lists_getList64(targets, numTargets);
+ localiser_statevec_anyCtrlPhaseGadget(qureg, ctrlList, stateList, targList, phase);
if (!qureg.isDensityMatrix)
return;
phase *= -1;
- ctrlVec = util_getBraQubits(ctrlVec, qureg);
- targVec = util_getBraQubits(targVec, qureg);
- localiser_statevec_anyCtrlPhaseGadget(qureg, ctrlVec, stateVec, targVec, phase);
+ ctrlList = util_getBraQubits(ctrlList, qureg);
+ targList = util_getBraQubits(targList, qureg);
+ localiser_statevec_anyCtrlPhaseGadget(qureg, ctrlList, stateList, targList, phase);
}
} // end de-mangler
@@ -1423,7 +1424,8 @@ void applyMultiQubitPhaseShift(Qureg qureg, int* targets, int numTargets, qreal
validate_targets(qureg, targets, numTargets, __func__);
// treat as a (numTargets-1)-controlled 1-target diagonal matrix
- DiagMatr1 matr = getDiagMatr1({1, std::exp(1_i * angle)});
+ static DiagMatr1 matr = getDiagMatr1({1, /*un-init*/ 0});
+ matr.elems[1] = std::exp(1_i * angle); // micro-optimisation
// harmlessly re-validates
applyMultiStateControlledDiagMatr1(qureg, &targets[1], nullptr, numTargets-1, targets[0], matr);
@@ -1466,7 +1468,7 @@ void applyMultiQubitPhaseFlip(Qureg qureg, int* targets, int numTargets) {
validate_targets(qureg, targets, numTargets, __func__);
// treat as a (numTargets-1)-controlled 1-target Pauli Z
- DiagMatr1 matr = getDiagMatr1({1, -1});
+ static const DiagMatr1 matr = getDiagMatr1({1, -1});
// harmlessly re-validates
applyMultiStateControlledDiagMatr1(qureg, &targets[1], nullptr, numTargets-1, targets[0], matr);
@@ -1561,10 +1563,13 @@ void applyQubitProjector(Qureg qureg, int target, int outcome) {
qreal prob = 1;
+ auto targList = lists_getList64({target});
+ auto outcomeList = lists_getList64({outcome});
+
// density matrix has an optimised func in lieu of calling the statevector func twice
(qureg.isDensityMatrix)?
- localiser_densmatr_multiQubitProjector(qureg, {target}, {outcome}, prob):
- localiser_statevec_multiQubitProjector(qureg, {target}, {outcome}, prob);
+ localiser_densmatr_multiQubitProjector(qureg, targList, outcomeList, prob):
+ localiser_statevec_multiQubitProjector(qureg, targList, outcomeList, prob);
}
void applyMultiQubitProjector(Qureg qureg, int* qubits, int* outcomes, int numQubits) {
@@ -1573,13 +1578,13 @@ void applyMultiQubitProjector(Qureg qureg, int* qubits, int* outcomes, int numQu
validate_measurementOutcomesAreValid(outcomes, numQubits, __func__);
qreal prob = 1;
- auto qubitVec = util_getVector(qubits, numQubits);
- auto outcomeVec = util_getVector(outcomes, numQubits);
+ auto qubitList = lists_getList64(qubits, numQubits);
+ auto outcomeList = lists_getList64(outcomes, numQubits);
// density matrix has an optimised func in lieu of calling the statevector func twice
(qureg.isDensityMatrix)?
- localiser_densmatr_multiQubitProjector(qureg, qubitVec, outcomeVec, prob):
- localiser_statevec_multiQubitProjector(qureg, qubitVec, outcomeVec, prob);
+ localiser_densmatr_multiQubitProjector(qureg, qubitList, outcomeList, prob):
+ localiser_statevec_multiQubitProjector(qureg, qubitList, outcomeList, prob);
}
} // end de-mangler
@@ -1623,10 +1628,13 @@ int applyQubitMeasurementAndGetProb(Qureg qureg, int target, qreal* probability)
int outcome = rand_getRandomSingleQubitOutcome(probs[0]);
*probability = probs[outcome];
+ auto targList = lists_getList64({target});
+ auto outcomeList = lists_getList64({outcome});
+
// collapse to the outcome
(qureg.isDensityMatrix)?
- localiser_densmatr_multiQubitProjector(qureg, {target}, {outcome}, *probability):
- localiser_statevec_multiQubitProjector(qureg, {target}, {outcome}, *probability);
+ localiser_densmatr_multiQubitProjector(qureg, targList, outcomeList, *probability):
+ localiser_statevec_multiQubitProjector(qureg, targList, outcomeList, *probability);
return outcome;
}
@@ -1642,10 +1650,13 @@ qreal applyForcedQubitMeasurement(Qureg qureg, int target, int outcome) {
qreal prob = calcProbOfQubitOutcome(qureg, target, outcome); // harmlessly re-validates
validate_measurementOutcomeProbNotZero(outcome, prob, __func__);
+ auto targList = lists_getList64({target});
+ auto outcomeList = lists_getList64({outcome});
+
// project to the outcome, renormalising the surviving states
(qureg.isDensityMatrix)?
- localiser_densmatr_multiQubitProjector(qureg, {target}, {outcome}, prob):
- localiser_statevec_multiQubitProjector(qureg, {target}, {outcome}, prob);
+ localiser_densmatr_multiQubitProjector(qureg, targList, outcomeList, prob):
+ localiser_statevec_multiQubitProjector(qureg, targList, outcomeList, prob);
return prob;
}
@@ -1683,14 +1694,14 @@ qindex applyMultiQubitMeasurementAndGetProb(Qureg qureg, int* qubits, int numQub
*probability = probs[outcome];
// map outcome to individual qubit outcomes
- auto qubitVec = util_getVector(qubits, numQubits);
- auto outcomeVec = vector(numQubits);
- getBitsFromInteger(outcomeVec.data(), outcome, numQubits);
+ auto qubitList = lists_getList64(qubits, numQubits);
+ auto outcomeList = util_getConstantList(-1, numQubits);
+ setToBitsOfInteger(outcomeList.data(), outcome, numQubits);
// project to the outcomes, renormalising the surviving states
(qureg.isDensityMatrix)?
- localiser_densmatr_multiQubitProjector(qureg, qubitVec, outcomeVec, *probability):
- localiser_statevec_multiQubitProjector(qureg, qubitVec, outcomeVec, *probability);
+ localiser_densmatr_multiQubitProjector(qureg, qubitList, outcomeList, *probability):
+ localiser_statevec_multiQubitProjector(qureg, qubitList, outcomeList, *probability);
return outcome;
}
@@ -1700,8 +1711,8 @@ qreal applyForcedMultiQubitMeasurement(Qureg qureg, int* qubits, int* outcomes,
validate_targets(qureg, qubits, numQubits, __func__);
validate_measurementOutcomesAreValid(outcomes, numQubits, __func__);
- auto qubitVec = util_getVector(qubits, numQubits);
- auto outcomeVec = util_getVector(outcomes, numQubits);
+ auto qubitList = lists_getList64(qubits, numQubits);
+ auto outcomeList = lists_getList64(outcomes, numQubits);
// ensure probability of the forced measurement outcome is not negligible
qreal prob = calcProbOfMultiQubitOutcome(qureg, qubits, outcomes, numQubits); // harmlessly re-validates
@@ -1709,8 +1720,8 @@ qreal applyForcedMultiQubitMeasurement(Qureg qureg, int* qubits, int* outcomes,
// project to the outcome, renormalising the surviving states
(qureg.isDensityMatrix)?
- localiser_densmatr_multiQubitProjector(qureg, qubitVec, outcomeVec, prob):
- localiser_statevec_multiQubitProjector(qureg, qubitVec, outcomeVec, prob);
+ localiser_densmatr_multiQubitProjector(qureg, qubitList, outcomeList, prob):
+ localiser_statevec_multiQubitProjector(qureg, qubitList, outcomeList, prob);
return prob;
}
@@ -1782,11 +1793,7 @@ void applyQuantumFourierTransform(Qureg qureg, int* targets, int numTargets, boo
void applyFullQuantumFourierTransform(Qureg qureg, bool inverse) {
validate_quregFields(qureg, __func__);
- // tiny; no need to validate alloc
- vector targets(qureg.numQubits);
- for (size_t i=0; i& ketCtrls, vector& braCtrls,
- vector& states, PauliStrSum sum, vector& sumOrdering,
+ Qureg qureg, ConstList64 ketCtrls, ConstList64 braCtrls,
+ ConstList64 states, PauliStrSum sum, vector& sumOrdering,
qcomp angle, bool onlyLeftApply, bool reverse
) {
// apply each sum term as a gadget, in forward or reverse order
@@ -62,8 +63,8 @@ void internal_applyFirstOrderTrotterRepetition(
}
void internal_applyHigherOrderTrotterRepetition(
- Qureg qureg, vector& ketCtrls, vector& braCtrls,
- vector& states, PauliStrSum sum, vector& sumOrdering,
+ Qureg qureg, ConstList64 ketCtrls, ConstList64 braCtrls,
+ ConstList64 states, PauliStrSum sum, vector& sumOrdering,
qcomp angle, int order, bool onlyLeftApply
) {
if (order == 1) {
@@ -107,9 +108,9 @@ void internal_applyAllTrotterRepetitions(
}
// prepare control-qubit lists once for all invoked gadgets below
- auto ketCtrlsVec = util_getVector(controls, numControls);
- auto braCtrlsVec = (qureg.isDensityMatrix)? util_getBraQubits(ketCtrlsVec, qureg) : vector{};
- auto statesVec = util_getVector(states, numControls);
+ auto ketCtrlsList = lists_getList64(controls, numControls);
+ auto braCtrlsList = (qureg.isDensityMatrix)? util_getBraQubits(ketCtrlsList, qureg) : lists_getEmptyList64();
+ auto statesList = lists_getList64(states, numControls * (states != nullptr));
qcomp arg = angle / reps;
@@ -120,7 +121,7 @@ void internal_applyAllTrotterRepetitions(
rand_setListToShuffled(sumOrdering);
internal_applyHigherOrderTrotterRepetition(
- qureg, ketCtrlsVec, braCtrlsVec, statesVec, sum, sumOrdering, arg, order, onlyLeftApply);
+ qureg, ketCtrlsList, braCtrlsList, statesList, sum, sumOrdering, arg, order, onlyLeftApply);
}
}
@@ -167,7 +168,7 @@ void applyTrotterizedNonUnitaryPauliStrSumGadget(Qureg qureg, PauliStrSum sum, q
validate_quregFields(qureg, __func__);
validate_pauliStrSumFields(sum, __func__);
validate_pauliStrSumTargets(sum, qureg, __func__);
- validate_trotterParams(qureg, order, reps, __func__);
+ validate_trotterParams(order, reps, __func__);
// sum is permitted to be non-Hermitian
// |psi> -> U |psi>, rho -> U rho U^dagger
@@ -180,7 +181,7 @@ void applyTrotterizedPauliStrSumGadget(Qureg qureg, PauliStrSum sum, qreal angle
validate_pauliStrSumFields(sum, __func__);
validate_pauliStrSumTargets(sum, qureg, __func__);
validate_pauliStrSumIsHermitian(sum, __func__);
- validate_trotterParams(qureg, order, reps, __func__);
+ validate_trotterParams(order, reps, __func__);
bool onlyLeftApply = false;
internal_applyAllTrotterRepetitions(qureg, nullptr, nullptr, 0, sum, angle, order, reps, onlyLeftApply, permuteTerms, __func__);
@@ -194,7 +195,7 @@ void applyTrotterizedControlledPauliStrSumGadget(
validate_pauliStrSumFields(sum, __func__);
validate_pauliStrSumIsHermitian(sum, __func__);
validate_controlAndPauliStrSumTargets(qureg, control, sum, __func__);
- validate_trotterParams(qureg, order, reps, __func__);
+ validate_trotterParams(order, reps, __func__);
bool onlyLeftApply = false;
internal_applyAllTrotterRepetitions(qureg, &control, nullptr, 1, sum, angle, order, reps, onlyLeftApply, permuteTerms, __func__);
@@ -208,7 +209,7 @@ void applyTrotterizedMultiControlledPauliStrSumGadget(
validate_pauliStrSumFields(sum, __func__);
validate_pauliStrSumIsHermitian(sum, __func__);
validate_controlsAndPauliStrSumTargets(qureg, controls, numControls, sum, __func__);
- validate_trotterParams(qureg, order, reps, __func__);
+ validate_trotterParams(order, reps, __func__);
bool onlyLeftApply = false;
internal_applyAllTrotterRepetitions(qureg, controls, nullptr, numControls, sum, angle, order, reps, onlyLeftApply, permuteTerms, __func__);
@@ -223,7 +224,7 @@ void applyTrotterizedMultiStateControlledPauliStrSumGadget(
validate_pauliStrSumIsHermitian(sum, __func__);
validate_controlsAndPauliStrSumTargets(qureg, controls, numControls, sum, __func__);
validate_controlStates(states, numControls, __func__); // permits states==nullptr
- validate_trotterParams(qureg, order, reps, __func__);
+ validate_trotterParams(order, reps, __func__);
bool onlyLeftApply = false;
internal_applyAllTrotterRepetitions(qureg, controls, states, numControls, sum, angle, order, reps, onlyLeftApply, permuteTerms, __func__);
@@ -260,7 +261,7 @@ void applyTrotterizedUnitaryTimeEvolution(Qureg qureg, PauliStrSum hamil, qreal
validate_pauliStrSumFields(hamil, __func__);
validate_pauliStrSumTargets(hamil, qureg, __func__);
validate_pauliStrSumIsHermitian(hamil, __func__);
- validate_trotterParams(qureg, order, reps, __func__);
+ validate_trotterParams(order, reps, __func__);
// exp(-i t H) = exp(x i H) | x=-t
qcomp angle = - time;
@@ -273,7 +274,7 @@ void applyTrotterizedImaginaryTimeEvolution(Qureg qureg, PauliStrSum hamil, qrea
validate_pauliStrSumFields(hamil, __func__);
validate_pauliStrSumTargets(hamil, qureg, __func__);
validate_pauliStrSumIsHermitian(hamil, __func__);
- validate_trotterParams(qureg, order, reps, __func__);
+ validate_trotterParams(order, reps, __func__);
// exp(-tau H) = exp(x i H) | x=tau*i
qcomp angle = qcomp(0, tau);
@@ -300,7 +301,7 @@ void applyTrotterizedNoisyTimeEvolution(
validate_pauliStrSumFields(hamil, __func__);
validate_pauliStrSumTargets(hamil, qureg, __func__);
validate_pauliStrSumIsHermitian(hamil, __func__);
- validate_trotterParams(qureg, order, reps, __func__);
+ validate_trotterParams(order, reps, __func__);
validate_lindbladJumpOps(jumps, numJumps, qureg, __func__);
validate_lindbladDampingRates(damps, numJumps, __func__);
diff --git a/quest/src/api/types.cpp b/quest/src/api/types.cpp
index 4fadf1cb5..cead74301 100644
--- a/quest/src/api/types.cpp
+++ b/quest/src/api/types.cpp
@@ -22,8 +22,12 @@ using std::string;
void reportStr(std::string str) {
validate_envIsInit(__func__);
+ printer_sync();
+
print(str);
print_newlines();
+
+ printer_sync();
}
extern "C" void reportStr(const char* str) {
diff --git a/quest/src/comm/comm_config.cpp b/quest/src/comm/comm_config.cpp
index 854a12bd5..4b76ca71e 100644
--- a/quest/src/comm/comm_config.cpp
+++ b/quest/src/comm/comm_config.cpp
@@ -4,10 +4,13 @@
* implementation (like OpenMPI vs MPICH). These functions
* are callable even when MPI has not been compiled/linked.
*
- * Note that even when COMPILE_MPI=1, the user may have
+ * Note that even when QUEST_COMPILE_MPI=1, the user may have
* disabled distribution when creating the QuEST environment
- * at runtime. Ergo we use comm_isInit() to determine whether
- * functions should invoke the MPI API.
+ * at runtime - even while they themselves initialise and
+ * use MPI. So we must be careful about consulting MPI status!
+ * Furthermore, all routines here will only ever consult/affect
+ * the QuEST communicator, never the entire MPI environment,
+ * the latter of which may contain non-participating processes.
*
* @author Tyson Jones
*/
@@ -18,7 +21,9 @@
#include "quest/src/comm/comm_config.hpp"
#include "quest/src/core/errors.hpp"
-#if COMPILE_MPI
+#include
+
+#if QUEST_COMPILE_MPI
#include
#endif
@@ -28,7 +33,8 @@
* WARN ABOUT CUDA-AWARENESS
*/
-#if COMPILE_MPI && COMPILE_CUDA
+
+#if QUEST_COMPILE_MPI && QUEST_COMPILE_CUDA
// this check is OpenMPI specific
#ifdef OPEN_MPI
@@ -50,22 +56,115 @@
+/*
+ * COMMUNICATOR MANAGEMENT
+ *
+ * QuEST will only ever use the overridable global_mpiComm communicator,
+ * so that superusers can dedicate external MPI processes to other tasks.
+ * Beware that it's valid for QuEST to be compiled with MPI, but have
+ * distribution runtime-disabled, while the user is themselves using
+ * (and ergo has initialised) MPI. In that scenario, we must not touch
+ * MPI, hence why comm_isActive() below is distinct from comm_isMpiInit().
+ */
+
+
+// We must record whether the user owns MPI, so that we do not ever attempt
+// to kill it when gracefully exiting, or due to a validation error
+static bool global_isMpiUserOwned = false;
+
+
+// Guarded since MPI_Comm cannot be exposed when not compiling MPI. This
+// communicator is overridden from NULL either BEFORE or DURING comm_init()
+#if QUEST_COMPILE_MPI
+ static MPI_Comm global_mpiComm = MPI_COMM_NULL;
+#endif
+
+
+bool comm_isActive() {
+#if QUEST_COMPILE_MPI
+
+ // comm_init(), or potentially comm_setMpiComm() before it, will only
+ // ever override mpiComm with non-NULL, indicating active comm. Note
+ // it's possible in principle for mpiComm to later return to NULL, via comm_end(),
+ // and for QuEST execution to continue (though not supported presently).
+ // if comm_isActive() is true, then it is guaranteed MPI is initialised
+ return global_mpiComm != MPI_COMM_NULL;
+
+ // note it is legal for QuEST distribution to be disabled (and ergo
+ // mpiComm never initialised) even when the user is themselves accessing
+ // MPI, hence this function is semantically distinct from comm_isMpiInit()
+#else
+
+ // QuEST communication is obviously never active if
+ // not even MPI is compiled; though this does not
+ // imply at all the user isn't themselves using MPI!
+ return false;
+
+#endif
+}
+
+
+// Hide MPI_Comm from signatures when MPI is not compiled. Beware that
+// these are not exposed in comm_config.hpp; callers must 'extern' them!
+#if QUEST_COMPILE_MPI
+
+
+MPI_Comm comm_getMpiComm() {
+
+ // illegal to call before communicator has been overridden
+ if (global_mpiComm == MPI_COMM_NULL)
+ error_commMpiCommIsNull();
+
+ return global_mpiComm;
+}
+
+
+bool comm_setMpiComm(MPI_Comm newComm, bool userOwnsMpi) {
+
+ // illegal to re-set, or set to null
+ if (global_mpiComm != MPI_COMM_NULL)
+ error_commAlreadyHasSetMpiComm();
+ if (newComm == MPI_COMM_NULL)
+ error_commNewMpiCommIsNull();
+
+ // detect bad communicator, and inform validation
+ auto status = MPI_Comm_dup(newComm, &global_mpiComm);
+ if (status != MPI_SUCCESS)
+ return false;
+
+ // record ownership as soon as QuEST communication becomes active, so
+ // validation errors during env initialisation never kill user-owned MPI
+ global_isMpiUserOwned = userOwnsMpi;
+ return true;
+}
+
+
+#endif // QUEST_COMPILE_MPI
+
+
+
/*
* MPI ENVIRONMENT MANAGEMENT
- * all of which is safely callable in non-distributed mode
+ *
+ * which queries MPI itself (as may be user-activated), rather
+ * than QuEST's (possibly more limited) MPI environment
*/
bool comm_isMpiCompiled() {
- return (bool) COMPILE_MPI;
+ return (bool) QUEST_COMPILE_MPI;
+}
+
+bool comm_isMpiSubCommCompiled() {
+ return (bool) QUEST_COMPILE_SUBCOMM;
}
bool comm_isMpiGpuAware() {
- /// @todo these checks may be OpenMPI specific, so that
- /// non-OpenMPI MPI compilers are always dismissed as
- /// not being CUDA-aware. Check e.g. MPICH method!
+ // MPI cannot be GPU-aware when it is not even compiled
+ if (!comm_isMpiCompiled())
+ return false;
// definitely not GPU-aware if compiler declares it is not
#if defined(MPIX_CUDA_AWARE_SUPPORT) && ! MPIX_CUDA_AWARE_SUPPORT
@@ -77,71 +176,135 @@ bool comm_isMpiGpuAware() {
return (bool) MPIX_Query_cuda_support();
#endif
+ // check whether an MPICH env-var indicates support (we assume it never lies!)
+ static const auto var = std::getenv("MPICH_GPU_SUPPORT_ENABLED");
+ if (var && std::string(var) == "1") // ill-formed vars = 0
+ return true;
+
// if we can't ascertain CUDA-awareness, just assume no to avoid seg-fault
return false;
}
-bool comm_isInit() {
-#if COMPILE_MPI
+bool comm_isMpiInit() {
+#if QUEST_COMPILE_MPI
// safely callable before MPI initialisation, but NOT after comm_end()
int isInit;
MPI_Initialized(&isInit);
+
+ // when MPI is not initialised, it is guaranteed that QuEST's communicator
+ // is inactive, which we double check here so callers can be absolutely sure
+ if (!isInit && comm_isActive())
+ error_commActiveButMpiNotInit();
+
return (bool) isInit;
#else
// obviously MPI is never initialised if not even compiled
return false;
+
#endif
}
-void comm_init() {
-#if COMPILE_MPI
+bool comm_isMpiUserOwned() {
+
+ // this isn't presently used by the code base; I'm just naughtily silencing
+ // "unused var" warning when compiling without MPI :^)
+ return global_isMpiUserOwned;
+}
- // error if attempting re-initialisation
- if (comm_isInit())
+
+
+/*
+ * QUEST COMMUNICATION MANAGEMENT
+ *
+ * which interacts only with QuEST's MPI environment,
+ * which may be smaller than the user-controlled MPI env
+ */
+
+
+void comm_init(bool userOwnsMpi) {
+#if QUEST_COMPILE_MPI
+
+ // re-assert prior user-validations for clarity
+ if (userOwnsMpi && !comm_isMpiInit())
+ error_commNotInit();
+ if (!userOwnsMpi && comm_isMpiInit())
error_commAlreadyInit();
-
- MPI_Init(NULL, NULL);
+
+ // init MPI only when it's not the user's responsibility
+ if (!userOwnsMpi)
+ MPI_Init(NULL, NULL);
+
+ // choose communicator only when the user hasn't already
+ // (via comm_setMpiComm, during custom env initialisation)
+ if (global_mpiComm == MPI_COMM_NULL)
+ comm_setMpiComm(MPI_COMM_WORLD, userOwnsMpi);
#endif
}
void comm_end() {
-#if COMPILE_MPI
-
- // gracefully permit comm_end() before comm_init(), as input validation can trigger
- if (!comm_isInit())
+#if QUEST_COMPILE_MPI
+
+ // If QuEST isn't using distribution, regardless of whether the user is using MPI,
+ // then we gracefully exit. We do NOT attempt to end MPI on the user's behalf (as we
+ // may be tempted to do during validation failure to avoid their MPI-crash), because
+ // it's possible/legal that not all processes are participating in this comm_end()
+ // call, in which case MPI_Finalize() could just cause a hang.
+ if (!comm_isActive())
return;
- MPI_Barrier(MPI_COMM_WORLD);
- MPI_Finalize();
+ // Syncing is not strictly necessary, but it ensures that finalizeQuESTEnv() never
+ // completes on one process while another process is still performing simulation
+ // (though that'd be weird), and so may avoid a silly user benchmarking pitfall
+ MPI_Barrier(global_mpiComm);
+ MPI_Comm_free(&global_mpiComm);
+
+ // Do NOT close MPI if the user owns it; they may still wish to use it after QuEST!
+ if (!global_isMpiUserOwned)
+ MPI_Finalize();
+
+ // Presently, comm_end() is only ever called during QuESTEnv destruction (either
+ // deliberately, or because of failed validation during QuESTEnv initialisation).
+ // This means any comm_*() call hereafter is invalid/illegal and will be prevented
+ // by validation. However, we can imagine a future where distribution gets runtime
+ // disabled while QuEST execution continues (e.g. initQuESTEnv automatically
+ // disabled distribution), and so we must indicate that communication is no longer
+ // active by overwriting comm to NULL. BEWARE that this is "hacky"; we have
+ // updated mpiComm here without MPI_Comm_dup(), but that's fine, because hereafter
+ // MPI will never be used again (illegal to re-init both MPI, and QuEST!)
+ global_mpiComm = MPI_COMM_NULL;
+ global_isMpiUserOwned = false;
#endif
}
int comm_getRank() {
-#if COMPILE_MPI
+#if QUEST_COMPILE_MPI
// if distribution was not runtime enabled (or a validation error was
- // triggered), every node (if many MPI processes were launched)
- // believes it is the root rank
- if (!comm_isInit())
+ // triggered during distributed initialisation), every process believes
+ // it is the root rank; this may lead to unavoidable error msg spam!
+ if (!comm_isActive())
return ROOT_RANK;
+ // obtain the process rank within the QuEST communicator, which can
+ // differ from the global MPI process rank when users own MPI
int rank;
- MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+ MPI_Comm_rank(global_mpiComm, &rank);
return rank;
#else
// if MPI isn't compiled, we're definitely non-distributed; return main rank
return ROOT_RANK;
+
#endif
}
@@ -155,33 +318,42 @@ bool comm_isRootNode() {
int comm_getNumNodes() {
-#if COMPILE_MPI
+#if QUEST_COMPILE_MPI
// if distribution was not runtime enabled (or a validation error was
- // triggered), every node (if many MPI processes were launched)
- // believes it is the one and only node
- if (!comm_isInit())
+ // triggered during distributed initialisation), every process is told
+ // it is the one and only node; this may lead to error msg spam, but
+ // appears unavoidable!
+ if (!comm_isActive())
return 1;
+ // obtain the number of processes within the QuEST communicator, which
+ // can be smaller than global MPI process count when users own MPI
int numNodes;
- MPI_Comm_size(MPI_COMM_WORLD, &numNodes);
+ MPI_Comm_size(global_mpiComm, &numNodes);
return numNodes;
#else
- // if MPI isn't compiled, we're definitely non-distributed; return single node
+ // if MPI isn't compiled, QuEST is definitely non-distributed and
+ // each process only knows itself (though users may own MPI and
+ // actually have many processes; that's none of our business!)
return 1;
+
#endif
}
void comm_sync() {
-#if COMPILE_MPI
+#if QUEST_COMPILE_MPI
- // gracefully handle when not distributed, needed by e.g. pre-MPI-setup validation
- if (!comm_isInit())
+ // gracefully handle when not distributed, needed by e.g. pre-MPI-setup validation
+ if (!comm_isActive())
return;
- MPI_Barrier(MPI_COMM_WORLD);
+ MPI_Barrier(global_mpiComm);
+
#endif
+
+ // do nothing at all when MPI is not compiled (user owned MPI processes go unsynced)
}
diff --git a/quest/src/comm/comm_config.hpp b/quest/src/comm/comm_config.hpp
index 444d1dbf0..cc009ab9a 100644
--- a/quest/src/comm/comm_config.hpp
+++ b/quest/src/comm/comm_config.hpp
@@ -10,22 +10,29 @@
#ifndef COMM_CONFIG_HPP
#define COMM_CONFIG_HPP
-
constexpr int ROOT_RANK = 0;
+// queries of MPI's global/general status (when visible)
bool comm_isMpiCompiled();
+bool comm_isMpiSubCommCompiled();
bool comm_isMpiGpuAware();
+bool comm_isMpiInit();
+bool comm_isMpiUserOwned();
-void comm_init();
+// control of QuEST's (possibly more limited) MPI env
+bool comm_isActive();
+void comm_init(bool userOwnsMpi);
void comm_end();
void comm_sync();
+// queries of QuEST's (possibly more limited) MPI env
int comm_getRank();
int comm_getNumNodes();
-
-bool comm_isInit();
bool comm_isRootNode();
bool comm_isRootNode(int rank);
+// Signatures containing MPI types which callers must extern:
+// extern MPI_Comm comm_getMpiComm()
+// extern bool comm_setMpiComm(MPI_Comm newComm, bool userOwnsMpi)
-#endif // COMM_CONFIG_HPP
\ No newline at end of file
+#endif // COMM_CONFIG_HPP
diff --git a/quest/src/comm/comm_routines.cpp b/quest/src/comm/comm_routines.cpp
index 19ebcb9f8..cf6956454 100644
--- a/quest/src/comm/comm_routines.cpp
+++ b/quest/src/comm/comm_routines.cpp
@@ -1,12 +1,12 @@
/** @file
* Functions for communicating and exchanging amplitudes between compute
* nodes, when running in distributed mode, using the C MPI standard.
- * Calling these functions when COMPILE_MPI=0, or when the passed Quregs
+ * Calling these functions when QUEST_COMPILE_MPI=0, or when the passed Quregs
* are not distributed, will throw a runtime internal error.
*
* @author Tyson Jones
* @author Jakub Adamski (sped-up large comm by asynch messages)
- * @author Oliver Brown (patched max-message inference, consulted on AR and MPICH support)
+ * @author Oliver Brown (added custom communicators, patched max-message inference, consulted on AR and MPICH support)
* @author Ania (Anna) Brown (developed QuEST v1 logic)
*/
@@ -22,8 +22,9 @@
#include "quest/src/comm/comm_config.hpp"
#include "quest/src/comm/comm_indices.hpp"
-#if COMPILE_MPI
+#if QUEST_COMPILE_MPI
#include
+ extern MPI_Comm comm_getMpiComm(); // comm_config.cpp does not leak MPI_Comm
#endif
#include
@@ -108,18 +109,18 @@ qindex MAX_MESSAGE_LENGTH = powerOf2(28);
*/
-#if COMPILE_MPI
+#if QUEST_COMPILE_MPI
// declare MPI types for qreal and qcomp. We always use the
// C macros, even when the deprecated CXX equivalents are
// available, to maintain compatibility with modern MPICH
- #if (FLOAT_PRECISION == 1)
+ #if (QUEST_FLOAT_PRECISION == 1)
#define MPI_QREAL MPI_FLOAT
#define MPI_QCOMP MPI_C_FLOAT_COMPLEX
- #elif (FLOAT_PRECISION == 2)
+ #elif (QUEST_FLOAT_PRECISION == 2)
#define MPI_QREAL MPI_DOUBLE
#define MPI_QCOMP MPI_C_DOUBLE_COMPLEX
- #elif (FLOAT_PRECISION == 4)
+ #elif (QUEST_FLOAT_PRECISION == 4)
#define MPI_QREAL MPI_LONG_DOUBLE
#define MPI_QCOMP MPI_C_LONG_DOUBLE_COMPLEX
#else
@@ -136,7 +137,7 @@ qindex MAX_MESSAGE_LENGTH = powerOf2(28);
int getMaxNumMessages() {
-#if COMPILE_MPI
+#if QUEST_COMPILE_MPI
// the max supported tag value constrains the total number of messages
// we can send in a round of communication, since we uniquely tag
@@ -149,7 +150,7 @@ int getMaxNumMessages() {
// messages. Beware the max is obtained via a void pointer and might be unset...
void* tagUpperBoundPtr;
int isAttribSet;
- MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &tagUpperBoundPtr, &isAttribSet);
+ MPI_Comm_get_attr(comm_getMpiComm(), MPI_TAG_UB, &tagUpperBoundPtr, &isAttribSet);
// if something went wrong with obtaining the tag bound, return the safe minimum
if (!isAttribSet)
@@ -214,7 +215,9 @@ std::array dividePayloadIntoMessages(qindex numAmps) {
void exchangeArrays(qcomp* send, qcomp* recv, qindex numElems, int pairRank) {
-#if COMPILE_MPI
+#if QUEST_COMPILE_MPI
+
+ MPI_Comm mpiComm = comm_getMpiComm();
// each message is asynchronously dispatched with a final wait, as per arxiv.org/abs/2308.07402
@@ -226,8 +229,8 @@ void exchangeArrays(qcomp* send, qcomp* recv, qindex numElems, int pairRank) {
// so that messages are permitted to arrive out-of-order (supporting UCX adaptive-routing)
for (qindex m=0; m(m); // gauranteed int, but m*messageSize needs qindex
- MPI_Isend(&send[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, MPI_COMM_WORLD, &requests[2*m]);
- MPI_Irecv(&recv[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, MPI_COMM_WORLD, &requests[2*m+1]);
+ MPI_Irecv(&recv[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, mpiComm, &requests[2*m]);
+ MPI_Isend(&send[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, mpiComm, &requests[2*m+1]);
}
// wait for all exchanges to complete (MPI will automatically free the request memory)
@@ -246,7 +249,9 @@ void exchangeArrays(qcomp* send, qcomp* recv, qindex numElems, int pairRank) {
void asynchSendArray(qcomp* send, qindex numElems, int pairRank) {
-#if COMPILE_MPI
+#if QUEST_COMPILE_MPI
+
+ MPI_Comm mpiComm = comm_getMpiComm();
// we will not track nor wait for the asynch send; instead, the caller will later comm_sync()
MPI_Request nullReq = MPI_REQUEST_NULL;
@@ -257,7 +262,7 @@ void asynchSendArray(qcomp* send, qindex numElems, int pairRank) {
// asynchronously send the uniquely-tagged messages
for (qindex m=0; m(m); // gauranteed int, but m*messageSize needs qindex
- MPI_Isend(&send[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, MPI_COMM_WORLD, &nullReq);
+ MPI_Isend(&send[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, mpiComm, &nullReq);
}
#else
@@ -267,7 +272,9 @@ void asynchSendArray(qcomp* send, qindex numElems, int pairRank) {
void receiveArray(qcomp* dest, qindex numElems, int pairRank) {
-#if COMPILE_MPI
+#if QUEST_COMPILE_MPI
+
+ MPI_Comm mpiComm = comm_getMpiComm();
// expect the data in multiple messages
auto [messageSize, numMessages] = dividePow2PayloadIntoMessages(numElems);
@@ -278,7 +285,7 @@ void receiveArray(qcomp* dest, qindex numElems, int pairRank) {
// listen to receive each uniquely-tagged message asynchronously (as per arxiv.org/abs/2308.07402)
for (qindex m=0; m(m); // gauranteed int, but m*messageSize needs qindex
- MPI_Irecv(&dest[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, MPI_COMM_WORLD, &requests[m]);
+ MPI_Irecv(&dest[m*messageSize], messageSize, MPI_QCOMP, pairRank, tag, mpiComm, &requests[m]);
}
// receivers wait for all messages to be received (while sender asynch proceeds)
@@ -301,8 +308,9 @@ void globallyCombineNonUniformSubArrays(
vector globalRecvIndPerRank, vector localSendIndPerRank, vector numSendPerRank,
bool areGpuPtrs
) {
-#if COMPILE_MPI
+#if QUEST_COMPILE_MPI
+ auto mpiComm = comm_getMpiComm();
int myRank = comm_getRank();
int numNodes = comm_getNumNodes();
@@ -336,14 +344,14 @@ void globallyCombineNonUniformSubArrays(
for (int m=0; m 0) {
qindex recvInd = globalRecvIndPerRank[sendRank] + (numBigMsgs * bigMsgSize);
requests.push_back(MPI_REQUEST_NULL);
- MPI_Ibcast(&recv[recvInd], remMsgSize, MPI_QCOMP, sendRank, MPI_COMM_WORLD, &requests.back());
+ MPI_Ibcast(&recv[recvInd], remMsgSize, MPI_QCOMP, sendRank, mpiComm, &requests.back());
}
}
@@ -357,7 +365,7 @@ void globallyCombineNonUniformSubArrays(
void globallyCombineSubArrays(qcomp* recv, qcomp* send, qindex numAmpsPerRank, bool areGpuPtrs) {
-#if COMPILE_MPI
+#if QUEST_COMPILE_MPI
// simply wrap and call the non-uniform case has no performance penalty,
// and is only slightly messier than a bespoke power-of-2 msg implementation
@@ -637,9 +645,9 @@ void comm_exchangeAmpsToBuffers(Qureg qureg, int pairRank) {
void comm_broadcastAmp(int sendRank, qcomp* sendAmp) {
-#if COMPILE_MPI
+#if QUEST_COMPILE_MPI
- MPI_Bcast(sendAmp, 1, MPI_QCOMP, sendRank, MPI_COMM_WORLD);
+ MPI_Bcast(sendAmp, 1, MPI_QCOMP, sendRank, comm_getMpiComm());
#else
error_commButEnvNotDistributed();
@@ -648,7 +656,9 @@ void comm_broadcastAmp(int sendRank, qcomp* sendAmp) {
void comm_sendAmpsToRoot(int sendRank, qcomp* send, qcomp* recv, qindex numAmps) {
-#if COMPILE_MPI
+#if QUEST_COMPILE_MPI
+
+ MPI_Comm mpiComm = comm_getMpiComm();
// only the sender and root nodes need to continue
int recvRank = ROOT_RANK;
@@ -665,8 +675,8 @@ void comm_sendAmpsToRoot(int sendRank, qcomp* send, qcomp* recv, qindex numAmps)
for (qindex m=0; m(m);
(myRank == sendRank)?
- MPI_Isend(&send[m*messageSize], messageSize, MPI_QCOMP, recvRank, tag, MPI_COMM_WORLD, &requests[m]): // sender
- MPI_Irecv(&recv[m*messageSize], messageSize, MPI_QCOMP, sendRank, tag, MPI_COMM_WORLD, &requests[m]); // root
+ MPI_Isend(&send[m*messageSize], messageSize, MPI_QCOMP, recvRank, tag, mpiComm, &requests[m]): // sender
+ MPI_Irecv(&recv[m*messageSize], messageSize, MPI_QCOMP, sendRank, tag, mpiComm, &requests[m]); // root
}
// wait for all exchanges to complete (MPI will automatically free the request memory)
@@ -679,10 +689,10 @@ void comm_sendAmpsToRoot(int sendRank, qcomp* send, qcomp* recv, qindex numAmps)
void comm_broadcastIntsFromRoot(int* arr, qindex length) {
-#if COMPILE_MPI
+#if QUEST_COMPILE_MPI
int sendRank = ROOT_RANK;
- MPI_Bcast(arr, length, MPI_INT, sendRank, MPI_COMM_WORLD);
+ MPI_Bcast(arr, length, MPI_INT, sendRank, comm_getMpiComm());
#else
error_commButEnvNotDistributed();
@@ -691,10 +701,10 @@ void comm_broadcastIntsFromRoot(int* arr, qindex length) {
void comm_broadcastUnsignedsFromRoot(unsigned* arr, qindex length) {
-#if COMPILE_MPI
+#if QUEST_COMPILE_MPI
int sendRank = ROOT_RANK;
- MPI_Bcast(arr, length, MPI_UNSIGNED, sendRank, MPI_COMM_WORLD);
+ MPI_Bcast(arr, length, MPI_UNSIGNED, sendRank, comm_getMpiComm());
#else
error_commButEnvNotDistributed();
@@ -719,9 +729,9 @@ void comm_combineSubArrays(qcomp* recv, vector recvInds, vector
void comm_reduceAmp(qcomp* localAmp) {
-#if COMPILE_MPI
+#if QUEST_COMPILE_MPI
- MPI_Allreduce(MPI_IN_PLACE, localAmp, 1, MPI_QCOMP, MPI_SUM, MPI_COMM_WORLD);
+ MPI_Allreduce(MPI_IN_PLACE, localAmp, 1, MPI_QCOMP, MPI_SUM, comm_getMpiComm());
#else
error_commButEnvNotDistributed();
@@ -730,9 +740,9 @@ void comm_reduceAmp(qcomp* localAmp) {
void comm_reduceReal(qreal* localReal) {
-#if COMPILE_MPI
+#if QUEST_COMPILE_MPI
- MPI_Allreduce(MPI_IN_PLACE, localReal, 1, MPI_QREAL, MPI_SUM, MPI_COMM_WORLD);
+ MPI_Allreduce(MPI_IN_PLACE, localReal, 1, MPI_QREAL, MPI_SUM, comm_getMpiComm());
#else
error_commButEnvNotDistributed();
@@ -741,9 +751,9 @@ void comm_reduceReal(qreal* localReal) {
void comm_reduceReals(qreal* localReals, qindex numLocalReals) {
-#if COMPILE_MPI
+#if QUEST_COMPILE_MPI
- MPI_Allreduce(MPI_IN_PLACE, localReals, numLocalReals, MPI_QREAL, MPI_SUM, MPI_COMM_WORLD);
+ MPI_Allreduce(MPI_IN_PLACE, localReals, numLocalReals, MPI_QREAL, MPI_SUM, comm_getMpiComm());
#else
error_commButEnvNotDistributed();
@@ -752,12 +762,12 @@ void comm_reduceReals(qreal* localReals, qindex numLocalReals) {
bool comm_isTrueOnAllNodes(bool val) {
-#if COMPILE_MPI
+#if QUEST_COMPILE_MPI
// perform global AND and broadcast result back to all nodes
int local = (int) val;
int global;
- MPI_Allreduce(&local, &global, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD);
+ MPI_Allreduce(&local, &global, 1, MPI_INT, MPI_LAND, comm_getMpiComm());
return (bool) global;
#else
@@ -768,7 +778,7 @@ bool comm_isTrueOnAllNodes(bool val) {
bool comm_isTrueOnRootNode(bool val) {
- #if COMPILE_MPI
+ #if QUEST_COMPILE_MPI
// this isn't really a reduction - it's a broadcast - but
// it's semantically relevant to comm_isTrueOnAllNodes()
@@ -791,7 +801,7 @@ bool comm_isTrueOnRootNode(bool val) {
vector comm_gatherStringsToRoot(char* localChars, int maxNumLocalChars) {
-#if COMPILE_MPI
+#if QUEST_COMPILE_MPI
// no need to validate array sizes and memory alloc successes;
// these are trivial O(#nodes)-size arrays containing <20 chars
@@ -803,7 +813,7 @@ vector comm_gatherStringsToRoot(char* localChars, int maxNumLocalChars)
// all nodes send root all their local chars
int recvRank = ROOT_RANK;
MPI_Gather(localChars, maxNumLocalChars, MPI_CHAR, allChars.data(),
- maxNumLocalChars, MPI_CHAR, recvRank, MPI_COMM_WORLD);
+ maxNumLocalChars, MPI_CHAR, recvRank, comm_getMpiComm());
// divide allChars into stings, delimited by each node's terminal char
vector out(numNodes);
diff --git a/quest/src/comm/comm_routines.hpp b/quest/src/comm/comm_routines.hpp
index 3d0fc8b23..e75e889f6 100644
--- a/quest/src/comm/comm_routines.hpp
+++ b/quest/src/comm/comm_routines.hpp
@@ -1,7 +1,7 @@
/** @file
* Signatures for communicating and exchanging amplitudes between compute
* nodes, when running in distributed mode, using the C MPI standard.
- * Calling these functions when COMPILE_MPI=0, or when the passed Quregs
+ * Calling these functions when QUEST_COMPILE_MPI=0, or when the passed Quregs
* are not distributed, will throw a runtime internal error.
*
* @author Tyson Jones
diff --git a/quest/src/core/accelerator.cpp b/quest/src/core/accelerator.cpp
index 7bdcc1709..677e6c74a 100644
--- a/quest/src/core/accelerator.cpp
+++ b/quest/src/core/accelerator.cpp
@@ -23,16 +23,18 @@
#include "quest/src/core/errors.hpp"
#include "quest/src/core/memory.hpp"
#include "quest/src/core/bitwise.hpp"
+#include "quest/src/core/lists.hpp"
#include "quest/src/cpu/cpu_config.hpp"
#include "quest/src/gpu/gpu_config.hpp"
#include "quest/src/cpu/cpu_subroutines.hpp"
#include "quest/src/gpu/gpu_subroutines.hpp"
+#include
#include
#include
-using std::vector;
using std::min;
+using std::array;
@@ -45,19 +47,16 @@ using std::min;
* number of controls or targets exceeds that which have optimised compilations,
* we fall back to using a generic implementation, indicated by <-1>. In essence,
* these macros simply call func albeit without illegally passing
- * a runtime variable as a template parameter. Note an awkward use of decltype()
- * is to workaround a GCC <12 bug with implicitly-typed vector initialisations.
- *
- * BEWARE that these macros are single-line expressions, so they can be used in
- * braceless if/else or ternary operators - but stay vigilant!
+ * a runtime variable as a template parameter.
*/
-#define GET_FUNC_OPTIMISED_FOR_BOOL(funcname, value) \
+
+#define GET_FUNC_OPTIMISED_FOR_BOOL( funcname, value ) \
((value)? funcname : funcname)
-#define GET_FUNC_OPTIMISED_FOR_TWO_BOOLS(funcname, b1, b2) \
+#define GET_FUNC_OPTIMISED_FOR_TWO_BOOLS( funcname, b1, b2 ) \
((b1)? \
((b2)? funcname : funcname) : \
((b2)? funcname : funcname))
@@ -69,61 +68,74 @@ using std::min;
((value)? cpu_##funcsuffix : cpu_##funcsuffix ))
-#if (MAX_OPTIMISED_NUM_CTRLS != 5) || (MAX_OPTIMISED_NUM_TARGS != 5)
+#if (MAX_OPTIMISED_PARAM != 5)
#error "The number of optimised, templated QuEST functions was inconsistent between accelerator's source and header."
#endif
+#define GET_TEMPLATE_PARAM( param ) /* clamps param to at most MAX_OPTIMISED_PARAM+1, the index of the generic <-1> instance; param is parenthesised so compound arguments are cast as a whole */ \
+ std::min((int) (param), MAX_OPTIMISED_PARAM + 1)
-#define GET_FUNC_OPTIMISED_FOR_NUM_QUREGS(f, numquregs) \
- (vector )> {&f<0>, &f<1>, &f<2>, &f<3>, &f<4>, &f<5>, &f<-1>}) \
- [std::min((int) numquregs, MAX_OPTIMISED_NUM_QUREGS + 1)]
-
-#define GET_FUNC_OPTIMISED_FOR_NUM_CTRLS(f, numctrls) \
- (vector )> {&f<0>, &f<1>, &f<2>, &f<3>, &f<4>, &f<5>, &f<-1>}) \
- [std::min((int) numctrls, MAX_OPTIMISED_NUM_CTRLS + 1)]
-
-#define GET_FUNC_OPTIMISED_FOR_NUM_TARGS(f, numtargs) \
- (vector )> {&f<0>, &f<1>, &f<2>, &f<3>, &f<4>, &f<5>, &f<-1>}) \
- [std::min((int) numtargs, MAX_OPTIMISED_NUM_TARGS + 1)]
-
-#define GET_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS(f, numctrls, numtargs) \
- (vector { \
- ARR(f) {&f<0,0>, &f<0,1>, &f<0,2>, &f<0,3>, &f<0,4>, &f<0,5>, &f<0,-1>}, \
- ARR(f) {&f<1,0>, &f<1,1>, &f<1,2>, &f<1,3>, &f<1,4>, &f<1,5>, &f<1,-1>}, \
- ARR(f) {&f<2,0>, &f<2,1>, &f<2,2>, &f<2,3>, &f<2,4>, &f<2,5>, &f<2,-1>}, \
- ARR(f) {&f<3,0>, &f<3,1>, &f<3,2>, &f<3,3>, &f<3,4>, &f<3,5>, &f<3,-1>}, \
- ARR(f) {&f<4,0>, &f<4,1>, &f<4,2>, &f<4,3>, &f<4,4>, &f<4,5>, &f<4,-1>}, \
- ARR(f) {&f<5,0>, &f<5,1>, &f<5,2>, &f<5,3>, &f<5,4>, &f<5,5>, &f<5,-1>}, \
- ARR(f) {&f<-1,0>, &f<-1,1>, &f<-1,2>, &f<-1,3>, &f<-1,4>, &f<-1,5>, &f<-1,-1>}}) \
- [std::min((int) numctrls, MAX_OPTIMISED_NUM_CTRLS + 1)] \
- [std::min((int) numtargs, MAX_OPTIMISED_NUM_TARGS + 1)]
-
-#define ARR(f) vector)>
+#define GET_ONE_PARAM_TEMPLATED_FUNC_ARRAY( f ) \
+ array {&f<0>, &f<1>, &f<2>, &f<3>, &f<4>, &f<5>, &f<-1>}
-#define GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_QUREGS(funcsuffix, qureg, numquregs) \
- ((qureg.isGpuAccelerated)? \
- GET_FUNC_OPTIMISED_FOR_NUM_QUREGS( gpu_##funcsuffix, numquregs ) : \
- GET_FUNC_OPTIMISED_FOR_NUM_QUREGS( cpu_##funcsuffix, numquregs ))
+#define GET_FUNC_OPTIMISED_FOR_ONE_PARAM( outvar, funcname, param ) /* declares outvar as the funcname instance selected by param; the helper table avoids a reserved _Uppercase name */ \
+ static constexpr auto funcArray_##funcname = GET_ONE_PARAM_TEMPLATED_FUNC_ARRAY( funcname ); \
+ const auto outvar = funcArray_##funcname[GET_TEMPLATE_PARAM( param )];
-#define GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_CTRLS(funcsuffix, qureg, numctrls) \
- ((qureg.isGpuAccelerated)? \
- GET_FUNC_OPTIMISED_FOR_NUM_CTRLS( gpu_##funcsuffix, numctrls ) : \
- GET_FUNC_OPTIMISED_FOR_NUM_CTRLS( cpu_##funcsuffix, numctrls ))
+#define GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_ONE_PARAM( outvar, funcsuffix, qureg, param ) /* declares outvar as the gpu_ or cpu_ instance, chosen by qureg.isGpuAccelerated; helper locals avoid reserved _Uppercase names */ \
+ GET_FUNC_OPTIMISED_FOR_ONE_PARAM( optimisedGpuFunc, gpu_##funcsuffix, param ) \
+ GET_FUNC_OPTIMISED_FOR_ONE_PARAM( optimisedCpuFunc, cpu_##funcsuffix, param ) \
+ const auto outvar = qureg.isGpuAccelerated ? optimisedGpuFunc : optimisedCpuFunc;
-#define GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_TARGS(funcsuffix, qureg, numtargs) \
- ((qureg.isGpuAccelerated)? \
- GET_FUNC_OPTIMISED_FOR_NUM_TARGS( gpu_##funcsuffix, numtargs ) : \
- GET_FUNC_OPTIMISED_FOR_NUM_TARGS( cpu_##funcsuffix, numtargs ))
-
-#define GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS(funcsuffix, qureg, numctrls, numtargs) \
- ((qureg.isGpuAccelerated)? \
- GET_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( gpu_##funcsuffix, numctrls, numtargs ) : \
- GET_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( cpu_##funcsuffix, numctrls, numtargs ))
+
+#define GET_TWO_PARAM_TEMPLATED_FUNC_MATRIX( f ) \
+ array { \
+ array {&f<0,0>, &f<0,1>, &f<0,2>, &f<0,3>, &f<0,4>, &f<0,5>, &f<0,-1>}, \
+ array {&f<1,0>, &f<1,1>, &f<1,2>, &f<1,3>, &f<1,4>, &f<1,5>, &f<1,-1>}, \
+ array {&f<2,0>, &f<2,1>, &f<2,2>, &f<2,3>, &f<2,4>, &f<2,5>, &f<2,-1>}, \
+ array {&f<3,0>, &f<3,1>, &f<3,2>, &f<3,3>, &f<3,4>, &f<3,5>, &f<3,-1>}, \
+ array {&f<4,0>, &f<4,1>, &f<4,2>, &f<4,3>, &f<4,4>, &f<4,5>, &f<4,-1>}, \
+ array {&f<5,0>, &f<5,1>, &f<5,2>, &f<5,3>, &f<5,4>, &f<5,5>, &f<5,-1>}, \
+ array {&f<-1,0>, &f<-1,1>, &f<-1,2>, &f<-1,3>, &f<-1,4>, &f<-1,5>, &f<-1,-1>}}
+
+#define GET_FUNC_OPTIMISED_FOR_TWO_PARAMS( outvar, funcname, param1, param2 ) /* declares outvar as the funcname instance selected by (param1, param2); the helper table avoids a reserved _Uppercase name */ \
+ static constexpr auto funcMatrix_##funcname = GET_TWO_PARAM_TEMPLATED_FUNC_MATRIX( funcname ); \
+ const auto outvar = funcMatrix_##funcname[GET_TEMPLATE_PARAM( param1 )][GET_TEMPLATE_PARAM( param2 )];
+
+#define GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_TWO_PARAMS( outvar, funcsuffix, qureg, param1, param2 ) /* declares outvar as the gpu_ or cpu_ instance, chosen by qureg.isGpuAccelerated; helper locals avoid reserved _Uppercase names */ \
+ GET_FUNC_OPTIMISED_FOR_TWO_PARAMS( optimisedGpuFunc, gpu_##funcsuffix, param1, param2 ) \
+ GET_FUNC_OPTIMISED_FOR_TWO_PARAMS( optimisedCpuFunc, cpu_##funcsuffix, param1, param2 ) \
+ const auto outvar = qureg.isGpuAccelerated ? optimisedGpuFunc : optimisedCpuFunc;
+
+
+#define GET_TWO_PARAM_TWO_BOOL_SUB_MATRIX( f, b1, b2 ) \
+ array { \
+ array {&f<0,0,b1,b2>, &f<0,1,b1,b2>, &f<0,2,b1,b2>, &f<0,3,b1,b2>, &f<0,4,b1,b2>, &f<0,5,b1,b2>, &f<0,-1,b1,b2>}, \
+ array {&f<1,0,b1,b2>, &f<1,1,b1,b2>, &f<1,2,b1,b2>, &f<1,3,b1,b2>, &f<1,4,b1,b2>, &f<1,5,b1,b2>, &f<1,-1,b1,b2>}, \
+ array {&f<2,0,b1,b2>, &f<2,1,b1,b2>, &f<2,2,b1,b2>, &f<2,3,b1,b2>, &f<2,4,b1,b2>, &f<2,5,b1,b2>, &f<2,-1,b1,b2>}, \
+ array {&f<3,0,b1,b2>, &f<3,1,b1,b2>, &f<3,2,b1,b2>, &f<3,3,b1,b2>, &f<3,4,b1,b2>, &f<3,5,b1,b2>, &f<3,-1,b1,b2>}, \
+ array {&f<4,0,b1,b2>, &f<4,1,b1,b2>, &f<4,2,b1,b2>, &f<4,3,b1,b2>, &f<4,4,b1,b2>, &f<4,5,b1,b2>, &f<4,-1,b1,b2>}, \
+ array {&f<5,0,b1,b2>, &f<5,1,b1,b2>, &f<5,2,b1,b2>, &f<5,3,b1,b2>, &f<5,4,b1,b2>, &f<5,5,b1,b2>, &f<5,-1,b1,b2>}, \
+ array {&f<-1,0,b1,b2>, &f<-1,1,b1,b2>, &f<-1,2,b1,b2>, &f<-1,3,b1,b2>, &f<-1,4,b1,b2>, &f<-1,5,b1,b2>, &f<-1,-1,b1,b2>}}
+
+#define GET_TWO_PARAM_TWO_BOOL_TEMPLATED_FUNC_MATRIX( f ) \
+ array { \
+ array{ GET_TWO_PARAM_TWO_BOOL_SUB_MATRIX( f, 0, 0 ), GET_TWO_PARAM_TWO_BOOL_SUB_MATRIX( f, 0, 1 ) }, \
+ array{ GET_TWO_PARAM_TWO_BOOL_SUB_MATRIX( f, 1, 0 ), GET_TWO_PARAM_TWO_BOOL_SUB_MATRIX( f, 1, 1 ) }}
+
+#define GET_FUNC_OPTIMISED_FOR_TWO_PARAMS_TWO_BOOLS( outvar, funcname, param1, param2, bool1, bool2 ) /* declares outvar as the funcname instance selected by (param1, param2, bool1, bool2); the helper table avoids a reserved _Uppercase name */ \
+ static constexpr auto funcMatrix_##funcname = GET_TWO_PARAM_TWO_BOOL_TEMPLATED_FUNC_MATRIX( funcname ); \
+ const auto outvar = funcMatrix_##funcname[bool1][bool2][GET_TEMPLATE_PARAM( param1 )][GET_TEMPLATE_PARAM( param2 )];
+
+#define GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_TWO_PARAMS_TWO_BOOLS( outvar, funcsuffix, qureg, param1, param2, bool1, bool2 ) /* declares outvar as the gpu_ or cpu_ instance, chosen by qureg.isGpuAccelerated; helper locals avoid reserved _Uppercase names */ \
+ GET_FUNC_OPTIMISED_FOR_TWO_PARAMS_TWO_BOOLS( optimisedGpuFunc, gpu_##funcsuffix, param1, param2, bool1, bool2 ) \
+ GET_FUNC_OPTIMISED_FOR_TWO_PARAMS_TWO_BOOLS( optimisedCpuFunc, cpu_##funcsuffix, param1, param2, bool1, bool2 ) \
+ const auto outvar = qureg.isGpuAccelerated ? optimisedGpuFunc : optimisedCpuFunc;
/// @todo
-/// GET_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS as defined below
+/// GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_TWO_PARAMS_TWO_BOOLS as defined above
/// is used by anyCtrlAnyTargDiagMatr and anyCtrlAnyTargDenseMatr; the
/// latter only ever receives numTargs>=3 (due to accelerator redirecting
/// fewer targets to faster bespoke functions which e.g. avoid global GPU
@@ -133,40 +145,6 @@ using std::min;
/// can ergo non-negligibly speed up compilation by avoiding these redundant
/// instances at the cost of increased code complexity/asymmetry. Consider!
-#define GET_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS(f, numctrls, numtargs, c, h) \
- (vector { \
- POWER_CONJ_ARR(f) {&f<0,0,c,h>, &f<0,1,c,h>, &f<0,2,c,h>, &f<0,3,c,h>, &f<0,4,c,h>, &f<0,5,c,h>, &f<0,-1,c,h>}, \
- POWER_CONJ_ARR(f) {&f<1,0,c,h>, &f<1,1,c,h>, &f<1,2,c,h>, &f<1,3,c,h>, &f<1,4,c,h>, &f<1,5,c,h>, &f<1,-1,c,h>}, \
- POWER_CONJ_ARR(f) {&f<2,0,c,h>, &f<2,1,c,h>, &f<2,2,c,h>, &f<2,3,c,h>, &f<2,4,c,h>, &f<2,5,c,h>, &f<2,-1,c,h>}, \
- POWER_CONJ_ARR(f) {&f<3,0,c,h>, &f<3,1,c,h>, &f<3,2,c,h>, &f<3,3,c,h>, &f<3,4,c,h>, &f<3,5,c,h>, &f<3,-1,c,h>}, \
- POWER_CONJ_ARR(f) {&f<4,0,c,h>, &f<4,1,c,h>, &f<4,2,c,h>, &f<4,3,c,h>, &f<4,4,c,h>, &f<4,5,c,h>, &f<4,-1,c,h>}, \
- POWER_CONJ_ARR(f) {&f<5,0,c,h>, &f<5,1,c,h>, &f<5,2,c,h>, &f<5,3,c,h>, &f<5,4,c,h>, &f<5,5,c,h>, &f<5,-1,c,h>}, \
- POWER_CONJ_ARR(f) {&f<-1,0,c,h>, &f<-1,1,c,h>, &f<-1,2,c,h>, &f<-1,3,c,h>, &f<-1,4,c,h>, &f<-1,5,c,h>, &f<-1,-1,c,h>}}) \
- [std::min((int) numctrls, MAX_OPTIMISED_NUM_CTRLS + 1)] \
- [std::min((int) numtargs, MAX_OPTIMISED_NUM_TARGS + 1)]
-
-#define POWER_CONJ_ARR(f) vector)>
-
-#define GET_CPU_OR_GPU_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS(funcsuffix, qureg, numctrls, numtargs, conj, haspower) \
- ((qureg.isGpuAccelerated)? \
- ((conj)? \
- ((haspower)? \
- GET_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( gpu_##funcsuffix, numctrls, numtargs, true, true ) : \
- GET_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( gpu_##funcsuffix, numctrls, numtargs, true, false ) ) : \
- ((haspower)? \
- GET_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( gpu_##funcsuffix, numctrls, numtargs, false, true ) : \
- GET_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( gpu_##funcsuffix, numctrls, numtargs, false, false ) ) ) : \
- ((conj)? \
- ((haspower)? \
- GET_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( cpu_##funcsuffix, numctrls, numtargs, true, true ) : \
- GET_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( cpu_##funcsuffix, numctrls, numtargs, true, false ) ) : \
- ((haspower)? \
- GET_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( cpu_##funcsuffix, numctrls, numtargs, false, true ) : \
- GET_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( cpu_##funcsuffix, numctrls, numtargs, false, false ) ) ) )
-
-/// @todo
-/// The above macro spaghetti is diabolical - update using C++ metaprogamming!
-
/*
@@ -244,7 +222,7 @@ void accel_fullstatediagmatr_setElemsToPauliStrSum(FullStateDiagMatr out, PauliS
*/
-qindex accel_statevec_packAmpsIntoBuffer(Qureg qureg, vector qubits, vector qubitStates) {
+qindex accel_statevec_packAmpsIntoBuffer(Qureg qureg, ConstList64 qubits, ConstList64 qubitStates) {
// we can never pack and swap buffers when there are no constrained qubit states, because we'd
// then fill the entire buffer andhave no room to receive the other node's buffer; caller would
@@ -253,7 +231,7 @@ qindex accel_statevec_packAmpsIntoBuffer(Qureg qureg, vector qubits, vector
error_noCtrlsGivenToBufferPacker();
// note qubits may incidentally be ctrls or targs; it doesn't matter
- auto func = GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_TARGS( statevec_packAmpsIntoBuffer, qureg, qubits.size() );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_ONE_PARAM( func, statevec_packAmpsIntoBuffer, qureg, qubits.size() );
// return the number of packed amps, for caller convenience
return func(qureg, qubits, qubitStates);
@@ -274,19 +252,19 @@ qindex accel_statevec_packPairSummedAmpsIntoBuffer(Qureg qureg, int qubit1, int
*/
-void accel_statevec_anyCtrlSwap_subA(Qureg qureg, vector ctrls, vector ctrlStates, int targ1, int targ2) {
+void accel_statevec_anyCtrlSwap_subA(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, int targ1, int targ2) {
- auto func = GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_CTRLS( statevec_anyCtrlSwap_subA, qureg, ctrls.size() );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_ONE_PARAM( func, statevec_anyCtrlSwap_subA, qureg, ctrls.size() );
func(qureg, ctrls, ctrlStates, targ1, targ2);
}
-void accel_statevec_anyCtrlSwap_subB(Qureg qureg, vector ctrls, vector ctrlStates) {
+void accel_statevec_anyCtrlSwap_subB(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates) {
- auto func = GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_CTRLS( statevec_anyCtrlSwap_subB, qureg, ctrls.size() );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_ONE_PARAM( func, statevec_anyCtrlSwap_subB, qureg, ctrls.size() );
func(qureg, ctrls, ctrlStates);
}
-void accel_statevec_anyCtrlSwap_subC(Qureg qureg, vector ctrls, vector ctrlStates, int targ, int targState) {
+void accel_statevec_anyCtrlSwap_subC(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, int targ, int targState) {
- auto func = GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_CTRLS( statevec_anyCtrlSwap_subC, qureg, ctrls.size() );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_ONE_PARAM( func, statevec_anyCtrlSwap_subC, qureg, ctrls.size() );
func(qureg, ctrls, ctrlStates, targ, targState);
}
@@ -297,28 +275,28 @@ void accel_statevec_anyCtrlSwap_subC(Qureg qureg, vector ctrls, vector
*/
-void accel_statevec_anyCtrlOneTargDenseMatr_subA(Qureg qureg, vector ctrls, vector ctrlStates, int targ, CompMatr1 matr) {
+void accel_statevec_anyCtrlOneTargDenseMatr_subA(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, int targ, CompMatr1 matr) {
- auto func = GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_CTRLS( statevec_anyCtrlOneTargDenseMatr_subA, qureg, ctrls.size() );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_ONE_PARAM( func, statevec_anyCtrlOneTargDenseMatr_subA, qureg, ctrls.size() );
func(qureg, ctrls, ctrlStates, targ, matr);
}
-void accel_statevec_anyCtrlOneTargDenseMatr_subB(Qureg qureg, vector ctrls, vector ctrlStates, qcomp fac0, qcomp fac1) {
+void accel_statevec_anyCtrlOneTargDenseMatr_subB(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, qcomp fac0, qcomp fac1) {
- auto func = GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_CTRLS( statevec_anyCtrlOneTargDenseMatr_subB, qureg, ctrls.size() );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_ONE_PARAM( func, statevec_anyCtrlOneTargDenseMatr_subB, qureg, ctrls.size() );
func(qureg, ctrls, ctrlStates, fac0, fac1);
}
-void accel_statevec_anyCtrlTwoTargDenseMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, int targ1, int targ2, CompMatr2 matr) {
+void accel_statevec_anyCtrlTwoTargDenseMatr_sub(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, int targ1, int targ2, CompMatr2 matr) {
- auto func = GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_CTRLS( statevec_anyCtrlTwoTargDenseMatr_sub, qureg, ctrls.size() );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_ONE_PARAM( func, statevec_anyCtrlTwoTargDenseMatr_sub, qureg, ctrls.size() );
func(qureg, ctrls, ctrlStates, targ1, targ2, matr);
}
-void accel_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, CompMatr matr, bool conj, bool transp) {
+void accel_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, ConstList64 targs, CompMatr matr, bool conj, bool transp) {
- auto func = GET_CPU_OR_GPU_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( statevec_anyCtrlAnyTargDenseMatr_sub, qureg, ctrls.size(), targs.size(), conj, transp );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_TWO_PARAMS_TWO_BOOLS( func, statevec_anyCtrlAnyTargDenseMatr_sub, qureg, ctrls.size(), targs.size(), conj, transp );
func(qureg, ctrls, ctrlStates, targs, matr);
}
@@ -329,25 +307,25 @@ void accel_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, vector ctrls,
*/
-void accel_statevec_anyCtrlOneTargDiagMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, int targ, DiagMatr1 matr) {
+void accel_statevec_anyCtrlOneTargDiagMatr_sub(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, int targ, DiagMatr1 matr) {
- auto func = GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_CTRLS( statevec_anyCtrlOneTargDiagMatr_sub, qureg, ctrls.size() );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_ONE_PARAM( func, statevec_anyCtrlOneTargDiagMatr_sub, qureg, ctrls.size() );
func(qureg, ctrls, ctrlStates, targ, matr);
}
-void accel_statevec_anyCtrlTwoTargDiagMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, int targ1, int targ2, DiagMatr2 matr) {
+void accel_statevec_anyCtrlTwoTargDiagMatr_sub(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, int targ1, int targ2, DiagMatr2 matr) {
- auto func = GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_CTRLS( statevec_anyCtrlTwoTargDiagMatr_sub, qureg, ctrls.size() );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_ONE_PARAM( func, statevec_anyCtrlTwoTargDiagMatr_sub, qureg, ctrls.size() );
func(qureg, ctrls, ctrlStates, targ1, targ2, matr);
}
-void accel_statevec_anyCtrlAnyTargDiagMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, DiagMatr matr, qcomp exponent, bool conj) {
+void accel_statevec_anyCtrlAnyTargDiagMatr_sub(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, ConstList64 targs, DiagMatr matr, qcomp exponent, bool conj) {
bool hasPower = exponent != qcomp(1, 0);
- auto func = GET_CPU_OR_GPU_TWO_BOOL_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( statevec_anyCtrlAnyTargDiagMatr_sub, qureg, ctrls.size(), targs.size(), conj, hasPower );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_TWO_PARAMS_TWO_BOOLS( func, statevec_anyCtrlAnyTargDiagMatr_sub, qureg, ctrls.size(), targs.size(), conj, hasPower );
func(qureg, ctrls, ctrlStates, targs, matr, exponent);
}
@@ -520,24 +498,24 @@ void accel_densmatr_allTargDiagMatr_subB(Qureg qureg, FullStateDiagMatr matr, qc
*/
-void accel_statevector_anyCtrlPauliTensorOrGadget_subA(Qureg qureg, vector ctrls, vector states, vector x, vector y, vector z, qcomp f0, qcomp f1) {
+void accel_statevector_anyCtrlPauliTensorOrGadget_subA(Qureg qureg, ConstList64 ctrls, ConstList64 states, ConstList64 x, ConstList64 y, ConstList64 z, qcomp f0, qcomp f1) {
// only X and Y constitute target qubits (Z merely induces a phase)
int numTargs = x.size() + y.size();
- auto func = GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS( statevector_anyCtrlPauliTensorOrGadget_subA, qureg, ctrls.size(), numTargs );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_TWO_PARAMS( func, statevector_anyCtrlPauliTensorOrGadget_subA, qureg, ctrls.size(), numTargs );
func(qureg, ctrls, states, x, y, z, f0, f1);
}
-void accel_statevector_anyCtrlPauliTensorOrGadget_subB(Qureg qureg, vector ctrls, vector states, vector x, vector y, vector z, qcomp f0, qcomp f1, qindex mask) {
+void accel_statevector_anyCtrlPauliTensorOrGadget_subB(Qureg qureg, ConstList64 ctrls, ConstList64 states, ConstList64 x, ConstList64 y, ConstList64 z, qcomp f0, qcomp f1, qindex mask) {
- auto func = GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_CTRLS( statevector_anyCtrlPauliTensorOrGadget_subB, qureg, ctrls.size() );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_ONE_PARAM( func, statevector_anyCtrlPauliTensorOrGadget_subB, qureg, ctrls.size() );
func(qureg, ctrls, states, x, y, z, f0, f1, mask);
}
-void accel_statevector_anyCtrlAnyTargZOrPhaseGadget_sub(Qureg qureg, vector ctrls, vector states, vector targs, qcomp f0, qcomp f1) {
+void accel_statevector_anyCtrlAnyTargZOrPhaseGadget_sub(Qureg qureg, ConstList64 ctrls, ConstList64 states, ConstList64 targs, qcomp f0, qcomp f1) {
- auto func = GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_CTRLS( statevector_anyCtrlAnyTargZOrPhaseGadget_sub, qureg, ctrls.size() );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_ONE_PARAM( func, statevector_anyCtrlAnyTargZOrPhaseGadget_sub, qureg, ctrls.size() );
func(qureg, ctrls, states, targs, f0, f1);
}
@@ -548,10 +526,10 @@ void accel_statevector_anyCtrlAnyTargZOrPhaseGadget_sub(Qureg qureg, vector
*/
-void accel_statevec_setQuregToWeightedSum_sub(Qureg outQureg, vector coeffs, vector inQuregs) {
+void accel_statevec_setQuregToWeightedSum_sub(Qureg outQureg, std::vector coeffs, std::vector inQuregs) {
// consult outQureg's deployment since others are prior validated to match
- auto func = GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_QUREGS( statevec_setQuregToWeightedSum_sub, outQureg, inQuregs.size() );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_ONE_PARAM( func, statevec_setQuregToWeightedSum_sub, outQureg, inQuregs.size() );
func(outQureg, coeffs, inQuregs);
}
@@ -845,15 +823,12 @@ void accel_densmatr_oneQubitDamping_subD(Qureg qureg, int qubit, qreal prob) {
*/
-void accel_densmatr_partialTrace_sub(Qureg inQureg, Qureg outQureg, vector targs, vector pairTargs) {
+void accel_densmatr_partialTrace_sub(Qureg inQureg, Qureg outQureg, ConstList64 targs, ConstList64 pairTargs) {
assert_partialTraceQuregsAreIdenticallyDeployed(inQureg, outQureg);
- auto cpuFunc = GET_FUNC_OPTIMISED_FOR_NUM_TARGS( cpu_densmatr_partialTrace_sub, targs.size() );
- auto gpuFunc = GET_FUNC_OPTIMISED_FOR_NUM_TARGS( gpu_densmatr_partialTrace_sub, targs.size() );
-
- // inQureg == outQureg except for dimension, so use common backend
- auto useFunc = (inQureg.isGpuAccelerated)? gpuFunc : cpuFunc;
- useFunc(inQureg, outQureg, targs, pairTargs);
+ // inQureg == outQureg (except for dimension), so use common backend, informed by inQureg
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_ONE_PARAM( func, densmatr_partialTrace_sub, inQureg, targs.size() );
+ func(inQureg, outQureg, targs, pairTargs);
}
@@ -877,26 +852,26 @@ qreal accel_densmatr_calcTotalProb_sub(Qureg qureg) {
}
-qreal accel_statevec_calcProbOfMultiQubitOutcome_sub(Qureg qureg, vector qubits, vector outcomes) {
+qreal accel_statevec_calcProbOfMultiQubitOutcome_sub(Qureg qureg, ConstList64 qubits, ConstList64 outcomes) {
- auto func = GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_TARGS( statevec_calcProbOfMultiQubitOutcome_sub, qureg, qubits.size() );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_ONE_PARAM( func, statevec_calcProbOfMultiQubitOutcome_sub, qureg, qubits.size() );
return func(qureg, qubits, outcomes);
}
-qreal accel_densmatr_calcProbOfMultiQubitOutcome_sub(Qureg qureg, vector qubits, vector outcomes) {
+qreal accel_densmatr_calcProbOfMultiQubitOutcome_sub(Qureg qureg, ConstList64 qubits, ConstList64 outcomes) {
- auto func = GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_TARGS( densmatr_calcProbOfMultiQubitOutcome_sub, qureg, qubits.size() );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_ONE_PARAM( func, densmatr_calcProbOfMultiQubitOutcome_sub, qureg, qubits.size() );
return func(qureg, qubits, outcomes);
}
-void accel_statevec_calcProbsOfAllMultiQubitOutcomes_sub(qreal* outProbs, Qureg qureg, vector qubits) {
+void accel_statevec_calcProbsOfAllMultiQubitOutcomes_sub(qreal* outProbs, Qureg qureg, ConstList64 qubits) {
- auto func = GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_TARGS( statevec_calcProbsOfAllMultiQubitOutcomes_sub, qureg, qubits.size() );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_ONE_PARAM( func, statevec_calcProbsOfAllMultiQubitOutcomes_sub, qureg, qubits.size() );
func(outProbs, qureg, qubits);
}
-void accel_densmatr_calcProbsOfAllMultiQubitOutcomes_sub(qreal* outProbs, Qureg qureg, vector qubits) {
+void accel_densmatr_calcProbsOfAllMultiQubitOutcomes_sub(qreal* outProbs, Qureg qureg, ConstList64 qubits) {
- auto func = GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_TARGS( densmatr_calcProbsOfAllMultiQubitOutcomes_sub, qureg, qubits.size() );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_ONE_PARAM( func, densmatr_calcProbsOfAllMultiQubitOutcomes_sub, qureg, qubits.size() );
func(outProbs, qureg, qubits);
}
@@ -982,13 +957,13 @@ qcomp accel_densmatr_calcFidelityWithPureState_sub(Qureg rho, Qureg psi, bool co
*/
-qreal accel_statevec_calcExpecAnyTargZ_sub(Qureg qureg, vector targs) {
+qreal accel_statevec_calcExpecAnyTargZ_sub(Qureg qureg, ConstList64 targs) {
return (qureg.isGpuAccelerated)?
gpu_statevec_calcExpecAnyTargZ_sub(qureg, targs):
cpu_statevec_calcExpecAnyTargZ_sub(qureg, targs);
}
-qcomp accel_densmatr_calcExpecAnyTargZ_sub(Qureg qureg, vector targs) {
+qcomp accel_densmatr_calcExpecAnyTargZ_sub(Qureg qureg, ConstList64 targs) {
return (qureg.isGpuAccelerated)?
gpu_densmatr_calcExpecAnyTargZ_sub(qureg, targs):
@@ -996,19 +971,19 @@ qcomp accel_densmatr_calcExpecAnyTargZ_sub(Qureg qureg, vector targs) {
}
-qcomp accel_statevec_calcExpecPauliStr_subA(Qureg qureg, vector x, vector y, vector z) {
+qcomp accel_statevec_calcExpecPauliStr_subA(Qureg qureg, ConstList64 x, ConstList64 y, ConstList64 z) {
return (qureg.isGpuAccelerated)?
gpu_statevec_calcExpecPauliStr_subA(qureg, x, y, z):
cpu_statevec_calcExpecPauliStr_subA(qureg, x, y, z);
}
-qcomp accel_statevec_calcExpecPauliStr_subB(Qureg qureg, vector x, vector y, vector z) {
+qcomp accel_statevec_calcExpecPauliStr_subB(Qureg qureg, ConstList64 x, ConstList64 y, ConstList64 z) {
return (qureg.isGpuAccelerated)?
gpu_statevec_calcExpecPauliStr_subB(qureg, x, y, z):
cpu_statevec_calcExpecPauliStr_subB(qureg, x, y, z);
}
-qcomp accel_densmatr_calcExpecPauliStr_sub(Qureg qureg, vector x, vector y, vector z) {
+qcomp accel_densmatr_calcExpecPauliStr_sub(Qureg qureg, ConstList64 x, ConstList64 y, ConstList64 z) {
return (qureg.isGpuAccelerated)?
gpu_densmatr_calcExpecPauliStr_sub(qureg, x, y, z):
@@ -1110,14 +1085,14 @@ qcomp accel_densmatr_calcExpecFullStateDiagMatr_sub(Qureg qureg, FullStateDiagMa
*/
-void accel_statevec_multiQubitProjector_sub(Qureg qureg, vector qubits, vector outcomes, qreal prob) {
+void accel_statevec_multiQubitProjector_sub(Qureg qureg, ConstList64 qubits, ConstList64 outcomes, qreal prob) {
- auto func = GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_TARGS( statevec_multiQubitProjector_sub, qureg, qubits.size() );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_ONE_PARAM( func, statevec_multiQubitProjector_sub, qureg, qubits.size() );
func(qureg, qubits, outcomes, prob);
}
-void accel_densmatr_multiQubitProjector_sub(Qureg qureg, vector qubits, vector outcomes, qreal prob) {
+void accel_densmatr_multiQubitProjector_sub(Qureg qureg, ConstList64 qubits, ConstList64 outcomes, qreal prob) {
- auto func = GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_NUM_TARGS( densmatr_multiQubitProjector_sub, qureg, qubits.size() );
+ GET_CPU_OR_GPU_FUNC_OPTIMISED_FOR_ONE_PARAM( func, densmatr_multiQubitProjector_sub, qureg, qubits.size() );
func(qureg, qubits, outcomes, prob);
}
diff --git a/quest/src/core/accelerator.hpp b/quest/src/core/accelerator.hpp
index be50e22da..5a8dc37fb 100644
--- a/quest/src/core/accelerator.hpp
+++ b/quest/src/core/accelerator.hpp
@@ -24,9 +24,9 @@
#include "quest/include/qureg.h"
#include "quest/include/matrices.h"
-#include
+#include "quest/src/core/lists.hpp"
-using std::vector;
+#include
/*
@@ -42,9 +42,7 @@ using std::vector;
*/
// must match the macros below, and those in accelerator.cpp
-#define MAX_OPTIMISED_NUM_CTRLS 5
-#define MAX_OPTIMISED_NUM_TARGS 5
-#define MAX_OPTIMISED_NUM_QUREGS 5
+#define MAX_OPTIMISED_PARAM 5
#define INSTANTIATE_FUNC_OPTIMISED_FOR_NUM_TARGS(returntype, funcname, args) \
@@ -82,10 +80,6 @@ using std::vector;
template returntype funcname <-1,numtargs> args;
-#define INSTANTIATE_CONJUGABLE_FUNC_OPTIMISED_FOR_NUM_CTRLS_AND_TARGS(returntype, funcname, args) \
- private_CONJUGABLE_INSTANTIATE_outer(returntype, funcname, true, args) \
- private_CONJUGABLE_INSTANTIATE_outer(returntype, funcname, false, args)
-
#define private_CONJUGABLE_INSTANTIATE_outer(returntype, funcname, conj, args) \
private_CONJUGABLE_INSTANTIATE_inner(returntype, funcname, 0, conj, args) \
private_CONJUGABLE_INSTANTIATE_inner(returntype, funcname, 1, conj, args) \
@@ -175,7 +169,7 @@ void accel_fullstatediagmatr_setElemsToPauliStrSum(FullStateDiagMatr out, PauliS
* COMMUNICATION BUFFER PACKING
*/
-qindex accel_statevec_packAmpsIntoBuffer(Qureg qureg, vector qubits, vector qubitStates);
+qindex accel_statevec_packAmpsIntoBuffer(Qureg qureg, ConstList64 qubits, ConstList64 qubitStates);
qindex accel_statevec_packPairSummedAmpsIntoBuffer(Qureg qureg, int qubit1, int qubit2, int qubit3, int bit2);
@@ -184,32 +178,32 @@ qindex accel_statevec_packPairSummedAmpsIntoBuffer(Qureg qureg, int qubit1, int
* SWAPS
*/
-void accel_statevec_anyCtrlSwap_subA(Qureg qureg, vector ctrls, vector ctrlStates, int targ1, int targ2);
-void accel_statevec_anyCtrlSwap_subB(Qureg qureg, vector ctrls, vector ctrlStates);
-void accel_statevec_anyCtrlSwap_subC(Qureg qureg, vector ctrls, vector ctrlStates, int targ, int targState);
+void accel_statevec_anyCtrlSwap_subA(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, int targ1, int targ2);
+void accel_statevec_anyCtrlSwap_subB(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates);
+void accel_statevec_anyCtrlSwap_subC(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, int targ, int targState);
/*
* DENSE MATRICES
*/
-void accel_statevec_anyCtrlOneTargDenseMatr_subA(Qureg qureg, vector ctrls, vector ctrlStates, int targ, CompMatr1 matr);
-void accel_statevec_anyCtrlOneTargDenseMatr_subB(Qureg qureg, vector ctrls, vector ctrlStates, qcomp fac0, qcomp fac1);
+void accel_statevec_anyCtrlOneTargDenseMatr_subA(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, int targ, CompMatr1 matr);
+void accel_statevec_anyCtrlOneTargDenseMatr_subB(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, qcomp fac0, qcomp fac1);
-void accel_statevec_anyCtrlTwoTargDenseMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, int targ1, int targ2, CompMatr2 matr);
+void accel_statevec_anyCtrlTwoTargDenseMatr_sub(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, int targ1, int targ2, CompMatr2 matr);
-void accel_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, CompMatr matr, bool conj, bool transp);
+void accel_statevec_anyCtrlAnyTargDenseMatr_sub(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, ConstList64 targs, CompMatr matr, bool conj, bool transp);
/*
* DIAGONAL MATRICES
*/
-void accel_statevec_anyCtrlOneTargDiagMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, int targ, DiagMatr1 matr);
+void accel_statevec_anyCtrlOneTargDiagMatr_sub(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, int targ, DiagMatr1 matr);
-void accel_statevec_anyCtrlTwoTargDiagMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, int targ1, int targ2, DiagMatr2 matr);
+void accel_statevec_anyCtrlTwoTargDiagMatr_sub(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, int targ1, int targ2, DiagMatr2 matr);
-void accel_statevec_anyCtrlAnyTargDiagMatr_sub(Qureg qureg, vector ctrls, vector ctrlStates, vector targs, DiagMatr matr, qcomp exponent, bool conj);
+void accel_statevec_anyCtrlAnyTargDiagMatr_sub(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, ConstList64 targs, DiagMatr matr, qcomp exponent, bool conj);
void accel_statevec_allTargDiagMatr_sub(Qureg qureg, FullStateDiagMatr matr, qcomp exponent);
@@ -222,17 +216,17 @@ void accel_densmatr_allTargDiagMatr_subB(Qureg qureg, FullStateDiagMatr matr, qc
* PAULI TENSOR AND GADGET
*/
-void accel_statevector_anyCtrlAnyTargZOrPhaseGadget_sub(Qureg qureg, vector ctrls, vector ctrlStates, vector z, qcomp ampFac, qcomp pairAmpFac);
+void accel_statevector_anyCtrlAnyTargZOrPhaseGadget_sub(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, ConstList64 z, qcomp ampFac, qcomp pairAmpFac);
-void accel_statevector_anyCtrlPauliTensorOrGadget_subA(Qureg qureg, vector ctrls, vector ctrlStates, vector x, vector y, vector z, qcomp ampFac, qcomp pairAmpFac);
-void accel_statevector_anyCtrlPauliTensorOrGadget_subB(Qureg qureg, vector ctrls, vector ctrlStates, vector x, vector y, vector z, qcomp ampFac, qcomp pairAmpFac, qindex bufferMaskXY);
+void accel_statevector_anyCtrlPauliTensorOrGadget_subA(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, ConstList64 x, ConstList64 y, ConstList64 z, qcomp ampFac, qcomp pairAmpFac);
+void accel_statevector_anyCtrlPauliTensorOrGadget_subB(Qureg qureg, ConstList64 ctrls, ConstList64 ctrlStates, ConstList64 x, ConstList64 y, ConstList64 z, qcomp ampFac, qcomp pairAmpFac, qindex bufferMaskXY);
/*
* QUREG COMBINATION
*/
-void accel_statevec_setQuregToWeightedSum_sub(Qureg outQureg, vector coeffs, vector inQuregs);
+void accel_statevec_setQuregToWeightedSum_sub(Qureg outQureg, std::vector coeffs, std::vector inQuregs);
void accel_densmatr_mixQureg_subA(qreal outProb, Qureg out, qreal inProb, Qureg in);
void accel_densmatr_mixQureg_subB(qreal outProb, Qureg out, qreal inProb, Qureg in);
@@ -273,7 +267,7 @@ void accel_densmatr_oneQubitDamping_subD(Qureg qureg, int qubit, qreal prob);
* PARTIAL TRACE
*/
-void accel_densmatr_partialTrace_sub(Qureg inQureg, Qureg outQureg, vector targs, vector pairTargs);
+void accel_densmatr_partialTrace_sub(Qureg inQureg, Qureg outQureg, ConstList64 targs, ConstList64 pairTargs);
/*
@@ -283,11 +277,11 @@ void accel_densmatr_partialTrace_sub(Qureg inQureg, Qureg outQureg, vector
qreal accel_statevec_calcTotalProb_sub(Qureg qureg);
qreal accel_densmatr_calcTotalProb_sub(Qureg qureg);
-qreal accel_statevec_calcProbOfMultiQubitOutcome_sub(Qureg qureg, vector qubits, vector outcomes);
-qreal accel_densmatr_calcProbOfMultiQubitOutcome_sub(Qureg qureg, vector qubits, vector outcomes);
+qreal accel_statevec_calcProbOfMultiQubitOutcome_sub(Qureg qureg, ConstList64 qubits, ConstList64 outcomes);
+qreal accel_densmatr_calcProbOfMultiQubitOutcome_sub(Qureg qureg, ConstList64 qubits, ConstList64 outcomes);
-void accel_statevec_calcProbsOfAllMultiQubitOutcomes_sub(qreal* outProbs, Qureg qureg, vector qubits);
-void accel_densmatr_calcProbsOfAllMultiQubitOutcomes_sub(qreal* outProbs, Qureg qureg, vector qubits);
+void accel_statevec_calcProbsOfAllMultiQubitOutcomes_sub(qreal* outProbs, Qureg qureg, ConstList64 qubits);
+void accel_densmatr_calcProbsOfAllMultiQubitOutcomes_sub(qreal* outProbs, Qureg qureg, ConstList64 qubits);
/*
@@ -305,12 +299,12 @@ qreal accel_densmatr_calcHilbertSchmidtDistance_sub(Qureg quregA, Qureg quregB);
* EXPECTATION VALUES
*/
-qreal accel_statevec_calcExpecAnyTargZ_sub(Qureg qureg, vector sufTargs);
-qcomp accel_densmatr_calcExpecAnyTargZ_sub(Qureg qureg, vector allTargs);;
+qreal accel_statevec_calcExpecAnyTargZ_sub(Qureg qureg, ConstList64 sufTargs);
+qcomp accel_densmatr_calcExpecAnyTargZ_sub(Qureg qureg, ConstList64 allTargs);
-qcomp accel_statevec_calcExpecPauliStr_subA(Qureg qureg, vector x, vector y, vector z);
-qcomp accel_statevec_calcExpecPauliStr_subB(Qureg qureg, vector x, vector y, vector z);
-qcomp accel_densmatr_calcExpecPauliStr_sub (Qureg qureg, vector x, vector y, vector z);
+qcomp accel_statevec_calcExpecPauliStr_subA(Qureg qureg, ConstList64 x, ConstList64 y, ConstList64 z);
+qcomp accel_statevec_calcExpecPauliStr_subB(Qureg qureg, ConstList64 x, ConstList64 y, ConstList64 z);
+qcomp accel_densmatr_calcExpecPauliStr_sub (Qureg qureg, ConstList64 x, ConstList64 y, ConstList64 z);
qcomp accel_statevec_calcExpecFullStateDiagMatr_sub(Qureg qureg, FullStateDiagMatr matr, qcomp exponent, bool useRealPow);
qcomp accel_densmatr_calcExpecFullStateDiagMatr_sub(Qureg qureg, FullStateDiagMatr matr, qcomp exponent, bool useRealPow);
@@ -320,8 +314,8 @@ qcomp accel_densmatr_calcExpecFullStateDiagMatr_sub(Qureg qureg, FullStateDiagMa
* PROJECTORS
*/
-void accel_statevec_multiQubitProjector_sub(Qureg qureg, vector qubits, vector outcomes, qreal prob);
-void accel_densmatr_multiQubitProjector_sub(Qureg qureg, vector qubits, vector outcomes, qreal prob);
+void accel_statevec_multiQubitProjector_sub(Qureg qureg, ConstList64 qubits, ConstList64 outcomes, qreal prob);
+void accel_densmatr_multiQubitProjector_sub(Qureg qureg, ConstList64 qubits, ConstList64 outcomes, qreal prob);
/*
diff --git a/quest/src/core/bitwise.hpp b/quest/src/core/bitwise.hpp
index 4d455c2d8..f5266afa4 100644
--- a/quest/src/core/bitwise.hpp
+++ b/quest/src/core/bitwise.hpp
@@ -163,7 +163,7 @@ INLINE int getBitMaskParity(qindex mask) {
*/
-INLINE qindex insertBits(qindex number, int* bitIndices, int numIndices, int bitValue) {
+INLINE qindex insertBits(qindex number, const int* bitIndices, int numIndices, int bitValue) {
// bitIndices must be strictly increasing
for (int i=0; i
#include
@@ -26,8 +28,9 @@ using std::string;
namespace envvar_names {
- string PERMIT_NODES_TO_SHARE_GPU = "PERMIT_NODES_TO_SHARE_GPU";
- string DEFAULT_VALIDATION_EPSILON = "DEFAULT_VALIDATION_EPSILON";
+ string QUEST_PERMIT_NODES_TO_SHARE_GPU = "QUEST_PERMIT_NODES_TO_SHARE_GPU";
+ string QUEST_DEFAULT_VALIDATION_EPSILON = "QUEST_DEFAULT_VALIDATION_EPSILON";
+ string QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK = "QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK";
}
@@ -41,11 +44,15 @@ namespace envvar_values {
// by default, do not permit GPU sharing since it sabotages performance
// and should only ever be carefully, deliberately enabled
- bool PERMIT_NODES_TO_SHARE_GPU = false;
+ bool QUEST_PERMIT_NODES_TO_SHARE_GPU = false;
// by default, the initial validation epsilon (before being overriden
// by users at runtime) should depend on qreal (i.e. FLOAT_PRECISION)
- qreal DEFAULT_VALIDATION_EPSILON = UNSPECIFIED_DEFAULT_VALIDATION_EPSILON;
+ qreal QUEST_DEFAULT_VALIDATION_EPSILON = QUEST_UNSPECIFIED_DEFAULT_VALIDATION_EPSILON;
+
+ // by default, the initial number of GPU threads per block is informed by
+ // the below cmake variable (before being overridden by env-var or at runtime)
+ int QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK = QUEST_UNSPECIFIED_DEFAULT_NUM_GPU_THREADS_PER_BLOCK;
}
@@ -94,7 +101,7 @@ void assertEnvVarsAreLoaded() {
void validateAndSetWhetherGpuSharingIsPermitted(const char* caller) {
// permit unspecified, falling back to default value
- string name = envvar_names::PERMIT_NODES_TO_SHARE_GPU;
+ string name = envvar_names::QUEST_PERMIT_NODES_TO_SHARE_GPU;
if (!isEnvVarSpecified(name))
return;
@@ -103,14 +110,14 @@ void validateAndSetWhetherGpuSharingIsPermitted(const char* caller) {
validate_envVarPermitNodesToShareGpu(value, caller);
// overwrite default env-var value
- envvar_values::PERMIT_NODES_TO_SHARE_GPU = (value[0] == '1');
+ envvar_values::QUEST_PERMIT_NODES_TO_SHARE_GPU = (value[0] == '1');
}
void validateAndSetDefaultValidationEpsilon(const char* caller) {
// permit unspecified, falling back to the hardcoded precision-specific default
- string name = envvar_names::DEFAULT_VALIDATION_EPSILON;
+ string name = envvar_names::QUEST_DEFAULT_VALIDATION_EPSILON;
if (!isEnvVarSpecified(name))
return;
@@ -119,7 +126,22 @@ void validateAndSetDefaultValidationEpsilon(const char* caller) {
validate_envVarDefaultValidationEpsilon(value, caller);
// overwrite default env-var value
- envvar_values::DEFAULT_VALIDATION_EPSILON = parser_parseReal(value);
+ envvar_values::QUEST_DEFAULT_VALIDATION_EPSILON = parser_parseReal(value);
+}
+
+
+void validateAndSetDefaultNumGpuThreadsPerBlock(const char* caller) {
+
+    // an absent env-var is permitted; the existing default is then left untouched
+    string name = envvar_names::QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK;
+    if (isEnvVarSpecified(name)) {
+
+        // validation always precedes parsing of the user-supplied text
+        string value = getSpecifiedEnvVarValue(name);
+        validate_envVarDefaultNumGpuThreadsPerBlockIsAnInt(value, caller);
+
+        // replace the default env-var value with the parsed one
+        envvar_values::QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK = parser_parseInteger(value);
+    }
+}
@@ -138,6 +160,7 @@ void envvars_validateAndLoadEnvVars(const char* caller) {
// load all env-vars
validateAndSetWhetherGpuSharingIsPermitted(caller);
validateAndSetDefaultValidationEpsilon(caller);
+ validateAndSetDefaultNumGpuThreadsPerBlock(caller);
// ensure no re-loading
global_areEnvVarsLoaded = true;
@@ -147,12 +170,19 @@ void envvars_validateAndLoadEnvVars(const char* caller) {
bool envvars_getWhetherGpuSharingIsPermitted() {
assertEnvVarsAreLoaded();
- return envvar_values::PERMIT_NODES_TO_SHARE_GPU;
+ return envvar_values::QUEST_PERMIT_NODES_TO_SHARE_GPU;
}
qreal envvars_getDefaultValidationEpsilon() {
assertEnvVarsAreLoaded();
- return envvar_values::DEFAULT_VALIDATION_EPSILON;
+ return envvar_values::QUEST_DEFAULT_VALIDATION_EPSILON;
+}
+
+
+// Returns the default number of GPU threads per block currently held in
+// envvar_values, after first asserting that the env-vars have been loaded.
+int envvars_getDefaultNumGpuThreadsPerBlock() {
+ assertEnvVarsAreLoaded();
+
+ return envvar_values::QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK;
+}
diff --git a/quest/src/core/envvars.hpp b/quest/src/core/envvars.hpp
index 828d5605e..4862e8d08 100644
--- a/quest/src/core/envvars.hpp
+++ b/quest/src/core/envvars.hpp
@@ -13,8 +13,9 @@
namespace envvar_names {
- extern std::string PERMIT_NODES_TO_SHARE_GPU;
- extern std::string DEFAULT_VALIDATION_EPSILON;
+ extern std::string QUEST_PERMIT_NODES_TO_SHARE_GPU;
+ extern std::string QUEST_DEFAULT_VALIDATION_EPSILON;
+ extern std::string QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK;
}
@@ -33,5 +34,7 @@ bool envvars_getWhetherGpuSharingIsPermitted();
qreal envvars_getDefaultValidationEpsilon();
+int envvars_getDefaultNumGpuThreadsPerBlock();
+
#endif // ENVVARS_HPP
diff --git a/quest/src/core/errors.cpp b/quest/src/core/errors.cpp
index 9e72b1e0b..807cad105 100644
--- a/quest/src/core/errors.cpp
+++ b/quest/src/core/errors.cpp
@@ -41,6 +41,8 @@ using std::string;
void raiseInternalError(string errorMsg) {
+ printer_sync();
+
print(string("")
+ "\n\n"
+ "A fatal internal QuEST error occurred. "
@@ -49,6 +51,8 @@ void raiseInternalError(string errorMsg) {
+ "\n"
);
+ printer_sync();
+
exit(EXIT_FAILURE);
}
@@ -181,6 +185,26 @@ void error_commNumMessagesExceedTagMax() {
raiseInternalError("A function attempted to communicate via more messages than permitted (since there would be more uniquely-tagged messages than the tag upperbound).");
}
+void error_commAlreadyHasSetMpiComm() {
+
+ raiseInternalError("An attempt was made to set the QuEST MPI communicator after it had already been set (and changed from MPI_COMM_NULL).");
+}
+
+void error_commMpiCommIsNull() {
+
+ raiseInternalError("The MPI communicator was queried but was unexpectedly MPI_COMM_NULL.");
+}
+
+void error_commNewMpiCommIsNull() {
+
+ raiseInternalError("The MPI communicator was attemptedly set to MPI_COMM_NULL, which validation should have prior caught.");
+}
+
+void error_commActiveButMpiNotInit() {
+
+ raiseInternalError("QuEST believed communication was active, but MPI_Init reported MPI was not initialised.");
+}
+
void assert_commBoundsAreValid(Qureg qureg, qindex sendInd, qindex recvInd, qindex numAmps) {
bool valid = (
@@ -243,11 +267,6 @@ void assert_receiverCanFitSendersEntireElems(Qureg receiver, FullStateDiagMatr s
* LOCALISER ERRORS
*/
-void error_localiserNumCtrlStatesInconsistentWithNumCtrls() {
-
- raiseInternalError("An inconsistent number of ctrls and ctrlStates were passed to a function in localiser.cpp.");
-}
-
void error_localiserGivenPauliTensorOrGadgetWithoutXOrY() {
raiseInternalError("The localiser was asked to simulate a Pauli tensor or gadget which contained no X or Y Paulis, which is a special case reserved for phase gadgets.");
@@ -278,6 +297,11 @@ void error_localiserGivenNonUnityGlobalFactorToZTensor() {
raiseInternalError("A localiser function to apply a PauliStr (as a tensor, not a gadget) was given a PauliStr containing only Z and I, along with a non-unity global factor. This is an illegal combination.");
}
+void error_calcFidStateVecDistribWhileDensMatrLocal() {
+
+ raiseInternalError("A localiser function attempted to compute the fidelity between a local density matrix and a distributed statevector, which is an illegal combination.");
+}
+
void assert_localiserSuccessfullyAllocatedTempMemory(qcomp* ptr, bool isGpu) {
if (mem_isAllocated(ptr))
@@ -314,9 +338,10 @@ void assert_localiserPartialTraceGivenCompatibleQuregs(Qureg inQureg, Qureg outQ
raiseInternalError("Inconsistent Qureg sizes and number of traced qubits given to localiser's partial trace function.");
}
-void error_calcFidStateVecDistribWhileDensMatrLocal() {
+void assert_localiserListLengthsAgree(size_t length1, size_t length2) {
- raiseInternalError("A localiser function attempted to compute the fidelity between a local density matrix and a distributed statevector, which is an illegal combination.");
+ if (length1 != length2)
+ raiseInternalError("Two corresponding lists (such as ctrls & ctrlStates, or qubits & outcomes) passed to localiser.cpp differed in length.");
}
void assert_localiserDistribQuregSpooferGivenValidQuregs(Qureg local, Qureg distrib) {
@@ -625,6 +650,11 @@ void error_gpuUnexpectedlyInaccessible() {
raiseInternalError("A function internally assumed (as a precondition) that QuEST was compiled with GPU-acceleration enabled, and that one was physically accessible, though this was untrue.");
}
+void error_gpuNumThreadsPerBlockNotSet() {
+
+ raiseInternalError("A function queried the GPU numThreadsPerBlock before it had been set (intendedly by QuESTEnv initialisation).");
+}
+
void error_gpuMemSyncQueriedButEnvNotGpuAccelerated() {
raiseInternalError("A function checked whether persistent GPU memory (such as in a CompMatr) had been synchronised, but the QuEST environment is not GPU accelerated.");
@@ -753,6 +783,37 @@ void error_pauliStrSumConjHasIncorrectNumTerms() {
+/*
+ * LIST ERRORS
+ */
+
+void error_smallListLengthExceededMax() {
+
+ raiseInternalError("A List64 was attemptedly allocated or grown to an illegally large size.");
+}
+
+void error_smallListIndexWasNegative() {
+
+ raiseInternalError("A List64 index was negative.");
+}
+
+void error_smallListIndexExceededLength() {
+
+ raiseInternalError("A List64 index equalled or exceeded the list length.");
+}
+
+void error_smallListWasEmpty() {
+
+ raiseInternalError("A List64 was unexpectedly empty.");
+}
+
+void error_smallListNullPtrWithPositiveLength() {
+
+ raiseInternalError("The List64 constructor was given a nullptr yet a non-zero length.");
+}
+
+
+
/*
* UTILITY ERRORS
*/
@@ -828,6 +889,16 @@ void error_attemptedToParseRealFromInvalidString() {
raiseInternalError("A function attempted to parse a string to a qreal but the string was not validly formatted. This should have been caught by prior user validation.");
}
+void error_attemptedToParseIntegerFromInvalidString() {
+
+ raiseInternalError("A function attempted to parse a string to an int but the string was not validly formatted. This should have been caught by prior user validation.");
+}
+
+void error_attemptedToParseOutOfRangeInteger() {
+
+ raiseInternalError("A function attempted to parse a string to an integer but the numerical value of the string literal exceeded the range of the integer. This should have been caught by prior validation.");
+}
+
void error_attemptedToParseOutOfRangeReal() {
raiseInternalError("A function attempted to parse a string to a qreal but the numerical value of the string literal exceeded the range of the qreal. This should have been caught by prior user validation.");
diff --git a/quest/src/core/errors.hpp b/quest/src/core/errors.hpp
index 950ac17ed..f91f890b0 100644
--- a/quest/src/core/errors.hpp
+++ b/quest/src/core/errors.hpp
@@ -4,6 +4,12 @@
* hardware accelerators are behaving as expected, and that runtime
* deployment is consistent with the compiled deployment modes.
*
+ * Some error() functions are explicitly marked as [[noreturn]] so that
+ * the compiler knows code after their invocation is never executed,
+ * avoiding warnings about (e.g.) invalid static array indexing. In
+ * theory, all error() functions can be [[noreturn]], but we only
+ * bother with the ones that make a compile-time difference.
+ *
* @author Tyson Jones
* @author Luc Jaulmes (NUMA & pagesize errors)
*/
@@ -85,6 +91,14 @@ void error_commGivenInconsistentNumSubArraysANodes();
void error_commNumMessagesExceedTagMax();
+void error_commAlreadyHasSetMpiComm();
+
+void error_commMpiCommIsNull();
+
+void error_commNewMpiCommIsNull();
+
+void error_commActiveButMpiNotInit();
+
void assert_commBoundsAreValid(Qureg qureg, qindex sendInd, qindex recvInd, qindex numAmps);
void assert_commPayloadIsPowerOf2(qindex numAmps);
@@ -107,8 +121,6 @@ void assert_receiverCanFitSendersEntireElems(Qureg receiver, FullStateDiagMatr s
* LOCALISER ERRORS
*/
-void error_localiserNumCtrlStatesInconsistentWithNumCtrls();
-
void error_localiserGivenPauliTensorOrGadgetWithoutXOrY();
void error_localiserPassedStateVecToChannelComCheck();
@@ -121,6 +133,8 @@ void error_localiserGivenPauliStrWithoutXorY();
void error_localiserGivenNonUnityGlobalFactorToZTensor();
+void error_calcFidStateVecDistribWhileDensMatrLocal();
+
void assert_localiserSuccessfullyAllocatedTempMemory(qcomp* ptr, bool isGpu);
void assert_localiserGivenStateVec(Qureg qureg);
@@ -129,7 +143,7 @@ void assert_localiserGivenDensMatr(Qureg qureg);
void assert_localiserPartialTraceGivenCompatibleQuregs(Qureg inQureg, Qureg outQureg, int numTargs);
-void error_calcFidStateVecDistribWhileDensMatrLocal();
+void assert_localiserListLengthsAgree(size_t length1, size_t length2);
void assert_localiserDistribQuregSpooferGivenValidQuregs(Qureg local, Qureg distrib);
@@ -235,12 +249,16 @@ void error_gpuCopyButMatrixNotGpuAccelerated();
void error_gpuMemSyncQueriedButEnvNotGpuAccelerated();
+void error_gpuNumThreadsPerBlockNotSet();
+
void error_gpuUnexpectedlyInaccessible();
void error_gpuDeadCopyMatrixFunctionCalled();
void error_gpuDenseMatrixConjugatedAndTransposed();
+void error_gpuBadNumThreadsPerBlock();
+
void assert_gpuIsAccessible();
void assert_gpuHasBeenBound(bool isBound);
@@ -301,6 +319,22 @@ void error_pauliStrSumConjHasIncorrectNumTerms();
+/*
+ * LIST ERRORS
+ */
+
+[[noreturn]] void error_smallListLengthExceededMax();
+
+[[noreturn]] void error_smallListIndexWasNegative();
+
+[[noreturn]] void error_smallListIndexExceededLength();
+
+[[noreturn]] void error_smallListWasEmpty();
+
+[[noreturn]] void error_smallListNullPtrWithPositiveLength();
+
+
+
/*
* UTILITY ERRORS
*/
@@ -335,6 +369,10 @@ void error_attemptedToParseComplexFromInvalidString();
void error_attemptedToParseRealFromInvalidString();
+void error_attemptedToParseIntegerFromInvalidString();
+
+void error_attemptedToParseOutOfRangeInteger();
+
void error_attemptedToParseOutOfRangeReal();
void error_attemptedToParsePauliStringFromInvalidString();
@@ -383,4 +421,4 @@ void error_unexpectedNumLindbladSuperpropTerms();
-#endif // ERRORS_HPP
\ No newline at end of file
+#endif // ERRORS_HPP
diff --git a/quest/src/core/lists.hpp b/quest/src/core/lists.hpp
new file mode 100644
index 000000000..68a15d3ec
--- /dev/null
+++ b/quest/src/core/lists.hpp
@@ -0,0 +1,247 @@
+/** @file
+ * A stack-based list of length <= 64, primarily
+ * for storing qubit indices, as an alternative to
+ * std::vector and associated heap-alloc/copy
+ * overheads. Use of List64 optimises few-qubit
+ * simulation where STL container costs dominate;
+ * and in the GPU backend, use of List64 avoids
+ * CUDA memory writes before kernel launches!
+ *
+ * This header also defines ConstList64, which is
+ * merely 'const List64&', to avoid superfluous
+ * stack copies when passing non-mutated List64.
+ *
+ * The functions herein are inlined (in this header-
+ * only file) in the hopes of unbridled compiler
+ * optimisations, but this may prove incompatible
+ * with GPU mode (since INLINE specifies __device__,
+ * which may be incompatible with initialiser lists)
+ *
+ * @author Tyson Jones
+ */
+
+#ifndef LISTS_HPP
+#define LISTS_HPP
+
+#include "quest/src/core/errors.hpp"
+#include "quest/src/core/inliner.hpp"
+
+
+
+/*
+ * CAPACITY
+ *
+ * Since stored in stack, we must upperbound the length of
+ * a List64; we choose 64, which is around the maximum
+ * addressable number of qubits by qindex. In theory, we
+ * could permit users to compile-time reduce this length,
+ * restricting their max simulable system but speeding up
+ * List64 copies in function calls - this may have a
+ * measurable benefit for Quregs of 1-8 qubits. But Donald
+ * Knuth knows and sees all, and he won't be happy!
+ */
+
+
+constexpr size_t MAX_LIST_LENGTH = 64;
+
+
+
+/*
+ * LIST64 DECLARATION
+ *
+ * which mimics an STL container so that it is easily
+ * substituted for std::vector in our codebase, but
+ * crucially, remains (almost) POD and with no heap
+ * allocs, and compatible with CUDA kernels
+ */
+
+
+struct List64 {
+
+private:
+
+ // Keep data private to dissuade inconsistent
+ // access patterns (e.g. .elems vs .data()),
+ // and so users cannot invalidly mutate length.
+ // Readers may wonder why we avoid std::array;
+ // it has a surprise overhead in pass-by-ref!
+ int elems[MAX_LIST_LENGTH];
+
+ // We use size_t, over the arguably internally
+ // natural int, for consistency with STL containers
+ size_t length;
+
+public:
+
+ // Note there is deliberately no constructor!
+ // This keeps the struct trivial and compatible
+ // with CUDA; we must forego initializer ctors
+ // and other syntactic goodies :(
+
+ // let List64 be iterable, e.g. for(auto x : list)
+ INLINE auto begin() { return elems; }
+ INLINE auto begin() const { return elems; }
+ INLINE auto end() { return elems + length; }
+ INLINE auto end() const { return elems + length; }
+
+ // let List64 be indexable, e.g. list[3]
+ INLINE const int& operator[](int index) const {
+
+ if (index < 0)
+ error_smallListIndexWasNegative();
+ if (index >= static_cast(length))
+ error_smallListIndexExceededLength();
+
+ return elems[index];
+ }
+ INLINE int& operator[](int index) {
+
+ return const_cast(
+ static_cast(*this)[index]);
+ }
+
+ // give List64 all the familiar methods of std::vector
+ INLINE void clear() {
+ length = 0;
+ }
+ INLINE bool empty() const {
+ return length == 0;
+ }
+ INLINE size_t size() const {
+ return length;
+ }
+ INLINE int* data() {
+ return elems;
+ }
+ INLINE const int* data() const {
+ return elems;
+ }
+
+ INLINE void push_back(int elem) {
+
+ if (length >= MAX_LIST_LENGTH)
+ error_smallListLengthExceededMax();
+
+ elems[length++] = elem;
+ }
+
+ INLINE void resize(size_t newLength, int value=0) {
+
+ if (newLength > MAX_LIST_LENGTH)
+ error_smallListLengthExceededMax();
+
+ for (auto i=length; i(
+ static_cast(*this).back());
+ }
+
+ INLINE void assign(size_t count, int value) {
+
+ if (count > MAX_LIST_LENGTH)
+ error_smallListLengthExceededMax();
+
+ for (size_t i = 0; i < count; i++)
+ elems[i] = value;
+
+ length = count;
+ }
+};
+
+
+
+/*
+ * LIST64 CONSTRUCTORS
+ *
+ * which are separated here because making them actual
+ * constructors stops List64 being POD/trivial, and
+ * makes it incompatible with CUDA kernels
+ */
+
+
+// Returns a List64 of length zero. The empty braces value-initialise the
+// struct (List64 deliberately has no constructor) and clear() then sets
+// the length to zero explicitly, before the list is returned by value.
+INLINE List64 lists_getEmptyList64() {
+
+ List64 out{};
+ out.clear();
+ return out;
+}
+
+
+// Copies the ints in the half-open range [begin, end) into a new List64.
+// Raises an internal error when end precedes begin, or when the range
+// holds more than MAX_LIST_LENGTH elements.
+INLINE List64 lists_getList64(const int* begin, const int* end) {
+
+ // a reversed range would otherwise produce a negative element count
+ if (end < begin)
+ error_smallListIndexExceededLength();
+
+ // checked up-front, although push_back() below also enforces the maximum
+ auto length = static_cast(end - begin);
+ if (length > MAX_LIST_LENGTH)
+ error_smallListLengthExceededMax();
+
+ List64 out = lists_getEmptyList64();
+
+ for (const int* ptr = begin; ptr != end; ++ptr)
+ out.push_back(*ptr);
+
+ return out;
+}
+
+
+INLINE List64 lists_getList64(const int* elems, size_t length) {
+
+    // a null pointer is only legal when it accompanies an empty list
+    if (elems == nullptr) {
+
+        if (length > 0)
+            error_smallListNullPtrWithPositiveLength();
+
+        return lists_getEmptyList64();
+    }
+
+    // defer to the pointer-pair overload, which validates length <= MAX
+    return lists_getList64(elems, elems + length);
+}
+
+
+// Convenience overload permitting brace syntax, e.g. lists_getList64({targ}),
+// which forwards the initializer list's element range to the pointer-pair
+// overload (and so inherits its length validation).
+INLINE List64 lists_getList64(std::initializer_list init) {
+
+ return lists_getList64(init.begin(), init.end());
+}
+
+
+
+/*
+ * ASSERT TRIVIAL
+ *
+ * which doesn't really guarantee CUDA compatibility, but may
+ * catch a developer accidentally breaking compatibility
+ */
+
+
+static_assert(std::is_trivially_copyable_v);
+static_assert(std::is_standard_layout_v);
+
+
+
+/*
+ * CONST LIST64 DECLARATION
+ *
+ * Functions can accept ConstList64 (over List64) to avoid
+ * a stack copy. A List64 can always be passed to a
+ * function accepting a ConstList64, but a ConstList64 can never
+ * be returned from a function (duh).
+ */
+
+using ConstList64 = const List64&;
+
+
+
+#endif // LISTS_HPP
diff --git a/quest/src/core/localiser.cpp b/quest/src/core/localiser.cpp
index 9d4dbce09..83a23b921 100644
--- a/quest/src/core/localiser.cpp
+++ b/quest/src/core/localiser.cpp
@@ -18,6 +18,7 @@
#include "quest/src/core/errors.hpp"
#include "quest/src/core/bitwise.hpp"
+#include "quest/src/core/lists.hpp"
#include "quest/src/core/utilities.hpp"
#include "quest/src/core/paulilogic.hpp"
#include "quest/src/core/localiser.hpp"
@@ -44,31 +45,7 @@ using std::tuple;
*/
-void assertValidCtrlStates(vector ctrls, vector ctrlStates) {
-
- // providing no control states is always valid (to invoke default all-on-1)
- if (ctrlStates.empty())
- return;
-
- // otherwise a state must be explicitly given for each ctrl
- if (ctrlStates.size() != ctrls.size())
- error_localiserNumCtrlStatesInconsistentWithNumCtrls();
-}
-
-
-void setDefaultCtrlStates(vector ctrls, vector &states) {
-
- // no states necessary if there are no control qubits
- if (ctrls.empty())
- return;
-
- // default ctrl state is all-1
- if (states.empty())
- states.insert(states.end(), ctrls.size(), 1);
-}
-
-
-bool doesGateRequireComm(Qureg qureg, vector targs) {
+bool doesGateRequireComm(Qureg qureg, ConstList64 targs) {
// non-distributed quregs never communicate (duh)
if (!qureg.isDistributed)
@@ -80,11 +57,11 @@ bool doesGateRequireComm(Qureg qureg, vector targs) {
bool doesGateRequireComm(Qureg qureg, int targ) {
- return doesGateRequireComm(qureg, vector{targ});
+ return doesGateRequireComm(qureg, lists_getList64({targ}));
}
-bool doesChannelRequireComm(Qureg qureg, vector ketQubits) {
+bool doesChannelRequireComm(Qureg qureg, ConstList64 ketQubits) {
if (!qureg.isDensityMatrix)
error_localiserPassedStateVecToChannelComCheck();
@@ -96,11 +73,11 @@ bool doesChannelRequireComm(Qureg qureg, vector ketQubits) {
bool doesChannelRequireComm(Qureg qureg, int ketQubit) {
- return doesChannelRequireComm(qureg, vector{ketQubit});
+ return doesChannelRequireComm(qureg, lists_getList64({ketQubit}));
}
-bool doAnyLocalStatesHaveQubitValues(Qureg qureg, vector qubits, vector states) {
+bool doAnyLocalStatesHaveQubitValues(Qureg qureg, ConstList64 qubits, ConstList64 states) {
// this answers the generic question of "do any of the given qubits lie in the
// prefix substate with node-fixed values inconsistent with the given states?"
@@ -126,25 +103,23 @@ bool doAnyLocalStatesHaveQubitValues(Qureg qureg, vector qubits, vector &qubits, vector &states) {
+tuple getSuffixQubitsAndStates(Qureg qureg, ConstList64 qubits, ConstList64 states) {
- vector