Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
16179f1
Make env.isX bool to match env.userOwnsMpi
TysonRayJones May 29, 2026
1ddfb6b
Add MPI status validation
TysonRayJones May 29, 2026
507c2e4
Simplify comm_init()
TysonRayJones May 29, 2026
e80f768
Enable error msgs even when MPI config is invalid
TysonRayJones May 29, 2026
8b73cd3
Add Oliver's custom MPI examples
TysonRayJones May 29, 2026
e91f54f
renamed env.userOwnsMpi to env.isMpiUserOwned
TysonRayJones May 29, 2026
fe1020c
Remove redundant stdbool include
TysonRayJones May 29, 2026
d363d09
Add validation to initCustomMpiCommQuESTEnv
TysonRayJones May 29, 2026
70ac569
Rename mpiCommQuest to global_mpiComm
TysonRayJones May 29, 2026
ef6860b
Rename mpiCommQuest (local var) to mpiComm
TysonRayJones May 29, 2026
d85e064
Made environment.cpp adhere to global_ convention
TysonRayJones May 29, 2026
8fe9bbe
Remove suspicious updateQuESTEnvDistInfo()
TysonRayJones May 29, 2026
314e72e
Error in comm_getMpiComm() when comm=NULL
TysonRayJones May 29, 2026
51c0731
Remove MPI leak from comm_config.hpp
TysonRayJones May 29, 2026
047ede7
Rename comm_isMpiSubCommunicatorCompiled to comm_isMpiSubCommCompiled
TysonRayJones May 29, 2026
1680a12
Replace magic number
TysonRayJones May 29, 2026
00332a8
Make initCustomMpiCommQuESTEnv validate against re-init
TysonRayJones May 29, 2026
6763af0
Make initCustomMpiCommQuESTEnv validate subcomm is non-null
TysonRayJones May 29, 2026
1c9072c
Make initCustomMpiCommQuESTEnv validate set-subcomm succeeds
TysonRayJones May 29, 2026
7c75e72
Remove redundant env.bool tests
TysonRayJones May 29, 2026
93f30f2
Rename error_commDoubleSetMpiComm
TysonRayJones May 29, 2026
790d11c
Skip custom MPI examples when no MPI
TysonRayJones May 29, 2026
ac86d12
Patches
TysonRayJones May 29, 2026
a483af5
Permit usage of MPI when QuEST is non-distributed
TysonRayJones May 30, 2026
752e89f
patch bug where user-MPI was finalised
TysonRayJones May 30, 2026
53d3f28
moved new custom-env funcs to experimental.h
TysonRayJones May 31, 2026
6c07b28
Merge remote-tracking branch 'origin/permit-mpi-usage-without-distrib…
TysonRayJones May 31, 2026
dc2cf6c
moved numTBP API to experimental.h
TysonRayJones May 31, 2026
fe5aaf8
Flag register-spill risk when increasing TBP
TysonRayJones May 31, 2026
3f550a7
add numTBP validation
TysonRayJones Jun 1, 2026
db43c17
Allow numTPB query/set when GPU not compiled
TysonRayJones Jun 1, 2026
853a151
improve unit tests
TysonRayJones Jun 1, 2026
4a0a079
revise HIP significance
TysonRayJones Jun 1, 2026
7e68a4d
Replace TBP cmake var with environment var
TysonRayJones Jun 1, 2026
9494a29
Remove env.isHipCompiled
TysonRayJones Jun 1, 2026
d1afb18
restored numTPB cmake var
TysonRayJones Jun 1, 2026
9532b26
warn during config when CMake-var contradicts env-var
TysonRayJones Jun 1, 2026
9f6c82f
lil clean
TysonRayJones Jun 1, 2026
e75459d
Improve numTPB validation msg
TysonRayJones Jun 1, 2026
139457c
Merge branch 'set_gpu_tpb' into gpu-tpb-cleanup
TysonRayJones Jun 1, 2026
e909419
Crossref'd cmake var, env-var and setter in docs
TysonRayJones Jun 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 57 additions & 17 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ option(
)
message(STATUS "Custom communicator support is turned ${QUEST_ENABLE_SUBCOMM}. Set QUEST_ENABLE_SUBCOMM to modify.")


# GPU Acceleration
option(
QUEST_ENABLE_CUDA
Expand All @@ -183,20 +184,20 @@ option(
)
message(STATUS "AMD GPU acceleration is turned ${QUEST_ENABLE_HIP}. Set QUEST_ENABLE_HIP to modify.")


# GPU Performance Tuning
## We do not print this value when configuring CMake as it is for advanced users only.
# (We do not print this value when configuring CMake as it is for advanced users only)

set(QUEST_GPU_NUM_THREADS_PER_BLOCK 128
CACHE
STRING
"The default number of threads per block QuEST will use when offloading to a GPU. Set to 128 by default. Must be a multiple of 32."
# Help text attached to the QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK cache entry.
# The fragments are joined with string(CONCAT) rather than passed to a multi-argument
# set(), because set(var "a" "b") stores the list "a;b" and the separating semicolons
# would then appear verbatim inside the cache docstring.
string(CONCAT quest_tpb_description
    "The default number of threads per block QuEST will use when offloading to a GPU. Set to 128 by default. "
    "Must be a multiple of 32 (on NVIDIA GPUs) or 64 (on AMD GPUs). Can be overridden at executable launch "
    "via an environment variable of the same name, or during runtime via a corresponding API setter function."
)
mark_as_advanced(QUEST_GPU_NUM_THREADS_PER_BLOCK)
set(QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK 128
CACHE STRING
"${quest_tpb_description}")
mark_as_advanced(QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK)

math(EXPR quest_tpb_remainder "${QUEST_GPU_NUM_THREADS_PER_BLOCK} % 32")
if ((NOT (quest_tpb_remainder EQUAL 0)) OR (QUEST_GPU_NUM_THREADS_PER_BLOCK LESS 32))
message(FATAL_ERROR "QUEST_GPU_NUM_THREADS_PER_BLOCK must be a multiple of 32. QUEST_GPU_NUM_THREADS_PER_BLOCK=${QUEST_GPU_NUM_THREADS_PER_BLOCK}.")
endif()

# Deprecated API
option(
Expand All @@ -211,9 +212,15 @@ option(
"Whether to disable compile-time warnings ordinarily triggered by use of the deprecated API. Turned OFF by default."
OFF
)
message(STATUS "Disabling of deprecated API warnings is turned ${QUEST_DISABLE_DEPRECATION_WARNINGS}. Set QUEST_DISABLE_DEPRECATION_WARNINGS to modify.")
message(STATUS
"Disabling of deprecated API warnings is turned ${QUEST_DISABLE_DEPRECATION_WARNINGS}. "
"Set QUEST_DISABLE_DEPRECATION_WARNINGS to modify."
)

option(QUEST_INSTALL_BINARIES "Whether to include example and user binaries in the install." OFF)
if (QUEST_INSTALL_BINARIES)
message(STATUS "Including example and user binaries in the install (if built).")
endif()



Expand All @@ -236,10 +243,12 @@ if (QUEST_ENABLE_CUQUANTUM AND NOT QUEST_ENABLE_CUDA)
message(FATAL_ERROR "Use of cuQuantum requires CUDA.")
endif()


if (QUEST_ENABLE_SUBCOMM AND NOT QUEST_ENABLE_MPI)
message(FATAL_ERROR "Distribution must be enabled to make use of a user-defined communicator for QuEST.")
endif()


if(WIN32)

# Force MSVC to export all symbols in a shared library, like GCC and clang
Expand All @@ -257,6 +266,37 @@ if(WIN32)
endif()


# Validate numTPB even when GPU acceleration is not compiled.
# The required divisor is the warp size of the targeted GPU vendor: 64 when building
# with HIP, otherwise 32 (which is also used when no GPU backend is enabled).
if (QUEST_ENABLE_HIP)
    set(quest_warp_size 64)
    set(quest_gpu_model "AMD GPUs (via HIP)")
else()
    set(quest_warp_size 32)
    set(quest_gpu_model "NVIDIA GPUs (via CUDA), or when not targeting GPUs")
endif()

# Only hand the value to math(EXPR) once it is known to be a plain decimal integer.
# Otherwise an empty or non-numeric value (e.g. -DQUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK=abc)
# would abort configuration with math()'s own parse error, never reaching the
# explanatory message below.
set(quest_tpb_is_valid FALSE)
if ("${QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK}" MATCHES "^[+]?[0-9]+$")
    math(EXPR quest_tpb_remainder "${QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK} % ${quest_warp_size}")
    if ((quest_tpb_remainder EQUAL 0) AND (QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK GREATER 0))
        set(quest_tpb_is_valid TRUE)
    endif()
endif()

if (NOT quest_tpb_is_valid)
    message(FATAL_ERROR
        "QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK was set to ${QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK}, "
        "but it must be a positive multiple of ${quest_warp_size} when compiling for ${quest_gpu_model}."
    )
endif()


# At configure time, compare the CMake value of numTPB against an environment variable
# of the same name. A set, non-empty environment value which differs from the CMake value
# triggers a warning, since (per the message below) it will take precedence at launch.
if (DEFINED ENV{QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK})
    if (NOT (
        "$ENV{QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK}" STREQUAL ""
        OR "$ENV{QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK}" STREQUAL "${QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK}"
    ))
        message(WARNING
            "The CMake option QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK=${QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK} "
            "differs from the current environment variable (of the same name) value of $ENV{QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK}. "
            "If not cleared before QuEST is launched, the latter will override the former."
        )
    endif()
endif()


# Encourage high-performance Release build

# Taken from Kitware's example of problematic code at
Expand Down Expand Up @@ -514,7 +554,6 @@ set(QUEST_COMPILE_MPI ${QUEST_ENABLE_MPI})
set(QUEST_COMPILE_SUBCOMM ${QUEST_ENABLE_SUBCOMM})
set(QUEST_COMPILE_CUQUANTUM ${QUEST_ENABLE_CUQUANTUM})
set(QUEST_INCLUDE_DEPRECATED_FUNCTIONS ${QUEST_ENABLE_DEPRECATED_API})
set(QUEST_DEFAULT_NUM_THREADS_PER_BLOCK ${QUEST_GPU_NUM_THREADS_PER_BLOCK})


# (for the love of God cmake, create a concise syntax for this)
Expand All @@ -523,18 +562,19 @@ if (QUEST_ENABLE_CUDA OR QUEST_ENABLE_HIP)
else()
set(QUEST_COMPILE_CUDA 0)
endif()
set(QUEST_COMPILE_HIP ${QUEST_ENABLE_HIP})


# non-binary set vars which will be written to config.h.in (with a differing name)
set(QUEST_UNSPECIFIED_DEFAULT_NUM_GPU_THREADS_PER_BLOCK ${QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK})


# these vars are already set, but repeated here for clarity
# these vars are already set (cmake name matches the macro name), but repeated here for clarity
set(QUEST_FLOAT_PRECISION ${QUEST_FLOAT_PRECISION})
set(QUEST_ENABLE_NUMA ${QUEST_ENABLE_NUMA})
set(QUEST_DISABLE_DEPRECATION_WARNINGS ${QUEST_DISABLE_DEPRECATION_WARNINGS})


# these do not appear in src but are saved for record-keeping in config.h.in
set(QUEST_COMPILE_HIP ${QUEST_ENABLE_HIP})



# ============================
# Pass files to library
Expand Down
2 changes: 1 addition & 1 deletion docs/cmake.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ make
| `QUEST_DISABLE_DEPRECATION_WARNINGS` | (`OFF`), `ON` | Whether to disable the compile-time deprecation warnings when using the deprecated (v3) API. |
| `USER_SOURCE_NAMES` | (Undefined), String | The source file for a user program which will be compiled alongside QuEST. `USER_OUTPUT_EXE_NAME` *must* also be defined. |
| `USER_OUTPUT_EXE_NAME` | (Undefined), String | The name of the executable which will be created from the provided `USER_SOURCE_NAMES`. `USER_SOURCE_NAMES` *must* also be defined. |
| `QUEST_GPU_NUM_THREADS_PER_BLOCK` | (128), Number | The default number of threads per block QuEST will use when offloading to a GPU. *Must* be a multiple of 32. For AMD GPUs this *should* be a multiple of 64. |
| `QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK` | (128), Number | The default number of threads per block QuEST will use when offloading to a GPU. *Must* be a multiple of 32 (on NVIDIA GPUs) or 64 (on AMD GPUs). This CMake variable sets the default if not later overridden. The number can be overridden at process launch time using an [environment variable](https://quest-kit.github.io/QuEST/group__modes.html#gaf1b71f54d270d3353fe072c66827339b) of the same name, or during runtime using [`setQuESTNumGpuThreadsPerBlock()`](https://quest-kit.github.io/QuEST/group__experimental.html#gae35a55c6d9366ce677e6aaaf4c1ff5ef). |



Expand Down
1 change: 1 addition & 0 deletions docs/launch.md
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ QuEST execution can be configured prior to runtime using the below [environment

- [`QUEST_PERMIT_NODES_TO_SHARE_GPU`](https://quest-kit.github.io/QuEST/group__modes.html#ga84b134d552464a82d29517e1ce1309a7)
- [`QUEST_DEFAULT_VALIDATION_EPSILON`](https://quest-kit.github.io/QuEST/group__modes.html#gac4ab30619e411c965377c910680e242c)
- [`QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK`](https://quest-kit.github.io/QuEST/group__modes.html#gaf1b71f54d270d3353fe072c66827339b)

Note the unit tests in the preceding section accept additional environment variables.

Expand Down
7 changes: 3 additions & 4 deletions quest/include/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -83,16 +83,15 @@
#cmakedefine01 QUEST_COMPILE_SUBCOMM
#cmakedefine01 QUEST_COMPILE_CUDA
#cmakedefine01 QUEST_COMPILE_CUQUANTUM
#cmakedefine01 QUEST_COMPILE_HIP

// default parameters which may have been tuned for performance when building the library
#cmakedefine QUEST_DEFAULT_NUM_THREADS_PER_BLOCK @QUEST_DEFAULT_NUM_THREADS_PER_BLOCK@

// crucial to QuEST source (informs optional NUMA usage)
#cmakedefine01 QUEST_ENABLE_NUMA


// not consulted by src (included for book-keeping)
#cmakedefine01 QUEST_COMPILE_HIP
// default parameters which may have been tuned for performance when building the library
#cmakedefine QUEST_UNSPECIFIED_DEFAULT_NUM_GPU_THREADS_PER_BLOCK @QUEST_UNSPECIFIED_DEFAULT_NUM_GPU_THREADS_PER_BLOCK@



Expand Down
8 changes: 0 additions & 8 deletions quest/include/environment.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,6 @@ int isQuESTEnvInit();
QuESTEnv getQuESTEnv();


/** @notyetdoced
* GPU thread per block control
* This is somehow probably the best pre-existing place for this. It only really applies to GPU, because for
* OpenMP the user can just export OMP_NUM_THREADS or call omp_set_num_threads.
*/
int getQuESTNumGpuThreadsPerBlock();
void setQuESTNumGpuThreadsPerBlock(const int newThreadsPerBlock);


// end de-mangler
#ifdef __cplusplus
Expand Down
39 changes: 37 additions & 2 deletions quest/include/experimental.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ void initCustomMpiQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuAccel, in


#if QUEST_COMPILE_SUBCOMM

/** @notyetdoced
*
* Advanced initialiser which allows the user to provide an MPI communicator for QuEST to use.
Expand All @@ -61,10 +60,46 @@ void initCustomMpiQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuAccel, in
* @author Oliver Brown
*/
void initCustomMpiCommQuESTEnv(MPI_Comm questComm, int useGpuAccel, int useMultithread);

#endif // QUEST_COMPILE_SUBCOMM


/** @notyetdoced
 *
 * Queries the number of threads per block currently used by QuEST's GPU backend.
 *
 * @see
 * - setQuESTNumGpuThreadsPerBlock()
 * - QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK
 * @author Oliver Brown
 */
int getQuESTNumGpuThreadsPerBlock();


/** Overrides the number of CUDA threads per block (or @p blockDim) used by QuEST's GPU-accelerated backend.
 *
 * This changes the GPU parallelisation granularity, which can affect performance, and is
 * useful for performance tuning or diagnostics. Before this function is called, QuEST will
 * use the number specified by the environment variable @p QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK,
 * if defined. Otherwise, it will use the value specified by the CMake/compile option of the
 * same name, which itself presently defaults to @p 128. After this function is called, QuEST
 * will adopt @p numThreadsPerBlock for the remainder of execution, or until this function is
 * called again.
 *
 * Practical values of @p numThreadsPerBlock can vary with the simulation size, the user's GPU hardware,
 * and whether it is NVIDIA or AMD, which have respective warp sizes of @p 32 and @p 64.
 *
 * @note
 * This function has no effect when QuEST is not deployed with GPU-acceleration enabled.
 *
 * @param[in] numThreadsPerBlock the new block size.
 * @throws @validationerror
 * - if the @p QuESTEnv is not initialised.
 * - if @p numThreadsPerBlock is negative.
 * - if @p numThreadsPerBlock is not a multiple of the GPU warp size.
 * - if @p numThreadsPerBlock exceeds the maximum @p blockDim imposed by the GPU hardware.
 * @see
 * - QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK
 * - getQuESTNumGpuThreadsPerBlock()
 * @author Oliver Brown
 * @author Tyson Jones
 */
void setQuESTNumGpuThreadsPerBlock(int numThreadsPerBlock);


// end de-mangler
#ifdef __cplusplus
}
Expand Down
40 changes: 39 additions & 1 deletion quest/include/modes.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@
* - forbid sharing: @p 0, @p '0', @p '', @p , (unspecified)
* - permit sharing: @p 1, @p '1'
*
* @constraints
* The function initQuESTEnv() will throw a validation error if any of the below are not satisfied.
* - The specified string must evaluate to an integer @p 0 or @p 1.
*
* @author Tyson Jones
*/
const int QUEST_PERMIT_NODES_TO_SHARE_GPU = 0;
Expand All @@ -68,7 +72,7 @@
* default validation epsilon.
*
* @constraints
* The function initQuESTEnv() will throw a validation error if:
* The function initQuESTEnv() will throw a validation error if any of the below are not satisfied.
* - The specified epsilon must be `0` or positive.
* - The specified epsilon must not exceed the maximum or minimum value which can be stored
* in a `qreal`, which is specific to its precision.
Expand All @@ -78,6 +82,40 @@
const qreal QUEST_DEFAULT_VALIDATION_EPSILON = 0;


/** @envvardoc
 *
 * Specifies the default number of threads per block (or "block dimension") used by GPU acceleration.
 *
 * The number of dispatched CUDA threads per block controls the parallelisation granularity of
 * QuEST's GPU backend, affecting performance.
 * Setting `QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK` to a valid, positive integer overrides
 * QuEST's default otherwise set during compilation via a CMake option of the same name. If
 * that CMake option was not set, the default is assumed to be @p 128.
 *
 * The number specified by this environment variable will be used as the block dimension by all of
 * QuEST's GPU backend functions, unless overridden at runtime via setQuESTNumGpuThreadsPerBlock().
 * The actual number of threads per block used at any time can be queried via
 * getQuESTNumGpuThreadsPerBlock(), or reported by reportQuESTEnv().
 *
 * @envvarvalues
 * - use the compile-time default (`128` unless changed via the CMake option): @p '', @p , (unspecified)
 * - use number `x`: @p x, @p 'x', @p '+x'
 *
 * @constraints
 * The function initQuESTEnv() will throw a validation error if any of the below are not satisfied.
 * - The specified number must be a positive integer.
 * - The specified number must lie within the range of values which can be stored in an @p int.
 * - The specified number must be divisible by the GPU warp size, which is 32 or 64, depending on
 *   whether deployed to an NVIDIA or AMD GPU. This restriction is imposed even when QuEST is not
 *   deployed with GPU-acceleration.
 * - The specified number must not exceed the maximum imposed by the available GPU hardware.
 *
 * @author Oliver Brown
 * @author Tyson Jones
 */
const int QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK = 0;


#endif


Expand Down
6 changes: 3 additions & 3 deletions quest/include/precision.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,13 +126,13 @@
*/

#if QUEST_FLOAT_PRECISION == 1
#define UNSPECIFIED_DEFAULT_VALIDATION_EPSILON 1E-5
#define QUEST_UNSPECIFIED_DEFAULT_VALIDATION_EPSILON 1E-5

#elif QUEST_FLOAT_PRECISION == 2
#define UNSPECIFIED_DEFAULT_VALIDATION_EPSILON 1E-12
#define QUEST_UNSPECIFIED_DEFAULT_VALIDATION_EPSILON 1E-12

#elif QUEST_FLOAT_PRECISION == 4
#define UNSPECIFIED_DEFAULT_VALIDATION_EPSILON 1E-13
#define QUEST_UNSPECIFIED_DEFAULT_VALIDATION_EPSILON 1E-13

#endif

Expand Down
27 changes: 10 additions & 17 deletions quest/src/api/environment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,10 @@ void validateAndInitCustomQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuA
validate_envNeverInit(global_envPtr != nullptr, global_hasEnvBeenFinalized, caller);

// load env-vars before validating deployment mode, because some env vars can
// affect validation (such as QUEST_PERMIT_NODES_TO_SHARE_GPU)
// affect validation (such as QUEST_PERMIT_NODES_TO_SHARE_GPU). note that
// some env-vars (like QUEST_DEFAULT_NUM_GPU_THREADS_PER_BLOCK) will be here
// validated to have a correct format (like an int), but the validity of its
// actual value will be checked later (since it requires deciding GPU-accel).
envvars_validateAndLoadEnvVars(caller);
validateconfig_setEpsilonToDefault();

Expand Down Expand Up @@ -131,6 +134,11 @@ void validateAndInitCustomQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuA
/// should we warn here if each machine contains
/// more GPUs than deployed MPI-processes (some GPUs idle)?

// validate the initial numTPB env-var (if specified) is valid
int initNumThreadsPerBlock = envvars_getDefaultNumGpuThreadsPerBlock();
validate_numGpuThreadsPerBlock(initNumThreadsPerBlock, useGpuAccel, caller);
gpu_setNumThreadsPerBlock(initNumThreadsPerBlock);

// cuQuantum is always used in GPU-accelerated envs when available
bool useCuQuantum = useGpuAccel && gpu_isCuQuantumCompiled();
if (useCuQuantum) {
Expand All @@ -157,7 +165,7 @@ void validateAndInitCustomQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuA
global_envPtr->isGpuAccelerated = useGpuAccel;
global_envPtr->isDistributed = useDistrib;
global_envPtr->isMpiUserOwned = userOwnsMpi;
global_envPtr->isMpiGpuAware = isMpiGpuAware;
global_envPtr->isMpiGpuAware = isMpiGpuAware;
global_envPtr->isCuQuantumEnabled = useCuQuantum;
global_envPtr->isGpuSharingEnabled = permitGpuSharing;

Expand Down Expand Up @@ -535,20 +543,5 @@ void getQuESTEnvironmentString(char str[200]) {
}


int getQuESTNumGpuThreadsPerBlock() {
validate_envIsInit(__func__);

return gpu_getNumThreadsPerBlock();
}

void setQuESTNumGpuThreadsPerBlock(const int newThreadsPerBlock) {
validate_envIsInit(__func__);

// just rely on the internal function to throw an error if there's no GPU support compiled
// or if newThreadsPerBlock is not a multiple of 32 (NVIDIA) or 64 (AMD)
gpu_setNumThreadsPerBlock(newThreadsPerBlock);
return;
}

// end de-mangler
}
Loading