Skip to content

Commit 14e91e7

Browse files
committed
add an environment variable control for the suggested local work size
1 parent b818276 commit 14e91e7

File tree

4 files changed

+20
-4
lines changed

4 files changed

+20
-4
lines changed

layers/10_cmdbufemu/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ The following environment variables can modify the behavior of the command buffe
3434
|----------------------|----------|-----------------|
3535
| `CMDBUFEMU_EnhancedErrorChecking` | Enables additional error checking when commands are added to a command buffer using a command buffer "test queue". By default, the additional error checking is disabled. | `export CMDBUFEMU_EnhancedErrorChecking=1`<br/><br/>`set CMDBUFEMU_EnhancedErrorChecking=1` |
3636
| `CMDBUFEMU_KernelForProfiling` | Enables use of an empty kernel for event profiling instead of event profiling on a command-queue barrier. By default, to minimize overhead, the empty kernel is not used. | `export CMDBUFEMU_KernelForProfiling=1`<br/><br/>`set CMDBUFEMU_KernelForProfiling=1` |
37+
| `CMDBUFEMU_SuggestedLocalWorkSize` | Enables use of the suggested local work-group size extension to eliminate `NULL` local work-group sizes. This only applies when an implementation supports the suggested local work-group size extension and the command is not mutable. By default, use of the suggested local work-group size is enabled. | `export CMDBUFEMU_SuggestedLocalWorkSize=0`<br/><br/>`set CMDBUFEMU_SuggestedLocalWorkSize=0` |
3738

3839
## Known Limitations
3940

layers/10_cmdbufemu/emulate.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2023,7 +2023,7 @@ std::unique_ptr<NDRangeKernel> NDRangeKernel::create(
20232023
local_work_size,
20242024
local_work_size + work_dim);
20252025
}
2026-
else if( isMutable == false )
2026+
else if( g_SuggestedLocalWorkSize && isMutable == false )
20272027
{
20282028
command->local_work_size.resize(work_dim);
20292029
cl_int checkError = cmdbuf->clGetKernelSuggestedLocalWorkSize(

layers/10_cmdbufemu/emulate.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
extern bool g_EnhancedErrorChecking;
1313
extern bool g_KernelForProfiling;
14+
extern bool g_SuggestedLocalWorkSize;
1415

1516
extern const struct _cl_icd_dispatch* g_pNextDispatch;
1617

layers/10_cmdbufemu/main.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,16 @@
3535
bool g_EnhancedErrorChecking = false;
3636

3737
// Using kernels for profiling can fix issues with some implementations
38-
// that do not properly support event profiling on barrkers.
38+
// that do not properly support event profiling on barriers.
3939

4040
bool g_KernelForProfiling = false;
4141

42+
// Using the suggested local work-group size can reduce overhead by determining
43+
// the values for a NULL local work-group size when the command buffer is
44+
// created rather than when it is executed.
45+
46+
bool g_SuggestedLocalWorkSize = true;
47+
4248
const struct _cl_icd_dispatch* g_pNextDispatch = NULL;
4349

4450
static cl_int CL_API_CALL
@@ -231,7 +237,7 @@ static void _init_dispatch()
231237
}
232238

233239
CL_API_ENTRY cl_int CL_API_CALL clGetLayerInfo(
234-
cl_layer_info param_name,
240+
cl_layer_info param_name,
235241
size_t param_value_size,
236242
void* param_value,
237243
size_t* param_value_size_ret)
@@ -251,10 +257,17 @@ CL_API_ENTRY cl_int CL_API_CALL clGetLayerInfo(
251257
#if defined(CL_LAYER_NAME)
252258
case CL_LAYER_NAME:
253259
{
260+
char str[256];
261+
snprintf(str, 256, "Emulation Layer for "
262+
CL_KHR_COMMAND_BUFFER_EXTENSION_NAME
263+
" (EEC: %s, KFP: %s, SLWS: %s)",
264+
g_EnhancedErrorChecking ? "Y" : "N",
265+
g_KernelForProfiling ? "Y" : "N",
266+
g_SuggestedLocalWorkSize ? "Y" : "N");
254267
auto ptr = (char*)param_value;
255268
return writeStringToMemory(
256269
param_value_size,
257-
"Emulation Layer for " CL_KHR_COMMAND_BUFFER_EXTENSION_NAME,
270+
str,
258271
param_value_size_ret,
259272
ptr);
260273
}
@@ -290,6 +303,7 @@ CL_API_ENTRY cl_int CL_API_CALL clInitLayerWithProperties(
290303

291304
getControl("CMDBUFEMU_EnhancedErrorChecking", g_EnhancedErrorChecking);
292305
getControl("CMDBUFEMU_KernelForProfiling", g_KernelForProfiling);
306+
getControl("CMDBUFEMU_SuggestedLocalWorkSize", g_SuggestedLocalWorkSize);
293307

294308
g_pNextDispatch = target_dispatch;
295309

0 commit comments

Comments
 (0)