diff --git a/c_src/py_exec.c b/c_src/py_exec.c
index aea03af..2fb2820 100644
--- a/c_src/py_exec.c
+++ b/c_src/py_exec.c
@@ -859,21 +859,20 @@ static void executor_stop(void) {
 /**
  * Main function for a multi-executor thread.
  * Each executor has its own queue and processes requests independently.
+ *
+ * GIL handling: Acquire GIL only when processing work, not while idle.
+ * This prevents idle executors from competing with dirty schedulers
+ * running actual Python work via the context-based API.
  */
 static void *multi_executor_thread_main(void *arg) {
     executor_t *exec = (executor_t *)arg;
 
-    /* Acquire GIL for this thread */
-    PyGILState_STATE gstate = PyGILState_Ensure();
-
     exec->running = true;
 
     while (!exec->shutdown) {
         py_request_t *req = NULL;
 
-        /* Release GIL while waiting for work */
-        Py_BEGIN_ALLOW_THREADS
-
+        /* Wait for work - NO GIL held while idle */
         pthread_mutex_lock(&exec->mutex);
         while (exec->queue_head == NULL && !exec->shutdown) {
             pthread_cond_wait(&exec->cond, &exec->mutex);
@@ -890,8 +889,6 @@ static void *multi_executor_thread_main(void *arg) {
         }
         pthread_mutex_unlock(&exec->mutex);
 
-        Py_END_ALLOW_THREADS
-
         if (req != NULL) {
             if (req->type == PY_REQ_SHUTDOWN) {
                 pthread_mutex_lock(&req->mutex);
@@ -900,10 +897,16 @@ static void *multi_executor_thread_main(void *arg) {
                 pthread_mutex_unlock(&req->mutex);
                 break;
             } else {
-                /* Process the request with GIL held */
+                /* Acquire GIL only for actual work */
+                PyGILState_STATE gstate = PyGILState_Ensure();
+
+                /* Process the request */
                 process_request(req);
 
-                /* Signal completion */
+                /* Release GIL immediately after processing */
+                PyGILState_Release(gstate);
+
+                /* Signal completion (outside GIL) */
                 pthread_mutex_lock(&req->mutex);
                 req->completed = true;
                 pthread_cond_signal(&req->cond);
@@ -913,7 +916,6 @@ static void *multi_executor_thread_main(void *arg) {
     }
 
     exec->running = false;
-    PyGILState_Release(gstate);
     return NULL;
 }
 
@@ -953,8 +955,8 @@ static int multi_executor_start(int num_executors) {
         return 0;
     }
 
-    if (num_executors <= 0) {
-        num_executors = 4;
+    if (num_executors < MIN_EXECUTORS) {
+        num_executors = MIN_EXECUTORS;
     }
     if (num_executors > MAX_EXECUTORS) {
         num_executors = MAX_EXECUTORS;
diff --git a/c_src/py_nif.c b/c_src/py_nif.c
index e5c7ae8..9ae1422 100644
--- a/c_src/py_nif.c
+++ b/c_src/py_nif.c
@@ -710,7 +710,7 @@ static ERL_NIF_TERM nif_py_init(ErlNifEnv *env, int argc, const ERL_NIF_TERM arg
         default:
             /* Start multiple executors for GIL contention mode */
             {
-                int num_exec = 4; /* Default */
+                int num_exec = MIN_EXECUTORS; /* Fallback if not provided */
                 /* Check for config */
                 if (argc > 0 && enif_is_map(env, argv[0])) {
                     ERL_NIF_TERM key = enif_make_atom(env, "num_executors");
diff --git a/c_src/py_nif.h b/c_src/py_nif.h
index 8002889..01adeee 100644
--- a/c_src/py_nif.h
+++ b/c_src/py_nif.h
@@ -1063,11 +1063,17 @@ typedef struct {
  * @{
  */
 
+/**
+ * @def MIN_EXECUTORS
+ * @brief Minimum number of executor threads in the pool
+ */
+#define MIN_EXECUTORS 2
+
 /**
  * @def MAX_EXECUTORS
  * @brief Maximum number of executor threads in the pool
  */
-#define MAX_EXECUTORS 16
+#define MAX_EXECUTORS 32
 
 /**
  * @struct executor_t
diff --git a/src/erlang_python_sup.erl b/src/erlang_python_sup.erl
index 5847309..ae33ddd 100644
--- a/src/erlang_python_sup.erl
+++ b/src/erlang_python_sup.erl
@@ -37,8 +37,12 @@ init([]) ->
     ContextMode = application:get_env(erlang_python, context_mode, auto),
     NumAsyncWorkers = application:get_env(erlang_python, num_async_workers, 2),
 
+    %% Default executors: 4 (benchmarked sweet spot for most workloads)
+    %% Can be overridden via {erlang_python, [{num_executors, N}]}
+    NumExecutors = application:get_env(erlang_python, num_executors, 4),
+
    %% Initialize Python runtime first
-    ok = py_nif:init(),
+    ok = py_nif:init(#{num_executors => NumExecutors}),
 
     %% Initialize the semaphore ETS table for rate limiting
     ok = py_semaphore:init(),