From 6d72b2e1ebf883799d0da4da519a16169508f868 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Sun, 8 Mar 2026 10:20:13 +0100 Subject: [PATCH] Improve executor pool: fix GIL contention and set default to 4 - Add MIN_EXECUTORS (2) and increase MAX_EXECUTORS to 32 - Set default executors to 4 (benchmarked sweet spot) - Fix GIL contention: acquire GIL only when processing actual work, not while idle waiting for requests. This prevents idle executor threads from competing with dirty schedulers running Python work via the context-based API. Benchmark results (14 dirty CPU schedulers): - 4 executors: best sync (400k/sec) and concurrent performance - Context API: stable regardless of executor count (GIL fix verified) --- c_src/py_exec.c | 28 +++++++++++++++------------- c_src/py_nif.c | 2 +- c_src/py_nif.h | 8 +++++++- src/erlang_python_sup.erl | 6 +++++- 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/c_src/py_exec.c b/c_src/py_exec.c index aea03af..2fb2820 100644 --- a/c_src/py_exec.c +++ b/c_src/py_exec.c @@ -859,21 +859,20 @@ static void executor_stop(void) { /** * Main function for a multi-executor thread. * Each executor has its own queue and processes requests independently. + * + * GIL handling: Acquire GIL only when processing work, not while idle. + * This prevents idle executors from competing with dirty schedulers + * running actual Python work via the context-based API. */ static void *multi_executor_thread_main(void *arg) { executor_t *exec = (executor_t *)arg; - /* Acquire GIL for this thread */ - PyGILState_STATE gstate = PyGILState_Ensure(); - exec->running = true; while (!exec->shutdown) { py_request_t *req = NULL; - /* Release GIL while waiting for work */ - Py_BEGIN_ALLOW_THREADS - + /* Wait for work - NO GIL held while idle */ pthread_mutex_lock(&exec->mutex); while (exec->queue_head == NULL && !exec->shutdown) { pthread_cond_wait(&exec->cond, &exec->mutex); @@ -890,8 +889,6 @@ static void *multi_executor_thread_main(void *arg) { } pthread_mutex_unlock(&exec->mutex); - Py_END_ALLOW_THREADS - if (req != NULL) { if (req->type == PY_REQ_SHUTDOWN) { pthread_mutex_lock(&req->mutex); @@ -900,10 +897,16 @@ static void *multi_executor_thread_main(void *arg) { pthread_mutex_unlock(&req->mutex); break; } else { - /* Process the request with GIL held */ + /* Acquire GIL only for actual work */ + PyGILState_STATE gstate = PyGILState_Ensure(); + + /* Process the request */ process_request(req); - /* Signal completion */ + /* Release GIL immediately after processing */ + PyGILState_Release(gstate); + + /* Signal completion (outside GIL) */ pthread_mutex_lock(&req->mutex); req->completed = true; pthread_cond_signal(&req->cond); @@ -913,7 +916,6 @@ static void *multi_executor_thread_main(void *arg) { } exec->running = false; - PyGILState_Release(gstate); return NULL; } @@ -953,8 +955,8 @@ static int multi_executor_start(int num_executors) { return 0; } - if (num_executors <= 0) { - num_executors = 4; + if (num_executors < MIN_EXECUTORS) { + num_executors = MIN_EXECUTORS; } if (num_executors > MAX_EXECUTORS) { num_executors = MAX_EXECUTORS; diff --git a/c_src/py_nif.c b/c_src/py_nif.c index e5c7ae8..9ae1422 100644 --- a/c_src/py_nif.c +++ b/c_src/py_nif.c @@ -710,7 +710,7 @@ static ERL_NIF_TERM nif_py_init(ErlNifEnv *env, int argc, const ERL_NIF_TERM arg default: /* Start multiple executors for GIL contention mode */ { - int num_exec = 4; /* Default */ + int num_exec = MIN_EXECUTORS; /* Fallback if not provided */ /* Check for config */ if (argc > 0 && enif_is_map(env, argv[0])) { ERL_NIF_TERM key = enif_make_atom(env, "num_executors"); diff --git a/c_src/py_nif.h b/c_src/py_nif.h index 8002889..01adeee 100644 --- a/c_src/py_nif.h +++ b/c_src/py_nif.h @@ -1063,11 +1063,17 @@ typedef struct { * @{ */ +/** + * @def MIN_EXECUTORS + * @brief Minimum number of executor threads in the pool + */ +#define MIN_EXECUTORS 2 + /** * @def MAX_EXECUTORS * @brief Maximum number of executor threads in the pool */ -#define MAX_EXECUTORS 16 +#define MAX_EXECUTORS 32 /** * @struct executor_t diff --git a/src/erlang_python_sup.erl b/src/erlang_python_sup.erl index 5847309..ae33ddd 100644 --- a/src/erlang_python_sup.erl +++ b/src/erlang_python_sup.erl @@ -37,8 +37,12 @@ init([]) -> ContextMode = application:get_env(erlang_python, context_mode, auto), NumAsyncWorkers = application:get_env(erlang_python, num_async_workers, 2), + %% Default executors: 4 (benchmarked sweet spot for most workloads) + %% Can be overridden via {erlang_python, [{num_executors, N}]} + NumExecutors = application:get_env(erlang_python, num_executors, 4), + %% Initialize Python runtime first - ok = py_nif:init(), + ok = py_nif:init(#{num_executors => NumExecutors}), %% Initialize the semaphore ETS table for rate limiting ok = py_semaphore:init(),