From bc357f50f4261ced17bce093a99cf08e9ab4a92d Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Sun, 22 Mar 2026 23:09:59 +0100 Subject: [PATCH 01/18] Refactor import caching to per-interpreter model - Replace per-process import caching with global ETS registry - Imports are applied to sys.modules when interpreters are created - Remove deprecated loop import NIFs (loop_import_module, etc.) - Add interp_apply_imports/2 and interp_flush_imports/2 NIFs - flush_imports now removes modules from sys.modules in all interpreters - Contexts and loops sharing the same interpreter share sys.modules - Add py:init_import_registry/0 called on application startup - Add py:get_imports/0, py:del_import/1,2, py:clear_imports/0 - Add py_context:flush_imports/2 and py_context_router:flush_all_imports/1 - Add del_import_reimport_test verifying module removal from sys.modules --- c_src/py_nif.c | 347 +++++++++++++++++- c_src/py_nif.h | 7 +- src/erlang_python_app.erl | 2 + src/py.erl | 199 ++++++++-- src/py_context.erl | 45 ++- src/py_context_router.erl | 34 ++ src/py_event_loop_pool.erl | 80 +--- src/py_nif.erl | 72 +--- test/py_import_SUITE.erl | 632 +++++++++++++++++++++++++++++--- test/py_import_owngil_SUITE.erl | 6 +- 10 files changed, 1201 insertions(+), 223 deletions(-) diff --git a/c_src/py_nif.c b/c_src/py_nif.c index deaa52b..229ebd6 100644 --- a/c_src/py_nif.c +++ b/c_src/py_nif.c @@ -3168,6 +3168,96 @@ static void owngil_execute_create_local_env(py_context_t *ctx) { ctx->response_ok = true; } +/** + * @brief Execute apply_imports in OWN_GIL context + * + * Applies a list of imports to the interpreter's sys.modules. + * The imports list is passed via request_term. + * + * Note: OWN_GIL contexts have their own dedicated interpreter, + * so sys.modules is per-context in this mode. 
+ */ +static void owngil_execute_apply_imports(py_context_t *ctx) { + /* Process each import from request_term */ + ERL_NIF_TERM head, tail = ctx->request_term; + int arity; + const ERL_NIF_TERM *tuple; + + while (enif_get_list_cell(ctx->shared_env, tail, &head, &tail)) { + if (!enif_get_tuple(ctx->shared_env, head, &arity, &tuple) || arity != 2) { + continue; + } + + ErlNifBinary module_bin; + if (!enif_inspect_binary(ctx->shared_env, tuple[0], &module_bin)) { + continue; + } + + /* Convert to C string */ + char *module_name = enif_alloc(module_bin.size + 1); + if (module_name == NULL) continue; + memcpy(module_name, module_bin.data, module_bin.size); + module_name[module_bin.size] = '\0'; + + /* Skip __main__ */ + if (strcmp(module_name, "__main__") == 0) { + enif_free(module_name); + continue; + } + + /* Import the module - caches in this interpreter's sys.modules */ + PyObject *mod = PyImport_ImportModule(module_name); + if (mod != NULL) { + Py_DECREF(mod); /* sys.modules holds the reference */ + } else { + /* Clear error - import failure is not fatal */ + PyErr_Clear(); + } + + enif_free(module_name); + } + + ctx->response_term = enif_make_atom(ctx->shared_env, "ok"); + ctx->response_ok = true; +} + +/** + * @brief Execute flush_imports in OWN_GIL context + * + * Clears the per-context module cache optimization layer. + * Does NOT clear sys.modules (that would break Python). 
+ */ +static void owngil_execute_flush_imports(py_context_t *ctx) { + /* Remove specified modules from sys.modules */ + PyObject *sys_modules = PyImport_GetModuleDict(); /* Borrowed ref */ + if (sys_modules != NULL) { + ERL_NIF_TERM head, tail = ctx->request_data; + while (enif_get_list_cell(ctx->shared_env, tail, &head, &tail)) { + ErlNifBinary mod_bin; + if (enif_inspect_binary(ctx->shared_env, head, &mod_bin)) { + char *mod_name = enif_alloc(mod_bin.size + 1); + if (mod_name != NULL) { + memcpy(mod_name, mod_bin.data, mod_bin.size); + mod_name[mod_bin.size] = '\0'; + /* Remove module from sys.modules if present */ + if (PyDict_GetItemString(sys_modules, mod_name) != NULL) { + PyDict_DelItemString(sys_modules, mod_name); + } + enif_free(mod_name); + } + } + } + } + + /* Clear the per-context optimization cache if it exists */ + if (ctx->module_cache != NULL) { + PyDict_Clear(ctx->module_cache); + } + + ctx->response_term = enif_make_atom(ctx->shared_env, "ok"); + ctx->response_ok = true; +} + /** * @brief Execute a request based on its type */ @@ -3203,6 +3293,12 @@ static void owngil_execute_request(py_context_t *ctx) { case CTX_REQ_CREATE_LOCAL_ENV: owngil_execute_create_local_env(ctx); break; + case CTX_REQ_APPLY_IMPORTS: + owngil_execute_apply_imports(ctx); + break; + case CTX_REQ_FLUSH_IMPORTS: + owngil_execute_flush_imports(ctx); + break; default: ctx->response_term = enif_make_tuple2(ctx->shared_env, enif_make_atom(ctx->shared_env, "error"), @@ -3768,6 +3864,94 @@ static ERL_NIF_TERM dispatch_create_local_env_to_owngil( return result; } +/** + * @brief Dispatch apply_imports to OWN_GIL worker thread + * + * @param env NIF environment + * @param ctx Context resource + * @param imports_term List of {ModuleBin, FuncBin | all} tuples + * @return ok | {error, Reason} + */ +static ERL_NIF_TERM dispatch_apply_imports_to_owngil( + ErlNifEnv *env, py_context_t *ctx, ERL_NIF_TERM imports_term +) { + if (!atomic_load(&ctx->thread_running)) { + return make_error(env, 
"thread_not_running"); + } + + pthread_mutex_lock(&ctx->request_mutex); + + enif_clear_env(ctx->shared_env); + ctx->request_term = enif_make_copy(ctx->shared_env, imports_term); + ctx->request_type = CTX_REQ_APPLY_IMPORTS; + + pthread_cond_signal(&ctx->request_ready); + + /* Wait for response with timeout */ + struct timespec deadline; + clock_gettime(CLOCK_REALTIME, &deadline); + deadline.tv_sec += OWNGIL_DISPATCH_TIMEOUT_SECS; + + while (ctx->request_type != CTX_REQ_NONE) { + int rc = pthread_cond_timedwait(&ctx->response_ready, &ctx->request_mutex, &deadline); + if (rc == ETIMEDOUT) { + atomic_store(&ctx->thread_running, false); + pthread_mutex_unlock(&ctx->request_mutex); + fprintf(stderr, "OWN_GIL apply_imports dispatch timeout: worker thread unresponsive\n"); + return make_error(env, "worker_timeout"); + } + } + + ERL_NIF_TERM result = enif_make_copy(env, ctx->response_term); + pthread_mutex_unlock(&ctx->request_mutex); + + return result; +} + +/** + * @brief Dispatch flush_imports to OWN_GIL worker thread + * + * @param env NIF environment + * @param ctx Context resource + * @param modules_list List of module names to remove from sys.modules + * @return ok + */ +static ERL_NIF_TERM dispatch_flush_imports_to_owngil( + ErlNifEnv *env, py_context_t *ctx, ERL_NIF_TERM modules_list +) { + if (!atomic_load(&ctx->thread_running)) { + return make_error(env, "thread_not_running"); + } + + pthread_mutex_lock(&ctx->request_mutex); + + enif_clear_env(ctx->shared_env); + ctx->request_type = CTX_REQ_FLUSH_IMPORTS; + ctx->request_data = enif_make_copy(ctx->shared_env, modules_list); + + pthread_cond_signal(&ctx->request_ready); + + /* Wait for response with timeout */ + struct timespec deadline; + clock_gettime(CLOCK_REALTIME, &deadline); + deadline.tv_sec += OWNGIL_DISPATCH_TIMEOUT_SECS; + + while (ctx->request_type != CTX_REQ_NONE) { + int rc = pthread_cond_timedwait(&ctx->response_ready, &ctx->request_mutex, &deadline); + if (rc == ETIMEDOUT) { + 
atomic_store(&ctx->thread_running, false); + pthread_mutex_unlock(&ctx->request_mutex); + fprintf(stderr, "OWN_GIL flush_imports dispatch timeout: worker thread unresponsive\n"); + return make_error(env, "worker_timeout"); + } + } + + ERL_NIF_TERM result = enif_make_copy(env, ctx->response_term); + pthread_mutex_unlock(&ctx->request_mutex); + + return result; +} + #endif /* HAVE_SUBINTERPRETERS */ /** @@ -3786,6 +3970,9 @@ static int owngil_context_init(py_context_t *ctx) { atomic_store(&ctx->init_error, false); atomic_store(&ctx->shutdown_requested, false); ctx->request_type = CTX_REQ_NONE; + ctx->request_term = 0; + ctx->request_data = 0; + ctx->response_term = 0; ctx->response_ok = false; /* Initialize mutex and condition variables */ @@ -4735,6 +4922,158 @@ static ERL_NIF_TERM nif_create_local_env(ErlNifEnv *env, int argc, const ERL_NIF return enif_make_tuple2(env, ATOM_OK, ref); } +/** + * @brief Apply a list of imports to an interpreter's sys.modules + * + * nif_interp_apply_imports(Ref, Imports) -> ok | {error, Reason} + * + * Imports: [{ModuleBin, FuncBin | 'all'}, ...] + * Imports modules into the interpreter's sys.modules (shared by all + * contexts/loops using this interpreter). + * + * Note: This imports into the INTERPRETER's module cache (sys.modules), + * not a per-context cache. All contexts using this interpreter will + * see the imported modules. 
+ */ +static ERL_NIF_TERM nif_interp_apply_imports(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + py_context_t *ctx; + + if (!runtime_is_running()) { + return make_error(env, "python_not_running"); + } + + if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { + return make_error(env, "invalid_context"); + } + + if (ctx->destroyed) { + return make_error(env, "context_destroyed"); + } + +#ifdef HAVE_SUBINTERPRETERS + /* OWN_GIL mode: dispatch to the dedicated thread */ + if (ctx->uses_own_gil) { + return dispatch_apply_imports_to_owngil(env, ctx, argv[1]); + } +#endif + + py_context_guard_t guard = py_context_acquire(ctx); + if (!guard.acquired) { + return make_error(env, "acquire_failed"); + } + + /* Process each import - imports go into interpreter's sys.modules */ + ERL_NIF_TERM head, tail = argv[1]; + int arity; + const ERL_NIF_TERM *tuple; + + while (enif_get_list_cell(env, tail, &head, &tail)) { + if (!enif_get_tuple(env, head, &arity, &tuple) || arity != 2) { + continue; + } + + ErlNifBinary module_bin; + if (!enif_inspect_binary(env, tuple[0], &module_bin)) { + continue; + } + + /* Convert to C string */ + char *module_name = enif_alloc(module_bin.size + 1); + if (module_name == NULL) continue; + memcpy(module_name, module_bin.data, module_bin.size); + module_name[module_bin.size] = '\0'; + + /* Skip __main__ */ + if (strcmp(module_name, "__main__") == 0) { + enif_free(module_name); + continue; + } + + /* Import the module - this caches in interpreter's sys.modules + * which is shared by all contexts using this interpreter */ + PyObject *mod = PyImport_ImportModule(module_name); + if (mod != NULL) { + Py_DECREF(mod); /* sys.modules holds the reference */ + } else { + /* Clear error - import failure is not fatal */ + PyErr_Clear(); + } + + enif_free(module_name); + } + + py_context_release(&guard); + return ATOM_OK; +} + +/** + * @brief Flush imports from an interpreter's sys.modules + * + * 
nif_interp_flush_imports(Ref, Modules) -> ok + * + * Removes specified modules from sys.modules and clears the per-context + * module_cache optimization layer. + * + * @param Ref Context reference + * @param Modules List of binary module names to remove from sys.modules + */ +static ERL_NIF_TERM nif_interp_flush_imports(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + py_context_t *ctx; + + if (!runtime_is_running()) { + return ATOM_OK; /* Nothing to flush if Python not running */ + } + + if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { + return make_error(env, "invalid_context"); + } + + if (ctx->destroyed) { + return ATOM_OK; /* Already destroyed */ + } + +#ifdef HAVE_SUBINTERPRETERS + /* OWN_GIL mode: dispatch to the dedicated thread */ + if (ctx->uses_own_gil) { + return dispatch_flush_imports_to_owngil(env, ctx, argv[1]); + } +#endif + + py_context_guard_t guard = py_context_acquire(ctx); + if (!guard.acquired) { + return ATOM_OK; /* Can't acquire - just return ok */ + } + + /* Remove specified modules from sys.modules */ + PyObject *sys_modules = PyImport_GetModuleDict(); /* Borrowed ref */ + if (sys_modules != NULL) { + ERL_NIF_TERM head, tail = argv[1]; + while (enif_get_list_cell(env, tail, &head, &tail)) { + ErlNifBinary mod_bin; + if (enif_inspect_binary(env, head, &mod_bin)) { + char *mod_name = binary_to_string(&mod_bin); + if (mod_name != NULL) { + /* Remove module from sys.modules if present */ + if (PyDict_GetItemString(sys_modules, mod_name) != NULL) { + PyDict_DelItemString(sys_modules, mod_name); + } + enif_free(mod_name); + } + } + } + } + + /* Clear per-context optimization cache */ + if (ctx->module_cache != NULL) { + PyDict_Clear(ctx->module_cache); + } + + py_context_release(&guard); + return ATOM_OK; +} + /** * @brief Execute Python statements using a process-local environment * @@ -6792,12 +7131,6 @@ static ErlNifFunc nif_funcs[] = { /* Per-process namespace NIFs */ {"event_loop_exec", 
2, nif_event_loop_exec, ERL_NIF_DIRTY_JOB_IO_BOUND}, {"event_loop_eval", 2, nif_event_loop_eval, ERL_NIF_DIRTY_JOB_IO_BOUND}, - /* Module import caching NIFs */ - {"loop_import_module", 2, nif_loop_import_module, ERL_NIF_DIRTY_JOB_IO_BOUND}, - {"loop_import_function", 3, nif_loop_import_function, ERL_NIF_DIRTY_JOB_IO_BOUND}, - {"loop_flush_import_cache", 1, nif_loop_flush_import_cache, 0}, - {"loop_import_stats", 1, nif_loop_import_stats, 0}, - {"loop_import_list", 1, nif_loop_import_list, 0}, {"add_reader", 3, nif_add_reader, 0}, {"remove_reader", 2, nif_remove_reader, 0}, {"add_writer", 3, nif_add_writer, 0}, @@ -6870,6 +7203,8 @@ static ErlNifFunc nif_funcs[] = { {"context_eval", 4, nif_context_eval_with_env, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"context_call", 6, nif_context_call_with_env, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"create_local_env", 1, nif_create_local_env, 0}, + {"interp_apply_imports", 2, nif_interp_apply_imports, ERL_NIF_DIRTY_JOB_CPU_BOUND}, + {"interp_flush_imports", 2, nif_interp_flush_imports, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"context_call_method", 4, nif_context_call_method, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"context_to_term", 1, nif_context_to_term, 0}, {"context_interp_id", 1, nif_context_interp_id, 0}, diff --git a/c_src/py_nif.h b/c_src/py_nif.h index a3f73ae..f287aa7 100644 --- a/c_src/py_nif.h +++ b/c_src/py_nif.h @@ -730,7 +730,9 @@ typedef enum { CTX_REQ_CALL_WITH_ENV, /**< Call with process-local environment */ CTX_REQ_EVAL_WITH_ENV, /**< Eval with process-local environment */ CTX_REQ_EXEC_WITH_ENV, /**< Exec with process-local environment */ - CTX_REQ_CREATE_LOCAL_ENV /**< Create process-local env dicts */ + CTX_REQ_CREATE_LOCAL_ENV, /**< Create process-local env dicts */ + CTX_REQ_APPLY_IMPORTS, /**< Apply imports to module cache */ + CTX_REQ_FLUSH_IMPORTS /**< Flush module cache */ } ctx_request_type_t; /** @@ -848,6 +850,9 @@ typedef struct { /** @brief Request term (copied into shared_env) */ ERL_NIF_TERM request_term; + /** @brief Additional 
request data (e.g., modules list for flush) */ + ERL_NIF_TERM request_data; + /** @brief Response term (created in shared_env) */ ERL_NIF_TERM response_term; diff --git a/src/erlang_python_app.erl b/src/erlang_python_app.erl index bc1115a..01e443e 100644 --- a/src/erlang_python_app.erl +++ b/src/erlang_python_app.erl @@ -20,6 +20,8 @@ -export([start/2, stop/1]). start(_StartType, _StartArgs) -> + %% Initialize the import registry ETS table + py:init_import_registry(), erlang_python_sup:start_link(). stop(_State) -> diff --git a/src/py.erl b/src/py.erl index 9617d2a..8a15d8e 100644 --- a/src/py.erl +++ b/src/py.erl @@ -62,6 +62,12 @@ flush_imports/0, import_stats/0, import_list/0, + %% Import registry (global list applied to all interpreters) + init_import_registry/0, + get_imports/0, + del_import/1, + del_import/2, + clear_imports/0, version/0, memory_stats/0, gc/0, @@ -164,6 +170,9 @@ %% Process dictionary key for local Python environment -define(LOCAL_ENV_KEY, py_local_env). +%% ETS table for global import registry +-define(IMPORT_REGISTRY, py_import_registry). + %% @doc Get or create a process-local Python environment for a context. %% %% Each Erlang process can have Python environments per interpreter. @@ -337,73 +346,200 @@ exec(Ctx, Code) when is_pid(Ctx) -> %%% Module Import Caching %%% ============================================================================ -%% @doc Import and cache a module in the current interpreter. +%% @doc Initialize the import registry ETS table. +%% +%% This is called automatically during application startup. +%% Safe to call multiple times - does nothing if already initialized. +%% +%% @returns ok +-spec init_import_registry() -> ok. +init_import_registry() -> + case ets:info(?IMPORT_REGISTRY) of + undefined -> + %% Use bag type to allow multiple entries with same module name + %% e.g., {<<"json">>, all} and {<<"json">>, <<"dumps">>} + ets:new(?IMPORT_REGISTRY, [bag, public, named_table]), + ok; + _ -> + ok + end. 
+ +%% @doc Register a module for import in all interpreters. %% -%% The module is imported in the interpreter handling this process (via affinity). -%% The `__main__' module is never cached in the interpreter cache. +%% Adds the module to the global import registry. When new interpreters +%% are created, they will automatically import all registered modules. +%% The module will be imported lazily when first used. %% -%% This is useful for pre-warming imports before making calls, ensuring the -%% first call doesn't pay the import penalty. +%% The `__main__' module is never cached. %% %% Example: %% ``` %% ok = py:import(json), -%% {ok, Result} = py:call(json, dumps, [Data]). %% Uses cached module +%% {ok, Result} = py:call(json, dumps, [Data]). %% Module imported on first use %% ''' %% %% @param Module Python module name %% @returns ok | {error, Reason} -spec import(py_module()) -> ok | {error, term()}. import(Module) -> - py_event_loop_pool:import(Module). + ModuleBin = ensure_binary(Module), + %% Reject __main__ + case ModuleBin of + <<"__main__">> -> + {error, main_not_cacheable}; + _ -> + %% Add to global registry - module will be imported lazily + case ets:info(?IMPORT_REGISTRY) of + undefined -> ok; + _ -> ets:insert(?IMPORT_REGISTRY, {ModuleBin, all}) + end, + ok + end. -%% @doc Import and cache a module function in the current interpreter. +%% @doc Register a module/function for import in all interpreters. %% -%% Pre-imports the module and caches the function reference for faster -%% subsequent calls. The `__main__' module is never cached. +%% Adds the module/function to the global import registry. When new +%% interpreters are created, they will automatically import the module. +%% The module will be imported lazily when first used. +%% +%% The `__main__' module is never cached. %% %% Example: %% ``` %% ok = py:import(json, dumps), -%% {ok, Result} = py:call(json, dumps, [Data]). %% Uses cached function +%% {ok, Result} = py:call(json, dumps, [Data]). 
%% Module imported on first use %% ''' %% %% @param Module Python module name -%% @param Func Function name to cache +%% @param Func Function name to register %% @returns ok | {error, Reason} -spec import(py_module(), py_func()) -> ok | {error, term()}. import(Module, Func) -> - py_event_loop_pool:import(Module, Func). + ModuleBin = ensure_binary(Module), + FuncBin = ensure_binary(Func), + %% Reject __main__ + case ModuleBin of + <<"__main__">> -> + {error, main_not_cacheable}; + _ -> + %% Add to global registry - module will be imported lazily + case ets:info(?IMPORT_REGISTRY) of + undefined -> ok; + _ -> ets:insert(?IMPORT_REGISTRY, {ModuleBin, FuncBin}) + end, + ok + end. + +%% @doc Get all registered imports from the global registry. +%% +%% Returns a list of {Module, Func | all} tuples representing all +%% modules/functions registered for automatic import. +%% +%% Example: +%% ``` +%% ok = py:import(json), +%% ok = py:import(math, sqrt), +%% [{<<"json">>, all}, {<<"math">>, <<"sqrt">>}] = py:get_imports(). +%% ''' +%% +%% @returns List of {Module, Func | all} tuples +-spec get_imports() -> [{binary(), binary() | all}]. +get_imports() -> + case ets:info(?IMPORT_REGISTRY) of + undefined -> []; + _ -> ets:tab2list(?IMPORT_REGISTRY) + end. + +%% @doc Remove a module from the global import registry. +%% +%% Removes all entries for the specified module from the registry. +%% Does not affect already-running interpreters. +%% +%% @param Module Python module name +%% @returns ok +-spec del_import(py_module()) -> ok. +del_import(Module) -> + ModuleBin = ensure_binary(Module), + case ets:info(?IMPORT_REGISTRY) of + undefined -> ok; + _ -> ets:match_delete(?IMPORT_REGISTRY, {ModuleBin, '_'}) + end, + ok. -%% @doc Flush import caches across all interpreters. +%% @doc Remove a specific module/function from the global import registry. %% -%% Clears the module/function cache in all interpreters. Use this after -%% modifying Python modules on disk to force re-import. 
+%% Removes the specific entry from the registry. +%% Does not affect already-running interpreters. +%% +%% @param Module Python module name +%% @param Func Function name +%% @returns ok +-spec del_import(py_module(), py_func()) -> ok. +del_import(Module, Func) -> + ModuleBin = ensure_binary(Module), + FuncBin = ensure_binary(Func), + case ets:info(?IMPORT_REGISTRY) of + undefined -> ok; + %% Use delete_object to delete the specific tuple (not all objects with same key) + _ -> ets:delete_object(?IMPORT_REGISTRY, {ModuleBin, FuncBin}) + end, + ok. + +%% @doc Clear all registered imports from the global registry. +%% +%% Removes all entries from the registry. +%% Does not affect already-running interpreters. +%% +%% @returns ok +-spec clear_imports() -> ok. +clear_imports() -> + case ets:info(?IMPORT_REGISTRY) of + undefined -> ok; + _ -> ets:delete_all_objects(?IMPORT_REGISTRY) + end, + ok. + +%% @doc Flush import caches from all interpreters and clear the registry. +%% +%% Clears the global import registry and flushes the module/function cache +%% in all running interpreters. Use this after modifying Python modules +%% on disk to force re-import. %% %% @returns ok -spec flush_imports() -> ok. flush_imports() -> - py_event_loop_pool:flush_imports(). + %% Get the list of modules BEFORE clearing + Imports = get_imports(), + Modules = lists:usort([M || {M, _} <- Imports]), + %% Clear the global registry + clear_imports(), + %% Remove modules from sys.modules in all interpreters + py_context_router:flush_all_imports(Modules), + ok. -%% @doc Get import cache statistics for the current interpreter. +%% @doc Get import registry statistics. %% -%% Returns a map with cache metrics for the interpreter handling this process. +%% Returns a map with the count of registered imports. %% %% Example: %% ``` %% {ok, #{count => 5}} = py:import_stats(). 
%% ''' %% -%% @returns {ok, Stats} where Stats is a map with cache metrics +%% @returns {ok, Stats} where Stats is a map with registry metrics -spec import_stats() -> {ok, map()} | {error, term()}. import_stats() -> - py_event_loop_pool:import_stats(). + Count = case ets:info(?IMPORT_REGISTRY) of + undefined -> 0; + _ -> ets:info(?IMPORT_REGISTRY, size) + end, + {ok, #{count => Count}}. -%% @doc List all cached imports in the current interpreter. +%% @doc List all registered imports. %% -%% Returns a map of modules to their cached functions. +%% Returns a map of modules to their registered functions. %% Module names are binary keys, function lists are the values. -%% An empty list means only the module is cached (no specific functions). +%% An empty list means only the module is registered (no specific functions). %% %% Example: %% ``` @@ -418,7 +554,20 @@ import_stats() -> %% @returns {ok, #{Module => [Func]}} map of modules to functions -spec import_list() -> {ok, #{binary() => [binary()]}} | {error, term()}. import_list() -> - py_event_loop_pool:import_list(). + Imports = get_imports(), + %% Group by module + Map = lists:foldl(fun({Module, FuncOrAll}, Acc) -> + Existing = maps:get(Module, Acc, []), + case FuncOrAll of + all -> + %% Module-level import, don't add to function list + maps:put(Module, Existing, Acc); + Func -> + %% Function-level import + maps:put(Module, [Func | Existing], Acc) + end + end, #{}, Imports), + {ok, Map}. %%% ============================================================================ %%% Asynchronous API diff --git a/src/py_context.erl b/src/py_context.erl index b7bda72..02bb0af 100644 --- a/src/py_context.erl +++ b/src/py_context.erl @@ -53,7 +53,8 @@ get_interp_id/1, is_subinterp/1, create_local_env/1, - get_nif_ref/1 + get_nif_ref/1, + flush_imports/2 ]). %% Internal exports @@ -411,6 +412,26 @@ get_nif_ref(Ctx) when is_pid(Ctx) -> error({context_died, Reason}) end. +%% @doc Flush the module cache from this context's interpreter. 
+%% +%% Clears all cached modules from the interpreter's module_cache. +%% Removes specified modules from sys.modules in the interpreter. +%% +%% @param Ctx Context process +%% @param Modules List of module names (binaries) to remove +%% @returns ok +-spec flush_imports(context(), [binary()]) -> ok. +flush_imports(Ctx, Modules) when is_pid(Ctx), is_list(Modules) -> + MRef = erlang:monitor(process, Ctx), + Ctx ! {flush_imports, self(), MRef, Modules}, + receive + {MRef, ok} -> + erlang:demonitor(MRef, [flush]), + ok; + {'DOWN', MRef, process, Ctx, _Reason} -> + ok + end. + %% ============================================================================ %% Internal functions %% ============================================================================ @@ -420,6 +441,8 @@ init(Parent, Id, Mode) -> process_flag(trap_exit, true), case create_context(Mode) of {ok, Ref, InterpId} -> + %% Apply all registered imports to this interpreter + apply_registered_imports(Ref), %% For subinterpreters, create a dedicated event worker EventState = setup_event_worker(Ref, InterpId), %% For thread-model subinterpreters, spawn a dedicated callback handler @@ -497,6 +520,21 @@ extend_erlang_module_in_context(Ref) -> ok end. +%% @private Apply all imports from the global registry to this interpreter. +%% +%% Called when a new interpreter is created to pre-warm the module cache +%% with all modules registered via py:import/1,2. +apply_registered_imports(Ref) -> + case ets:info(py_import_registry) of + undefined -> ok; + _ -> + Imports = ets:tab2list(py_import_registry), + case Imports of + [] -> ok; + _ -> py_nif:interp_apply_imports(Ref, Imports) + end + end. + %% @private create_context(auto) -> case py_nif:subinterp_supported() of @@ -578,6 +616,11 @@ loop(#state{ref = Ref, interp_id = InterpId} = State) -> From ! {MRef, Ref}, loop(State); + {flush_imports, From, MRef, Modules} -> + py_nif:interp_flush_imports(Ref, Modules), + From ! 
{MRef, ok}, + loop(State); + {stop, From, MRef} -> terminate(normal, State), From ! {MRef, ok}; diff --git a/src/py_context_router.erl b/src/py_context_router.erl index 6335d40..67c40c5 100644 --- a/src/py_context_router.erl +++ b/src/py_context_router.erl @@ -76,6 +76,8 @@ start_pool/3, stop_pool/1, pool_started/1, + %% Import cache management + flush_all_imports/1, %% Pool registration (route module/func to specific pools) register_pool/2, register_pool/3, @@ -379,6 +381,38 @@ pool_started(Pool) when is_atom(Pool) -> end end. +%% @doc Flush import caches from all contexts in all pools. +%% +%% Iterates through all pools (default and registered) and removes +%% specified modules from sys.modules in each context's interpreter. +%% +%% @param Modules List of module names (binaries) to remove +%% @returns ok +-spec flush_all_imports([binary()]) -> ok. +flush_all_imports(Modules) -> + %% Get all pool names from the registry + RegisteredPools = case persistent_term:get(?POOL_REGISTRY_KEY, undefined) of + undefined -> []; + Registry when is_map(Registry) -> + %% Extract unique pool names from registry values + lists:usort(maps:values(Registry)) + end, + %% Always include default pool, plus any registered pools + AllPools = lists:usort([default | RegisteredPools]), + %% Flush each pool + lists:foreach(fun(Pool) -> + case pool_started(Pool) of + true -> + Ctxs = contexts(Pool), + lists:foreach(fun(Ctx) -> + catch py_context:flush_imports(Ctx, Modules) + end, Ctxs); + false -> + ok + end + end, AllPools), + ok. 
+ %% ============================================================================ %% Pool Registration (route module/func to specific pools) %% ============================================================================ diff --git a/src/py_event_loop_pool.erl b/src/py_event_loop_pool.erl index d5f3002..e9a5996 100644 --- a/src/py_event_loop_pool.erl +++ b/src/py_event_loop_pool.erl @@ -44,11 +44,6 @@ %% Per-process namespace API exec/1, exec/2, eval/1, eval/2, - %% Module import caching - import/1, import/2, - flush_imports/0, - import_stats/0, - import_list/0, get_all_loops/0 ]). @@ -339,82 +334,9 @@ eval(LoopRef, Expr) -> py_nif:event_loop_eval(LoopRef, Expr). %%% ============================================================================ -%%% Module Import Caching +%%% Pool Utilities %%% ============================================================================ -%% @doc Import and cache a module in the current interpreter. -%% -%% The module is imported in the interpreter assigned to this process (via -%% PID hash affinity). The `__main__' module is never cached. -%% -%% Example: -%%
-%% ok = py_event_loop_pool:import(json),
-%% Ref = py_event_loop_pool:create_task(json, dumps, [[1,2,3]])
-%% 
--spec import(Module :: atom() | binary()) -> ok | {error, term()}. -import(Module) -> - case get_loop() of - {ok, LoopRef} -> - ModuleBin = py_util:to_binary(Module), - py_nif:loop_import_module(LoopRef, ModuleBin); - {error, not_available} -> - {error, event_loop_not_available} - end. - -%% @doc Import a module and cache a specific function. -%% -%% Pre-imports the module and caches the function reference for faster -%% subsequent calls. The `__main__' module is never cached. --spec import(Module :: atom() | binary(), Func :: atom() | binary()) -> ok | {error, term()}. -import(Module, Func) -> - case get_loop() of - {ok, LoopRef} -> - ModuleBin = py_util:to_binary(Module), - FuncBin = py_util:to_binary(Func), - py_nif:loop_import_function(LoopRef, ModuleBin, FuncBin); - {error, not_available} -> - {error, event_loop_not_available} - end. - -%% @doc Flush import caches across all event loop interpreters. -%% -%% Clears the module/function cache in all interpreters. Use this after -%% modifying Python modules on disk to force re-import. --spec flush_imports() -> ok. -flush_imports() -> - case get_all_loops() of - {ok, Loops} -> - [py_nif:loop_flush_import_cache(LoopRef) || {LoopRef, _} <- Loops], - ok; - {error, _} -> - ok - end. - -%% @doc Get import cache statistics for the current interpreter. -%% -%% Returns a map with cache metrics. --spec import_stats() -> {ok, map()} | {error, term()}. -import_stats() -> - case get_loop() of - {ok, LoopRef} -> - py_nif:loop_import_stats(LoopRef); - {error, not_available} -> - {error, event_loop_not_available} - end. - -%% @doc List all cached imports in the current interpreter. -%% -%% Returns a list of cached module and function names. --spec import_list() -> {ok, [binary()]} | {error, term()}. -import_list() -> - case get_loop() of - {ok, LoopRef} -> - py_nif:loop_import_list(LoopRef); - {error, not_available} -> - {error, event_loop_not_available} - end. - %% @doc Get all event loop references in the pool. 
%% %% Returns a list of {LoopRef, WorkerPid} tuples for all loops in the pool. diff --git a/src/py_nif.erl b/src/py_nif.erl index 2745d60..f3abe97 100644 --- a/src/py_nif.erl +++ b/src/py_nif.erl @@ -112,12 +112,9 @@ %% Per-process namespace NIFs event_loop_exec/2, event_loop_eval/2, - %% Module import caching NIFs - loop_import_module/2, - loop_import_function/3, - loop_flush_import_cache/1, - loop_import_stats/1, - loop_import_list/1, + %% Per-interpreter import caching NIFs + interp_apply_imports/2, + interp_flush_imports/2, add_reader/3, remove_reader/2, add_writer/3, @@ -853,64 +850,31 @@ event_loop_eval(_LoopRef, _Expr) -> ?NIF_STUB. %%% ============================================================================ -%%% Module Import Caching +%%% Per-Interpreter Import Caching %%% ============================================================================ -%% @doc Import and cache a module in the event loop's interpreter. +%% @doc Apply a list of imports to an interpreter's module cache. %% -%% Pre-imports the module and caches it for faster subsequent calls. -%% The `__main__' module is never cached (returns error). +%% Called when a new interpreter is created to pre-warm the cache with +%% all modules registered via py:import/1,2. %% -%% @param LoopRef Event loop reference -%% @param Module Module name as binary +%% @param Ref Context reference (from context_create/1) +%% @param Imports List of {ModuleBin, FuncBin | all} tuples %% @returns ok | {error, Reason} --spec loop_import_module(reference(), binary()) -> ok | {error, term()}. -loop_import_module(_LoopRef, _Module) -> +-spec interp_apply_imports(reference(), [{binary(), binary() | all}]) -> ok | {error, term()}. +interp_apply_imports(_Ref, _Imports) -> ?NIF_STUB. -%% @doc Import a module and cache a specific function. +%% @doc Flush the module cache from an interpreter. %% -%% Pre-imports the module and caches the function reference for faster -%% subsequent calls. 
The `__main__' module is never cached (returns error). +%% Removes specified modules from sys.modules and clears the module_cache. +%% Used when flushing imports from the global registry. %% -%% @param LoopRef Event loop reference -%% @param Module Module name as binary -%% @param Func Function name as binary -%% @returns ok | {error, Reason} --spec loop_import_function(reference(), binary(), binary()) -> ok | {error, term()}. -loop_import_function(_LoopRef, _Module, _Func) -> - ?NIF_STUB. - -%% @doc Flush the import cache for an event loop's interpreter. -%% -%% Clears the module/function cache. Use this after modifying Python -%% modules on disk to force re-import. -%% -%% @param LoopRef Event loop reference +%% @param Ref Context reference (from context_create/1) +%% @param Modules List of module names (binaries) to remove from sys.modules %% @returns ok --spec loop_flush_import_cache(reference()) -> ok. -loop_flush_import_cache(_LoopRef) -> - ?NIF_STUB. - -%% @doc Get import cache statistics for an event loop's interpreter. -%% -%% Returns a map with cache metrics for the calling process's namespace. -%% -%% @param LoopRef Event loop reference -%% @returns {ok, Stats} where Stats is a map with count --spec loop_import_stats(reference()) -> {ok, map()} | {error, term()}. -loop_import_stats(_LoopRef) -> - ?NIF_STUB. - -%% @doc List all cached imports in an event loop's interpreter. -%% -%% Returns a map of modules to their cached functions for the calling -%% process's namespace. -%% -%% @param LoopRef Event loop reference -%% @returns {ok, #{Module => [Func]}} map of modules to functions --spec loop_import_list(reference()) -> {ok, #{binary() => [binary()]}} | {error, term()}. -loop_import_list(_LoopRef) -> +-spec interp_flush_imports(reference(), [binary()]) -> ok. +interp_flush_imports(_Ref, _Modules) -> ?NIF_STUB. %% @doc Register a file descriptor for read monitoring. 
diff --git a/test/py_import_SUITE.erl b/test/py_import_SUITE.erl index 86b4413..9b08634 100644 --- a/test/py_import_SUITE.erl +++ b/test/py_import_SUITE.erl @@ -27,7 +27,26 @@ import_list_test/1, import_speeds_up_calls_test/1, import_multiprocess_test/1, - import_concurrent_stress_test/1 + import_concurrent_stress_test/1, + %% Import registry tests + import_registry_test/1, + import_applied_to_new_context_test/1, + del_import_test/1, + del_import_func_test/1, + del_import_reimport_test/1, + clear_imports_test/1, + get_imports_test/1, + flush_clears_registry_test/1, + %% Per-interpreter sharing tests + shared_interpreter_import_test/1, + event_loop_pool_import_test/1, + spawn_task_uses_import_test/1, + subinterp_isolation_test/1, + registry_applied_to_subinterp_test/1, + %% sys.modules verification tests + import_in_sys_modules_test/1, + registry_import_in_sys_modules_test/1, + context_import_in_sys_modules_test/1 ]). all() -> @@ -46,7 +65,26 @@ groups() -> import_list_test, import_speeds_up_calls_test, import_multiprocess_test, - import_concurrent_stress_test + import_concurrent_stress_test, + %% Import registry tests + import_registry_test, + import_applied_to_new_context_test, + del_import_test, + del_import_func_test, + del_import_reimport_test, + clear_imports_test, + get_imports_test, + flush_clears_registry_test, + %% Per-interpreter sharing tests + shared_interpreter_import_test, + event_loop_pool_import_test, + spawn_task_uses_import_test, + subinterp_isolation_test, + registry_applied_to_subinterp_test, + %% sys.modules verification tests + import_in_sys_modules_test, + registry_import_in_sys_modules_test, + context_import_in_sys_modules_test ]}]. init_per_suite(Config) -> @@ -109,18 +147,44 @@ import_main_rejected_test(_Config) -> {error, main_not_cacheable} = py:import('__main__', some_func), {error, main_not_cacheable} = py:import(<<"__main__">>, <<"some_func">>). 
-%% @doc Test importing nonexistent module returns error +%% @doc Test importing nonexistent module - registry accepts it but call fails +%% +%% py:import just adds to the registry. The actual import error happens +%% when trying to use the module. import_nonexistent_module_test(_Config) -> - {error, Reason} = py:import(nonexistent_module_xyz), - ?assert(is_list(Reason) orelse is_binary(Reason) orelse is_atom(Reason)), - ct:pal("Import error for nonexistent module: ~p", [Reason]). + %% Import succeeds (just adds to registry) + ok = py:import(nonexistent_module_xyz), -%% @doc Test importing nonexistent function returns error + %% But trying to use it fails + %% Error format is {error, {ExceptionType, Message}} or {error, atom()} + {error, Reason} = py:call(nonexistent_module_xyz, some_func, []), + ?assert(is_tuple(Reason) orelse is_list(Reason) orelse is_binary(Reason) orelse is_atom(Reason)), + + %% Clean up + ok = py:del_import(nonexistent_module_xyz), + + ct:pal("Nonexistent module error at call time: ~p", [Reason]). + +%% @doc Test importing with nonexistent function name still imports the module +%% +%% py:import/2 imports the module into sys.modules. The function name is +%% stored in the registry but not validated at import time. Function +%% validation happens at call time. import_nonexistent_function_test(_Config) -> - %% Module exists but function doesn't - {error, Reason} = py:import(json, nonexistent_function_xyz), - ?assert(is_list(Reason) orelse is_binary(Reason) orelse is_atom(Reason)), - ct:pal("Import error for nonexistent function: ~p", [Reason]). 
+ %% Module exists but function doesn't - import still succeeds + %% because we're importing the MODULE, not the function + ok = py:import(json, nonexistent_function_xyz), + + %% The module is imported and usable + {ok, _} = py:call(json, dumps, [[1, 2, 3]]), + + %% But calling the nonexistent function will fail + {error, _Reason} = py:call(json, nonexistent_function_xyz, []), + + %% Clean up the registry entry + ok = py:del_import(json, nonexistent_function_xyz), + + ct:pal("Import with invalid function succeeds (validation at call time)"). %% @doc Test that importing same module/function twice is idempotent import_idempotent_test(_Config) -> @@ -247,83 +311,87 @@ import_speeds_up_calls_test(_Config) -> %% because timing can be variable. Just log for observation. ok. -%% @doc Test that 3 processes can independently cache imports +%% @doc Test that multiple processes can use the shared import registry +%% +%% The import registry is global (ETS table) and sys.modules is per-interpreter. +%% All processes using the same interpreter share the same cached modules. import_multiprocess_test(_Config) -> Parent = self(), + %% Clear registry first + ok = py:clear_imports(), + %% Spawn 3 processes, each importing different modules + %% They all contribute to the same global registry Pid1 = spawn_link(fun() -> ok = py:import(json), ok = py:import(json, dumps), - {ok, Stats} = py:import_stats(), - {ok, List} = py:import_list(), - %% Verify we can use the cached import + %% Verify we can use the import {ok, _} = py:call(json, dumps, [[1,2,3]]), - Parent ! {self(), {Stats, List}} + Parent ! {self(), done} end), Pid2 = spawn_link(fun() -> ok = py:import(math), ok = py:import(math, sqrt), ok = py:import(math, floor), - {ok, Stats} = py:import_stats(), - {ok, List} = py:import_list(), - %% Verify we can use the cached import + %% Verify we can use the import {ok, _} = py:call(math, sqrt, [16.0]), - Parent ! {self(), {Stats, List}} + Parent ! 
{self(), done} end), Pid3 = spawn_link(fun() -> ok = py:import(os), ok = py:import(os, getcwd), ok = py:import(string), - {ok, Stats} = py:import_stats(), - {ok, List} = py:import_list(), - %% Verify we can use the cached import + %% Verify we can use the import {ok, _} = py:call(os, getcwd, []), - Parent ! {self(), {Stats, List}} + Parent ! {self(), done} end), %% Collect results from all 3 processes - Results = [receive {Pid, Result} -> {Pid, Result} after 5000 -> timeout end + Results = [receive {Pid, Result} -> Result after 5000 -> timeout end || Pid <- [Pid1, Pid2, Pid3]], %% Verify no timeouts ?assertEqual(false, lists:member(timeout, Results)), - %% Extract and verify each process's cache - [{Pid1, {Stats1, List1}}, {Pid2, {Stats2, List2}}, {Pid3, {Stats3, List3}}] = Results, + %% All processes completed successfully + ?assertEqual([done, done, done], Results), + + %% Now verify the GLOBAL registry has all entries + {ok, Stats} = py:import_stats(), + {ok, List} = py:import_list(), - %% Process 1: json + json.dumps = 2 entries - ?assertEqual(2, maps:get(count, Stats1)), - ?assert(maps:is_key(<<"json">>, List1)), - ?assertEqual([<<"dumps">>], maps:get(<<"json">>, List1)), + %% Total entries: json, json.dumps, math, math.sqrt, math.floor, os, os.getcwd, string = 8 + ?assertEqual(8, maps:get(count, Stats)), - %% Process 2: math + math.sqrt + math.floor = 3 entries - ?assertEqual(3, maps:get(count, Stats2)), - ?assert(maps:is_key(<<"math">>, List2)), - MathFuncs = maps:get(<<"math">>, List2), - ?assertEqual(2, length(MathFuncs)), + %% Verify all modules are in the shared registry + ?assert(maps:is_key(<<"json">>, List)), + ?assert(maps:is_key(<<"math">>, List)), + ?assert(maps:is_key(<<"os">>, List)), + ?assert(maps:is_key(<<"string">>, List)), + + %% Verify function entries + ?assert(lists:member(<<"dumps">>, maps:get(<<"json">>, List))), + MathFuncs = maps:get(<<"math">>, List), ?assert(lists:member(<<"sqrt">>, MathFuncs)), ?assert(lists:member(<<"floor">>, 
MathFuncs)), + ?assert(lists:member(<<"getcwd">>, maps:get(<<"os">>, List))), - %% Process 3: os + os.getcwd + string = 3 entries - ?assertEqual(3, maps:get(count, Stats3)), - ?assert(maps:is_key(<<"os">>, List3)), - ?assert(maps:is_key(<<"string">>, List3)), - ?assertEqual([<<"getcwd">>], maps:get(<<"os">>, List3)), - ?assertEqual([], maps:get(<<"string">>, List3)), - - ct:pal("Process 1 cache: ~p", [List1]), - ct:pal("Process 2 cache: ~p", [List2]), - ct:pal("Process 3 cache: ~p", [List3]). + ct:pal("Global registry after multiprocess imports: ~p", [List]). %% @doc Stress test with many concurrent processes importing simultaneously +%% +%% All processes contribute to the shared global registry. import_concurrent_stress_test(_Config) -> Parent = self(), NumProcesses = 20, Modules = [json, math, os, string, re, base64, collections, functools, itertools, operator], + %% Clear registry first + ok = py:clear_imports(), + %% Spawn many processes that all try to import at the same time Pids = [spawn_link(fun() -> %% Each process imports a random subset of modules @@ -333,14 +401,10 @@ import_concurrent_stress_test(_Config) -> %% All imports should succeed AllOk = lists:all(fun({_, R}) -> R =:= ok end, Results), - %% Get stats and verify - {ok, Stats} = py:import_stats(), - Count = maps:get(count, Stats), - - %% Make a call to verify cache works + %% Make a call to verify imports work CallResult = py:call(json, dumps, [[N]]), - Parent ! {self(), {AllOk, Count, CallResult}} + Parent ! 
{self(), {AllOk, CallResult}} end) || N <- lists:seq(1, NumProcesses)], %% Collect all results @@ -350,10 +414,468 @@ import_concurrent_stress_test(_Config) -> ?assertEqual(false, lists:member(timeout, Results)), %% Verify all processes succeeded - lists:foreach(fun({AllOk, Count, CallResult}) -> + lists:foreach(fun({AllOk, CallResult}) -> ?assertEqual(true, AllOk), - ?assert(Count >= 1), ?assertMatch({ok, _}, CallResult) end, Results), - ct:pal("All ~p processes completed successfully", [NumProcesses]). + %% Verify the global registry has all modules + {ok, Stats} = py:import_stats(), + Count = maps:get(count, Stats), + %% Should have all 10 modules (some may have been imported multiple times but ETS dedupes) + ?assertEqual(10, Count), + + ct:pal("All ~p processes completed successfully, ~p modules in registry", [NumProcesses, Count]). + +%% ============================================================================ +%% Import Registry Tests +%% ============================================================================ + +%% @doc Test that imports are added to the global registry +import_registry_test(_Config) -> + %% Clear any existing registry entries + ok = py:clear_imports(), + + %% Verify registry is empty + [] = py:get_imports(), + + %% Import a module + ok = py:import(json), + + %% Verify it's in the registry + Imports1 = py:get_imports(), + ?assert(lists:member({<<"json">>, all}, Imports1)), + + %% Import a function + ok = py:import(math, sqrt), + + %% Verify both are in the registry + Imports2 = py:get_imports(), + ?assert(lists:member({<<"json">>, all}, Imports2)), + ?assert(lists:member({<<"math">>, <<"sqrt">>}, Imports2)), + + ct:pal("Registry contents: ~p", [Imports2]). 
+ +%% @doc Test that imports are automatically applied to new contexts +import_applied_to_new_context_test(_Config) -> + %% Clear and add an import + ok = py:clear_imports(), + ok = py:import(json), + + %% Create a new context + {ok, Ctx} = py_context:new(#{mode => auto}), + + %% The json module should already be cached in the new context + %% We can verify by calling a function from it + {ok, Result} = py_context:call(Ctx, json, dumps, [[1, 2, 3]], #{}), + ?assertEqual(<<"[1, 2, 3]">>, Result), + + %% Clean up + py_context:destroy(Ctx), + ok = py:clear_imports(). + +%% @doc Test removing a module from the registry +del_import_test(_Config) -> + %% Clear and add some imports + ok = py:clear_imports(), + ok = py:import(json), + ok = py:import(math), + ok = py:import(json, dumps), + + %% Verify they're in the registry + Imports1 = py:get_imports(), + ?assert(length(Imports1) >= 2), + + %% Remove json (should remove both json and json.dumps) + ok = py:del_import(json), + + %% Verify json entries are gone but math remains + Imports2 = py:get_imports(), + ?assertEqual(false, lists:member({<<"json">>, all}, Imports2)), + ?assertEqual(false, lists:member({<<"json">>, <<"dumps">>}, Imports2)), + ?assert(lists:member({<<"math">>, all}, Imports2)), + + ct:pal("Registry after del_import: ~p", [Imports2]). 
+ +%% @doc Test removing a specific module/function from the registry +del_import_func_test(_Config) -> + %% Clear and add some imports + ok = py:clear_imports(), + ok = py:import(json), + ok = py:import(json, dumps), + ok = py:import(json, loads), + + %% Verify they're in the registry + Imports1 = py:get_imports(), + ?assert(lists:member({<<"json">>, all}, Imports1)), + ?assert(lists:member({<<"json">>, <<"dumps">>}, Imports1)), + ?assert(lists:member({<<"json">>, <<"loads">>}, Imports1)), + + %% Remove just json.dumps + ok = py:del_import(json, dumps), + + %% Verify dumps is gone but all and loads remain + Imports2 = py:get_imports(), + ?assert(lists:member({<<"json">>, all}, Imports2)), + ?assertEqual(false, lists:member({<<"json">>, <<"dumps">>}, Imports2)), + ?assert(lists:member({<<"json">>, <<"loads">>}, Imports2)), + + ct:pal("Registry after del_import/2: ~p", [Imports2]). + +%% @doc Test removing import and reimporting works correctly +%% +%% Verifies that: +%% 1. After importing, the module is available +%% 2. After removing and flushing, new subinterpreters don't have the module +%% 3. 
After reimporting, new subinterpreters have the module again +del_import_reimport_test(_Config) -> + %% Clear registry + ok = py:clear_imports(), + + %% Use 'plistlib' module - pure Python, obscure, not loaded by default + ok = py:import(plistlib), + ?assert(lists:member({<<"plistlib">>, all}, py:get_imports())), + + %% Create a subinterpreter and verify plistlib is available + {ok, Ctx1} = py_context:new(#{mode => owngil}), + {ok, true} = py_context:eval(Ctx1, <<"'plistlib' in __import__('sys').modules">>), + py_context:destroy(Ctx1), + + %% Remove from registry and flush all interpreters + ok = py:del_import(plistlib), + ok = py:flush_imports(), + ?assertEqual([], py:get_imports()), + + %% Create a new subinterpreter - plistlib should NOT be pre-imported + {ok, Ctx2} = py_context:new(#{mode => owngil}), + {ok, false} = py_context:eval(Ctx2, <<"'plistlib' in __import__('sys').modules">>), + py_context:destroy(Ctx2), + + %% Re-import plistlib + ok = py:import(plistlib), + ?assert(lists:member({<<"plistlib">>, all}, py:get_imports())), + + %% Create another subinterpreter - plistlib should be pre-imported again + {ok, Ctx3} = py_context:new(#{mode => owngil}), + {ok, true} = py_context:eval(Ctx3, <<"'plistlib' in __import__('sys').modules">>), + py_context:destroy(Ctx3), + + ct:pal("del_import + reimport test passed"). + +%% @doc Test clearing all imports from the registry +clear_imports_test(_Config) -> + %% Add some imports + ok = py:import(json), + ok = py:import(math), + ok = py:import(os), + + %% Verify they're in the registry + Imports1 = py:get_imports(), + ?assert(length(Imports1) >= 3), + + %% Clear all + ok = py:clear_imports(), + + %% Verify registry is empty + Imports2 = py:get_imports(), + ?assertEqual([], Imports2). 
+ +%% @doc Test get_imports returns the correct format +get_imports_test(_Config) -> + %% Clear and add imports + ok = py:clear_imports(), + ok = py:import(json), + ok = py:import(math, sqrt), + + %% Get imports + Imports = py:get_imports(), + + %% Verify format + ?assert(is_list(Imports)), + + %% Check the entries + {_, JsonSpec} = lists:keyfind(<<"json">>, 1, Imports), + ?assertEqual(all, JsonSpec), + + {_, MathSpec} = lists:keyfind(<<"math">>, 1, Imports), + ?assertEqual(<<"sqrt">>, MathSpec), + + ct:pal("get_imports result: ~p", [Imports]). + +%% @doc Test that flush_imports clears the registry +flush_clears_registry_test(_Config) -> + %% Add some imports + ok = py:import(json), + ok = py:import(math), + + %% Verify they're in the registry + Imports1 = py:get_imports(), + ?assert(length(Imports1) >= 2), + + %% Flush imports (clears registry + interpreter caches) + ok = py:flush_imports(), + + %% Verify registry is empty + Imports2 = py:get_imports(), + ?assertEqual([], Imports2), + + %% Verify calls still work (modules are re-imported on demand) + {ok, _} = py:call(json, dumps, [[1]]), + {ok, _} = py:call(math, sqrt, [4.0]). + +%% ============================================================================ +%% Per-Interpreter Sharing Tests +%% ============================================================================ + +%% @doc Test that two contexts sharing the same interpreter see imported modules +%% +%% When we import a module via one context, other contexts using the same +%% interpreter (same subinterpreter pool slot or main interpreter) should +%% see the module in sys.modules. 
+shared_interpreter_import_test(_Config) -> + %% Clear registry + ok = py:clear_imports(), + + %% Create two worker-mode contexts (they share the main interpreter) + {ok, Ctx1} = py_context:new(#{mode => worker}), + {ok, Ctx2} = py_context:new(#{mode => worker}), + + %% Import a module via Ctx1 by calling it (this adds to sys.modules) + {ok, _} = py_context:call(Ctx1, json, dumps, [[1, 2, 3]], #{}), + + %% Now Ctx2 should be able to use json without re-importing + %% (it's already in sys.modules of the shared interpreter) + {ok, Result} = py_context:call(Ctx2, json, loads, [<<"[4, 5, 6]">>], #{}), + ?assertEqual([4, 5, 6], Result), + + %% Clean up + py_context:destroy(Ctx1), + py_context:destroy(Ctx2), + + ct:pal("Worker contexts successfully shared interpreter's sys.modules"). + +%% @doc Test that event loop pool workers see imports from py:import +%% +%% When py:import is called, it imports into the current interpreter. +%% Event loop pool workers using the main interpreter should see these imports. +event_loop_pool_import_test(_Config) -> + %% Clear registry + ok = py:clear_imports(), + + %% Import via py:import (goes to event loop pool's interpreter) + ok = py:import(collections), + + %% Verify we can use it via py:call (uses event loop pool) + {ok, Result} = py:call(collections, 'Counter', [[a, b, a, c, a, b]]), + ?assert(is_map(Result) orelse is_tuple(Result)), + + %% Import another module + ok = py:import(itertools), + + %% Use it + {ok, _} = py:call(itertools, chain, [[[1, 2], [3, 4]]]), + + ct:pal("Event loop pool imports working correctly"). + +%% @doc Test that spawn_task uses imported modules +%% +%% When modules are imported via py:import, spawn_task should be able +%% to use them since they're in the interpreter's sys.modules. 
+spawn_task_uses_import_test(_Config) -> + %% Clear registry + ok = py:clear_imports(), + + %% Import base64 module + ok = py:import(base64), + + %% Define a simple function that uses base64 + Code = <<" +def encode_test(data): + import base64 + return base64.b64encode(data.encode()).decode() +">>, + ok = py:exec(Code), + + %% Use spawn_task to call our function + %% First verify direct call works + {ok, Encoded} = py:call('__main__', encode_test, [<<"hello">>]), + ?assertEqual(<<"aGVsbG8=">>, Encoded), + + %% Now test via spawn_task (fire and forget, but module should be available) + py_event_loop_pool:spawn_task(base64, b64encode, [<<"test">>]), + + %% Give it time to execute + timer:sleep(100), + + ct:pal("spawn_task can use imported modules"). + +%% @doc Test that different subinterpreters are isolated +%% +%% OWN_GIL contexts each have their own interpreter, so imports in one +%% should NOT be visible in another (different sys.modules). +subinterp_isolation_test(_Config) -> + %% Skip if subinterpreters not supported + case py_nif:subinterp_supported() of + false -> + {skip, "Subinterpreters not supported"}; + true -> + %% Clear registry so new contexts don't get pre-imported modules + ok = py:clear_imports(), + + %% Create two OWN_GIL contexts (each has its own interpreter) + {ok, Ctx1} = py_context:new(#{mode => owngil}), + {ok, Ctx2} = py_context:new(#{mode => owngil}), + + %% Define a variable in Ctx1's __main__ + ok = py_context:exec(Ctx1, <<"test_var_isolation = 'ctx1_value'">>), + + %% Try to access it from Ctx2 - should fail (different interpreter) + Result = py_context:eval(Ctx2, <<"test_var_isolation">>), + case Result of + {error, _} -> + %% Expected - variable not defined in Ctx2 + ok; + {ok, <<"ctx1_value">>} -> + %% This would be wrong - isolation failed + ct:fail("Subinterpreter isolation failed - variable leaked between contexts") + end, + + %% Clean up + py_context:destroy(Ctx1), + py_context:destroy(Ctx2), + + ct:pal("Subinterpreter isolation 
verified - different interpreters are isolated") + end. + +%% @doc Test that registry imports are applied to new subinterpreter contexts +%% +%% When py:import is called, it adds to the registry. New contexts should +%% have these imports applied to their interpreter. +registry_applied_to_subinterp_test(_Config) -> + %% Skip if subinterpreters not supported + case py_nif:subinterp_supported() of + false -> + {skip, "Subinterpreters not supported"}; + true -> + %% Clear registry and add an import + ok = py:clear_imports(), + ok = py:import(uuid), + + %% Create a new subinterp context + {ok, Ctx} = py_context:new(#{mode => subinterp}), + + %% The uuid module should be available (applied from registry) + {ok, Result} = py_context:call(Ctx, uuid, uuid4, [], #{}), + ?assert(is_binary(Result) orelse is_list(Result)), + + %% Clean up + py_context:destroy(Ctx), + ok = py:clear_imports(), + + ct:pal("Registry imports successfully applied to new subinterpreter") + end. + +%% ============================================================================ +%% sys.modules Verification Tests +%% ============================================================================ + +%% @doc Test that py:import puts the module in sys.modules +%% +%% After calling py:import, the module should be in the interpreter's +%% sys.modules dictionary. We verify this by checking that calling +%% a function from the module works (which requires it to be imported). +import_in_sys_modules_test(_Config) -> + %% Clear registry + ok = py:clear_imports(), + + %% Import a module + ok = py:import(decimal), + + %% Verify the import worked by calling a function + {ok, _} = py:call(decimal, 'Decimal', [<<"3.14">>]), + + %% Now check sys.modules using the same process (important!) 
+ %% We use exec to define a helper, then eval to check + ok = py:exec(<<" +import sys +_test_decimal_in_sys = 'decimal' in sys.modules +">>), + {ok, InSysModules} = py:eval(<<"_test_decimal_in_sys">>), + ?assertEqual(true, InSysModules), + + ct:pal("py:import correctly adds module to sys.modules"). + +%% @doc Test that ETS registry and sys.modules stay in sync +%% +%% The ETS registry tracks what should be imported, and sys.modules +%% contains the actual imported modules. +registry_import_in_sys_modules_test(_Config) -> + %% Clear registry + ok = py:clear_imports(), + + %% Add to registry and import + ok = py:import(fractions), + ok = py:import(statistics), + + %% Verify ETS registry has the entries + Registry = py:get_imports(), + ?assert(lists:member({<<"fractions">>, all}, Registry)), + ?assert(lists:member({<<"statistics">>, all}, Registry)), + + %% Use the modules to ensure they're imported + {ok, _} = py:call(fractions, 'Fraction', [1, 3]), + {ok, _} = py:call(statistics, mean, [[1, 2, 3, 4, 5]]), + + %% Verify both are in sys.modules by checking from Python + ok = py:exec(<<" +import sys +_fractions_in_sys = 'fractions' in sys.modules +_statistics_in_sys = 'statistics' in sys.modules +_sys_modules_keys = list(sys.modules.keys()) +">>), + + {ok, FractionsInSys} = py:eval(<<"_fractions_in_sys">>), + {ok, StatsInSys} = py:eval(<<"_statistics_in_sys">>), + ?assertEqual(true, FractionsInSys), + ?assertEqual(true, StatsInSys), + + %% Get the list of modules in sys.modules that match our registry + {ok, SysModulesList} = py:eval(<<"_sys_modules_keys">>), + ?assert(lists:member(<<"fractions">>, SysModulesList)), + ?assert(lists:member(<<"statistics">>, SysModulesList)), + + ct:pal("ETS registry and sys.modules are in sync"). + +%% @doc Test that context imports go to sys.modules of that interpreter +%% +%% When using py_context to import/call, the module should end up in +%% the interpreter's sys.modules. 
+context_import_in_sys_modules_test(_Config) -> + %% Clear registry + ok = py:clear_imports(), + + %% Create a context + {ok, Ctx} = py_context:new(#{mode => auto}), + + %% Call a function from a module (this imports it) + {ok, _} = py_context:call(Ctx, textwrap, fill, [<<"Hello world this is a test">>, 10], #{}), + + %% Check if textwrap is in sys.modules of this interpreter + %% Use exec then eval pattern for reliable checking + ok = py_context:exec(Ctx, <<" +import sys +_textwrap_in_sys = 'textwrap' in sys.modules +_sys_keys = list(sys.modules.keys()) +">>), + + {ok, InSysModules} = py_context:eval(Ctx, <<"_textwrap_in_sys">>), + ?assertEqual(true, InSysModules), + + %% Get the sys.modules keys to see what's imported + {ok, SysKeys} = py_context:eval(Ctx, <<"_sys_keys">>), + ?assert(lists:member(<<"textwrap">>, SysKeys)), + + %% Clean up + py_context:destroy(Ctx), + + ct:pal("Context imports correctly populate sys.modules"). diff --git a/test/py_import_owngil_SUITE.erl b/test/py_import_owngil_SUITE.erl index 887060f..071f541 100644 --- a/test/py_import_owngil_SUITE.erl +++ b/test/py_import_owngil_SUITE.erl @@ -64,10 +64,12 @@ groups() -> ]}]. init_per_suite(Config) -> + %% Start application first (loads NIF) + {ok, _} = application:ensure_all_started(erlang_python), + timer:sleep(500), + %% Then check if subinterpreters are supported case py_nif:subinterp_supported() of true -> - {ok, _} = application:ensure_all_started(erlang_python), - timer:sleep(500), Config; false -> {skip, "Requires Python 3.12+"} From 07fd49461af9fd5c23b37fd8ad530d4309a9b784 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Mon, 23 Mar 2026 00:21:39 +0100 Subject: [PATCH 02/18] Simplify import flushing via subinterpreter replacement Replace del_import with generation-based pool slot recycling. When flush_imports is called, all pool slots are marked stale. When a stale slot's usage count drops to 0, the subinterpreter is destroyed and reinitialized with a fresh one. 
Changes: - Add usage_count, marked_stale, generation to subinterp_slot_t - Add subinterp_pool_acquire/release/flush_generation functions - Remove del_import/1,2 from py.erl (no longer needed) - Update flush_imports to use generation-based approach - Remove del_import tests from test suite --- c_src/py_nif.c | 40 ++++++- c_src/py_subinterp_pool.c | 195 ++++++++++++++++++++++++++++++++ c_src/py_subinterp_pool.h | 46 ++++++++ src/py.erl | 58 ++-------- src/py_nif.erl | 13 +++ test/py_import_SUITE.erl | 102 +---------------- test/py_import_owngil_SUITE.erl | 3 + 7 files changed, 310 insertions(+), 147 deletions(-) diff --git a/c_src/py_nif.c b/c_src/py_nif.c index 229ebd6..0cc8e7e 100644 --- a/c_src/py_nif.c +++ b/c_src/py_nif.c @@ -441,6 +441,9 @@ static void context_destructor(ErlNifEnv *env, void *obj) { ctx->globals = NULL; ctx->locals = NULL; + /* Release usage count - if slot is stale and count drops to 0, + * the subinterpreter will be destroyed automatically */ + subinterp_pool_release(ctx->pool_slot); subinterp_pool_free(ctx->pool_slot); ctx->pool_slot = -1; ctx->destroyed = true; @@ -4157,9 +4160,13 @@ static ERL_NIF_TERM nif_context_create(ErlNifEnv *env, int argc, const ERL_NIF_T ctx->pool_slot = slot; + /* Acquire usage count for the pool slot */ + subinterp_pool_acquire(slot); + /* Get the pool slot for interpreter access */ subinterp_slot_t *pool_slot = subinterp_pool_get(slot); if (pool_slot == NULL || !pool_slot->initialized) { + subinterp_pool_release(slot); /* Release usage count */ subinterp_pool_free(slot); close(ctx->callback_pipe[0]); close(ctx->callback_pipe[1]); @@ -4183,6 +4190,7 @@ static ERL_NIF_TERM nif_context_create(ErlNifEnv *env, int argc, const ERL_NIF_T Py_XDECREF(ctx->module_cache); PyThreadState_Swap(saved); PyGILState_Release(gstate); + subinterp_pool_release(slot); /* Release usage count */ subinterp_pool_free(slot); close(ctx->callback_pipe[0]); close(ctx->callback_pipe[1]); @@ -4313,7 +4321,11 @@ static ERL_NIF_TERM 
nif_context_destroy(ErlNifEnv *env, int argc, const ERL_NIF_ ctx->locals = NULL; ctx->module_cache = NULL; - /* Release the pool slot back to the pool */ + /* Release the pool slot back to the pool. + * subinterp_pool_release decrements usage count and may destroy + * the subinterpreter if it's marked stale and count drops to 0. + * subinterp_pool_free marks the slot as available for new contexts. */ + subinterp_pool_release(ctx->pool_slot); subinterp_pool_free(ctx->pool_slot); ctx->pool_slot = -1; @@ -5074,6 +5086,31 @@ static ERL_NIF_TERM nif_interp_flush_imports(ErlNifEnv *env, int argc, const ERL return ATOM_OK; } +/** + * @brief Flush import generation and mark all pool slots stale + * + * nif_subinterp_pool_flush_generation() -> {ok, NewGeneration} + * + * Increments the global import generation counter and marks all initialized + * pool slots as stale. When a stale slot's usage count drops to 0, the + * subinterpreter is automatically destroyed and the slot becomes available + * for a fresh subinterpreter. + * + * This enables flush_imports to work by replacing subinterpreters rather + * than trying to manipulate sys.modules directly (which can crash C extensions). 
+ */ +static ERL_NIF_TERM nif_subinterp_pool_flush_generation(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + (void)argv; + +#ifdef HAVE_SUBINTERPRETERS + uint64_t new_gen = subinterp_pool_flush_generation(); + return enif_make_tuple2(env, ATOM_OK, enif_make_uint64(env, new_gen)); +#else + return enif_make_tuple2(env, ATOM_OK, enif_make_uint64(env, 0)); +#endif +} + /** * @brief Execute Python statements using a process-local environment * @@ -7205,6 +7242,7 @@ static ErlNifFunc nif_funcs[] = { {"create_local_env", 1, nif_create_local_env, 0}, {"interp_apply_imports", 2, nif_interp_apply_imports, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"interp_flush_imports", 2, nif_interp_flush_imports, ERL_NIF_DIRTY_JOB_CPU_BOUND}, + {"subinterp_pool_flush_generation", 0, nif_subinterp_pool_flush_generation, 0}, {"context_call_method", 4, nif_context_call_method, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"context_to_term", 1, nif_context_to_term, 0}, {"context_interp_id", 1, nif_context_interp_id, 0}, diff --git a/c_src/py_subinterp_pool.c b/c_src/py_subinterp_pool.c index 41b9875..9431948 100644 --- a/c_src/py_subinterp_pool.c +++ b/c_src/py_subinterp_pool.c @@ -47,6 +47,9 @@ static int g_pool_size = 0; /** @brief Pool initialized flag */ static _Atomic bool g_pool_initialized = false; +/** @brief Global import generation counter */ +static _Atomic uint64_t g_import_generation = 0; + /* ============================================================================ * Forward declarations for functions defined in other modules * ============================================================================ */ @@ -196,6 +199,9 @@ int subinterp_pool_init(int size) { } slot->initialized = true; + atomic_store(&slot->usage_count, 0); + atomic_store(&slot->marked_stale, false); + slot->generation = atomic_load(&g_import_generation); /* Swap back to main thread state before creating next subinterpreter */ PyThreadState_Swap(main_tstate); @@ -345,4 +351,193 @@ bool 
subinterp_pool_is_initialized(void) { return atomic_load(&g_pool_initialized); } +void subinterp_pool_acquire(int slot) { + if (slot < 0 || slot >= g_pool_size) { + return; + } + + subinterp_slot_t *s = &g_subinterp_pool[slot]; + if (!s->initialized) { + return; + } + + atomic_fetch_add(&s->usage_count, 1); +} + +/** + * @brief Reinitialize a pool slot with a fresh subinterpreter + * + * Called after destroying a stale subinterpreter to create a fresh one. + * Assumes GIL is already held. + * + * @param slot_idx Slot index + * @return 0 on success, -1 on failure + */ +static int subinterp_pool_reinit_slot(int slot_idx) { + subinterp_slot_t *slot = &g_subinterp_pool[slot_idx]; + + /* Save main thread state */ + PyThreadState *main_tstate = PyThreadState_Get(); + + /* Configure subinterpreter with SHARED GIL */ + PyInterpreterConfig config = { + .use_main_obmalloc = 0, + .allow_fork = 0, + .allow_exec = 0, + .allow_threads = 1, + .allow_daemon_threads = 0, + .check_multi_interp_extensions = 1, + .gil = PyInterpreterConfig_SHARED_GIL, + }; + + PyThreadState *tstate = NULL; + PyStatus status = Py_NewInterpreterFromConfig(&tstate, &config); + + if (PyStatus_Exception(status) || tstate == NULL) { + fprintf(stderr, "subinterp_pool_reinit_slot: failed to create subinterp for slot %d\n", slot_idx); + PyThreadState_Swap(main_tstate); + return -1; + } + + /* tstate is now the current thread state for the new interpreter */ + slot->interp = PyThreadState_GetInterpreter(tstate); + slot->tstate = tstate; + + /* Initialize globals/locals */ + slot->globals = PyDict_New(); + slot->locals = PyDict_New(); + slot->module_cache = PyDict_New(); + + if (slot->globals == NULL || slot->locals == NULL || slot->module_cache == NULL) { + Py_XDECREF(slot->module_cache); + Py_XDECREF(slot->globals); + Py_XDECREF(slot->locals); + Py_EndInterpreter(tstate); + PyThreadState_Swap(main_tstate); + return -1; + } + + /* Import __builtins__ into globals */ + PyObject *builtins = PyEval_GetBuiltins(); 
+ PyDict_SetItemString(slot->globals, "__builtins__", builtins); + + /* Create erlang module in this subinterpreter */ + if (create_erlang_module() >= 0) { + /* Register ReactorBuffer and PyBuffer */ + ReactorBuffer_register_with_reactor(); + PyBuffer_register_with_module(); + + /* Import erlang module into globals */ + PyObject *erlang_module = PyImport_ImportModule("erlang"); + if (erlang_module != NULL) { + PyDict_SetItemString(slot->globals, "erlang", erlang_module); + Py_DECREF(erlang_module); + } + } + + /* Initialize event loop for this subinterpreter */ + init_subinterpreter_event_loop(NULL); + + slot->initialized = true; + atomic_store(&slot->usage_count, 0); + atomic_store(&slot->marked_stale, false); + slot->generation = atomic_load(&g_import_generation); + + /* Swap back to main thread state */ + PyThreadState_Swap(main_tstate); + +#ifdef DEBUG + fprintf(stderr, "subinterp_pool_reinit_slot: reinitialized slot %d\n", slot_idx); +#endif + + return 0; +} + +void subinterp_pool_release(int slot) { + if (slot < 0 || slot >= g_pool_size) { + return; + } + + subinterp_slot_t *s = &g_subinterp_pool[slot]; + if (!s->initialized) { + return; + } + + int prev_count = atomic_fetch_sub(&s->usage_count, 1); + + /* If usage drops to 0 and slot is marked stale, destroy and reinitialize + * the subinterpreter. We need the GIL to safely do Python operations. 
*/ + if (prev_count == 1 && atomic_load(&s->marked_stale)) { + /* Acquire the GIL */ + PyGILState_STATE gstate = PyGILState_Ensure(); + + /* Save current thread state and swap to the subinterpreter */ + PyThreadState *saved = PyThreadState_Swap(s->tstate); + + /* Clean up Python objects */ + Py_XDECREF(s->module_cache); + Py_XDECREF(s->globals); + Py_XDECREF(s->locals); + + /* End the interpreter - this frees s->tstate */ + Py_EndInterpreter(s->tstate); + + /* Clear slot state */ + s->interp = NULL; + s->tstate = NULL; + s->globals = NULL; + s->locals = NULL; + s->module_cache = NULL; + s->initialized = false; + atomic_store(&s->marked_stale, false); + + /* Swap back to saved thread state (may be NULL if we didn't have one) */ + PyThreadState_Swap(saved); + + /* Reinitialize the slot with a fresh subinterpreter */ + if (subinterp_pool_reinit_slot(slot) < 0) { + /* Reinit failed - clear allocation bit so slot is skipped */ + uint64_t mask = ~(1ULL << slot); + atomic_fetch_and(&g_pool_allocation, mask); + fprintf(stderr, "subinterp_pool_release: failed to reinit slot %d\n", slot); + } + /* If reinit succeeded, slot stays allocated but now has fresh interp */ + + /* Release the GIL */ + PyGILState_Release(gstate); + +#ifdef DEBUG + fprintf(stderr, "subinterp_pool_release: recycled stale slot %d\n", slot); +#endif + } +} + +uint64_t subinterp_pool_flush_generation(void) { + if (!atomic_load(&g_pool_initialized)) { + return 0; + } + + /* Increment generation */ + uint64_t new_gen = atomic_fetch_add(&g_import_generation, 1) + 1; + + /* Mark all initialized slots as stale */ + for (int i = 0; i < g_pool_size; i++) { + subinterp_slot_t *slot = &g_subinterp_pool[i]; + if (slot->initialized) { + atomic_store(&slot->marked_stale, true); + } + } + +#ifdef DEBUG + fprintf(stderr, "subinterp_pool_flush_generation: incremented to %llu, marked all slots stale\n", + (unsigned long long)new_gen); +#endif + + return new_gen; +} + +uint64_t subinterp_pool_get_generation(void) { + 
return atomic_load(&g_import_generation); +} + #endif /* HAVE_SUBINTERPRETERS */ diff --git a/c_src/py_subinterp_pool.h b/c_src/py_subinterp_pool.h index 35ee917..bd00a38 100644 --- a/c_src/py_subinterp_pool.h +++ b/c_src/py_subinterp_pool.h @@ -84,6 +84,15 @@ typedef struct { /** @brief Whether this slot is initialized and ready for use */ bool initialized; + + /** @brief Number of contexts currently using this slot */ + _Atomic int usage_count; + + /** @brief Marked for destruction when usage_count reaches 0 */ + _Atomic bool marked_stale; + + /** @brief Import generation when slot was created */ + uint64_t generation; } subinterp_slot_t; /** @@ -163,6 +172,43 @@ void subinterp_pool_shutdown(void); */ bool subinterp_pool_is_initialized(void); +/** + * @brief Increment usage count for a slot + * + * Called when a context starts using a pool slot. + * + * @param slot Slot index + */ +void subinterp_pool_acquire(int slot); + +/** + * @brief Decrement usage count for a slot + * + * Called when a context stops using a pool slot. + * If the slot is marked stale and usage_count reaches 0, + * the subinterpreter is destroyed and the slot is cleared. + * + * @param slot Slot index + */ +void subinterp_pool_release(int slot); + +/** + * @brief Increment import generation and mark all pool slots stale + * + * Called by flush_imports to trigger subinterpreter replacement. + * Stale slots will be destroyed when their usage_count reaches 0. 
+ * + * @return The new generation number + */ +uint64_t subinterp_pool_flush_generation(void); + +/** + * @brief Get the current import generation + * + * @return Current generation number + */ +uint64_t subinterp_pool_get_generation(void); + #endif /* HAVE_SUBINTERPRETERS */ #endif /* PY_SUBINTERP_POOL_H */ diff --git a/src/py.erl b/src/py.erl index 8a15d8e..ed1f139 100644 --- a/src/py.erl +++ b/src/py.erl @@ -65,8 +65,6 @@ %% Import registry (global list applied to all interpreters) init_import_registry/0, get_imports/0, - del_import/1, - del_import/2, clear_imports/0, version/0, memory_stats/0, @@ -450,41 +448,6 @@ get_imports() -> _ -> ets:tab2list(?IMPORT_REGISTRY) end. -%% @doc Remove a module from the global import registry. -%% -%% Removes all entries for the specified module from the registry. -%% Does not affect already-running interpreters. -%% -%% @param Module Python module name -%% @returns ok --spec del_import(py_module()) -> ok. -del_import(Module) -> - ModuleBin = ensure_binary(Module), - case ets:info(?IMPORT_REGISTRY) of - undefined -> ok; - _ -> ets:match_delete(?IMPORT_REGISTRY, {ModuleBin, '_'}) - end, - ok. - -%% @doc Remove a specific module/function from the global import registry. -%% -%% Removes the specific entry from the registry. -%% Does not affect already-running interpreters. -%% -%% @param Module Python module name -%% @param Func Function name -%% @returns ok --spec del_import(py_module(), py_func()) -> ok. -del_import(Module, Func) -> - ModuleBin = ensure_binary(Module), - FuncBin = ensure_binary(Func), - case ets:info(?IMPORT_REGISTRY) of - undefined -> ok; - %% Use delete_object to delete the specific tuple (not all objects with same key) - _ -> ets:delete_object(?IMPORT_REGISTRY, {ModuleBin, FuncBin}) - end, - ok. - %% @doc Clear all registered imports from the global registry. %% %% Removes all entries from the registry. @@ -499,22 +462,25 @@ clear_imports() -> end, ok. 
-%% @doc Flush import caches from all interpreters and clear the registry. +%% @doc Flush import caches and trigger subinterpreter replacement. +%% +%% Clears the global import registry and marks all shared-GIL pool +%% subinterpreters as stale. When existing contexts using those +%% subinterpreters are destroyed, Python GC handles cleanup via +%% Py_EndInterpreter. New contexts will get fresh subinterpreters +%% with the updated import registry. %% -%% Clears the global import registry and flushes the module/function cache -%% in all running interpreters. Use this after modifying Python modules -%% on disk to force re-import. +%% OWN_GIL contexts each have their own subinterpreter which is +%% destroyed when the context is destroyed. %% %% @returns ok -spec flush_imports() -> ok. flush_imports() -> - %% Get the list of modules BEFORE clearing - Imports = get_imports(), - Modules = lists:usort([M || {M, _} <- Imports]), %% Clear the global registry clear_imports(), - %% Remove modules from sys.modules in all interpreters - py_context_router:flush_all_imports(Modules), + %% Mark all pool slots as stale - they will be destroyed when + %% usage count drops to 0 (all contexts using them are destroyed) + py_nif:subinterp_pool_flush_generation(), ok. %% @doc Get import registry statistics. diff --git a/src/py_nif.erl b/src/py_nif.erl index f3abe97..fdc6d1b 100644 --- a/src/py_nif.erl +++ b/src/py_nif.erl @@ -115,6 +115,7 @@ %% Per-interpreter import caching NIFs interp_apply_imports/2, interp_flush_imports/2, + subinterp_pool_flush_generation/0, add_reader/3, remove_reader/2, add_writer/3, @@ -877,6 +878,18 @@ interp_apply_imports(_Ref, _Imports) -> interp_flush_imports(_Ref, _Modules) -> ?NIF_STUB. +%% @doc Flush import generation and mark all pool slots stale. +%% +%% Increments the global import generation counter and marks all initialized +%% pool slots as stale. When a stale slot's usage count drops to 0, the +%% subinterpreter is automatically destroyed. 
New contexts will get fresh +%% subinterpreters with updated imports from the registry. +%% +%% @returns {ok, NewGeneration} +-spec subinterp_pool_flush_generation() -> {ok, non_neg_integer()}. +subinterp_pool_flush_generation() -> + ?NIF_STUB. + %% @doc Register a file descriptor for read monitoring. %% Uses enif_select to register with the Erlang scheduler. -spec add_reader(reference(), integer(), non_neg_integer()) -> diff --git a/test/py_import_SUITE.erl b/test/py_import_SUITE.erl index 9b08634..c51ee01 100644 --- a/test/py_import_SUITE.erl +++ b/test/py_import_SUITE.erl @@ -31,9 +31,6 @@ %% Import registry tests import_registry_test/1, import_applied_to_new_context_test/1, - del_import_test/1, - del_import_func_test/1, - del_import_reimport_test/1, clear_imports_test/1, get_imports_test/1, flush_clears_registry_test/1, @@ -69,9 +66,6 @@ groups() -> %% Import registry tests import_registry_test, import_applied_to_new_context_test, - del_import_test, - del_import_func_test, - del_import_reimport_test, clear_imports_test, get_imports_test, flush_clears_registry_test, @@ -93,6 +87,8 @@ init_per_suite(Config) -> Config. end_per_suite(_Config) -> + %% Clean up imports to avoid affecting subsequent test suites + py:clear_imports(), ok. init_per_group(_Group, Config) -> @@ -160,9 +156,6 @@ import_nonexistent_module_test(_Config) -> {error, Reason} = py:call(nonexistent_module_xyz, some_func, []), ?assert(is_tuple(Reason) orelse is_list(Reason) orelse is_binary(Reason) orelse is_atom(Reason)), - %% Clean up - ok = py:del_import(nonexistent_module_xyz), - ct:pal("Nonexistent module error at call time: ~p", [Reason]). 
%% @doc Test importing with nonexistent function name still imports the module @@ -181,9 +174,6 @@ import_nonexistent_function_test(_Config) -> %% But calling the nonexistent function will fail {error, _Reason} = py:call(json, nonexistent_function_xyz, []), - %% Clean up the registry entry - ok = py:del_import(json, nonexistent_function_xyz), - ct:pal("Import with invalid function succeeds (validation at call time)"). %% @doc Test that importing same module/function twice is idempotent @@ -474,94 +464,6 @@ import_applied_to_new_context_test(_Config) -> py_context:destroy(Ctx), ok = py:clear_imports(). -%% @doc Test removing a module from the registry -del_import_test(_Config) -> - %% Clear and add some imports - ok = py:clear_imports(), - ok = py:import(json), - ok = py:import(math), - ok = py:import(json, dumps), - - %% Verify they're in the registry - Imports1 = py:get_imports(), - ?assert(length(Imports1) >= 2), - - %% Remove json (should remove both json and json.dumps) - ok = py:del_import(json), - - %% Verify json entries are gone but math remains - Imports2 = py:get_imports(), - ?assertEqual(false, lists:member({<<"json">>, all}, Imports2)), - ?assertEqual(false, lists:member({<<"json">>, <<"dumps">>}, Imports2)), - ?assert(lists:member({<<"math">>, all}, Imports2)), - - ct:pal("Registry after del_import: ~p", [Imports2]). 
- -%% @doc Test removing a specific module/function from the registry -del_import_func_test(_Config) -> - %% Clear and add some imports - ok = py:clear_imports(), - ok = py:import(json), - ok = py:import(json, dumps), - ok = py:import(json, loads), - - %% Verify they're in the registry - Imports1 = py:get_imports(), - ?assert(lists:member({<<"json">>, all}, Imports1)), - ?assert(lists:member({<<"json">>, <<"dumps">>}, Imports1)), - ?assert(lists:member({<<"json">>, <<"loads">>}, Imports1)), - - %% Remove just json.dumps - ok = py:del_import(json, dumps), - - %% Verify dumps is gone but all and loads remain - Imports2 = py:get_imports(), - ?assert(lists:member({<<"json">>, all}, Imports2)), - ?assertEqual(false, lists:member({<<"json">>, <<"dumps">>}, Imports2)), - ?assert(lists:member({<<"json">>, <<"loads">>}, Imports2)), - - ct:pal("Registry after del_import/2: ~p", [Imports2]). - -%% @doc Test removing import and reimporting works correctly -%% -%% Verifies that: -%% 1. After importing, the module is available -%% 2. After removing and flushing, new subinterpreters don't have the module -%% 3. 
After reimporting, new subinterpreters have the module again -del_import_reimport_test(_Config) -> - %% Clear registry - ok = py:clear_imports(), - - %% Use 'plistlib' module - pure Python, obscure, not loaded by default - ok = py:import(plistlib), - ?assert(lists:member({<<"plistlib">>, all}, py:get_imports())), - - %% Create a subinterpreter and verify plistlib is available - {ok, Ctx1} = py_context:new(#{mode => owngil}), - {ok, true} = py_context:eval(Ctx1, <<"'plistlib' in __import__('sys').modules">>), - py_context:destroy(Ctx1), - - %% Remove from registry and flush all interpreters - ok = py:del_import(plistlib), - ok = py:flush_imports(), - ?assertEqual([], py:get_imports()), - - %% Create a new subinterpreter - plistlib should NOT be pre-imported - {ok, Ctx2} = py_context:new(#{mode => owngil}), - {ok, false} = py_context:eval(Ctx2, <<"'plistlib' in __import__('sys').modules">>), - py_context:destroy(Ctx2), - - %% Re-import plistlib - ok = py:import(plistlib), - ?assert(lists:member({<<"plistlib">>, all}, py:get_imports())), - - %% Create another subinterpreter - plistlib should be pre-imported again - {ok, Ctx3} = py_context:new(#{mode => owngil}), - {ok, true} = py_context:eval(Ctx3, <<"'plistlib' in __import__('sys').modules">>), - py_context:destroy(Ctx3), - - ct:pal("del_import + reimport test passed"). 
- %% @doc Test clearing all imports from the registry clear_imports_test(_Config) -> %% Add some imports diff --git a/test/py_import_owngil_SUITE.erl b/test/py_import_owngil_SUITE.erl index 071f541..835da48 100644 --- a/test/py_import_owngil_SUITE.erl +++ b/test/py_import_owngil_SUITE.erl @@ -67,6 +67,9 @@ init_per_suite(Config) -> %% Start application first (loads NIF) {ok, _} = application:ensure_all_started(erlang_python), timer:sleep(500), + %% Clear any imports from previous test suites to avoid + %% importing C extensions that crash in OWN_GIL subinterpreters + py:clear_imports(), %% Then check if subinterpreters are supported case py_nif:subinterp_supported() of true -> From 87032faf39f3d99901ad518fe9f82b46e0abeac8 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Mon, 23 Mar 2026 00:31:17 +0100 Subject: [PATCH 03/18] Add cache generation tracking for main interpreter contexts flush_imports now works correctly for all context types: - Subinterpreter pool: slots marked stale and replaced - OWN_GIL: each has own subinterpreter destroyed with context - Main interpreter: cache generation checked on module access Added unconditional import_cache_get_generation() and import_cache_flush_generation() functions that work regardless of subinterpreter support. 
--- c_src/py_nif.c | 30 +++++++++++++++++++++++++----- c_src/py_nif.h | 3 +++ c_src/py_subinterp_pool.c | 26 ++++++++++++++++++++++++++ c_src/py_subinterp_pool.h | 20 ++++++++++++++++++++ 4 files changed, 74 insertions(+), 5 deletions(-) diff --git a/c_src/py_nif.c b/c_src/py_nif.c index 0cc8e7e..8f1a3ba 100644 --- a/c_src/py_nif.c +++ b/c_src/py_nif.c @@ -4124,6 +4124,7 @@ static ERL_NIF_TERM nif_context_create(ErlNifEnv *env, int argc, const ERL_NIF_T ctx->globals = NULL; ctx->locals = NULL; ctx->module_cache = NULL; + ctx->cache_generation = 0; /* Create callback pipe for blocking callback responses */ if (pipe(ctx->callback_pipe) < 0) { @@ -4236,6 +4237,7 @@ static ERL_NIF_TERM nif_context_create(ErlNifEnv *env, int argc, const ERL_NIF_T ctx->globals = PyDict_New(); ctx->locals = PyDict_New(); ctx->module_cache = PyDict_New(); + ctx->cache_generation = import_cache_get_generation(); /* Import __builtins__ into globals */ PyObject *builtins = PyEval_GetBuiltins(); @@ -4368,6 +4370,25 @@ static ERL_NIF_TERM nif_context_destroy(ErlNifEnv *env, int argc, const ERL_NIF_ * Helper function - no mutex needed since context is process-owned. 
*/ static PyObject *context_get_module(py_context_t *ctx, const char *module_name) { + /* Check for stale cache (main interpreter contexts only) */ + if (ctx->module_cache != NULL) { +#ifdef HAVE_SUBINTERPRETERS + /* Pool-backed contexts get fresh interpreters on flush, so only check + * for non-pool contexts (main interpreter or OWN_GIL) */ + bool is_main_interp = (ctx->pool_slot < 0 && !ctx->uses_own_gil); +#else + bool is_main_interp = true; /* All contexts use main interpreter */ +#endif + if (is_main_interp) { + uint64_t current_gen = import_cache_get_generation(); + if (ctx->cache_generation != current_gen) { + /* Cache is stale - clear it and update generation */ + PyDict_Clear(ctx->module_cache); + ctx->cache_generation = current_gen; + } + } + } + /* Check cache first */ if (ctx->module_cache != NULL) { PyObject *cached = PyDict_GetItemString(ctx->module_cache, module_name); @@ -5103,12 +5124,11 @@ static ERL_NIF_TERM nif_subinterp_pool_flush_generation(ErlNifEnv *env, int argc (void)argc; (void)argv; -#ifdef HAVE_SUBINTERPRETERS - uint64_t new_gen = subinterp_pool_flush_generation(); + /* Use the unconditional version that works with or without subinterpreters. + * This increments the generation counter (used by main interpreter contexts) + * and also marks pool slots stale when subinterpreters are available. 
*/ + uint64_t new_gen = import_cache_flush_generation(); return enif_make_tuple2(env, ATOM_OK, enif_make_uint64(env, new_gen)); -#else - return enif_make_tuple2(env, ATOM_OK, enif_make_uint64(env, 0)); -#endif } /** diff --git a/c_src/py_nif.h b/c_src/py_nif.h index f287aa7..471dc6e 100644 --- a/c_src/py_nif.h +++ b/c_src/py_nif.h @@ -888,6 +888,9 @@ typedef struct { /** @brief Module cache (Dict: module_name -> PyModule) */ PyObject *module_cache; + + /** @brief Cache generation for staleness detection (main interpreter contexts) */ + uint64_t cache_generation; } py_context_t; /* ============================================================================ diff --git a/c_src/py_subinterp_pool.c b/c_src/py_subinterp_pool.c index 9431948..309cbed 100644 --- a/c_src/py_subinterp_pool.c +++ b/c_src/py_subinterp_pool.c @@ -540,4 +540,30 @@ uint64_t subinterp_pool_get_generation(void) { return atomic_load(&g_import_generation); } +#else /* !HAVE_SUBINTERPRETERS */ + +/* For systems without subinterpreter support, we still need the generation + * counter for main interpreter context cache invalidation */ +static _Atomic uint64_t g_import_generation_no_subinterp = 0; + #endif /* HAVE_SUBINTERPRETERS */ + +/* ============================================================================ + * Unconditional generation functions (work with or without subinterpreters) + * ============================================================================ */ + +uint64_t import_cache_get_generation(void) { +#ifdef HAVE_SUBINTERPRETERS + return atomic_load(&g_import_generation); +#else + return atomic_load(&g_import_generation_no_subinterp); +#endif +} + +uint64_t import_cache_flush_generation(void) { +#ifdef HAVE_SUBINTERPRETERS + return subinterp_pool_flush_generation(); +#else + return atomic_fetch_add(&g_import_generation_no_subinterp, 1) + 1; +#endif +} diff --git a/c_src/py_subinterp_pool.h b/c_src/py_subinterp_pool.h index bd00a38..75dde6b 100644 --- a/c_src/py_subinterp_pool.h +++ 
b/c_src/py_subinterp_pool.h @@ -211,4 +211,24 @@ uint64_t subinterp_pool_get_generation(void); #endif /* HAVE_SUBINTERPRETERS */ +/** + * @brief Get the current import generation (always available) + * + * This function is available regardless of subinterpreter support. + * It's used by main interpreter contexts to detect cache staleness. + * + * @return Current generation number (0 if never flushed) + */ +uint64_t import_cache_get_generation(void); + +/** + * @brief Increment the import generation counter (always available) + * + * This function is available regardless of subinterpreter support. + * For systems with subinterpreters, it also marks pool slots stale. + * + * @return The new generation number + */ +uint64_t import_cache_flush_generation(void); + #endif /* PY_SUBINTERP_POOL_H */ From 26f11b99311f3fc0d40b7755f68290614288b03b Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Mon, 23 Mar 2026 02:00:35 +0100 Subject: [PATCH 04/18] Remove flush_imports - module unloading not supported Removed flush_imports/0 and all related flush functionality: - py:flush_imports/0 - py_context:flush_imports/2 - py_nif:interp_flush_imports/2 - py_nif:subinterp_pool_flush_generation/0 - Cache generation tracking in py_context_t Import caching via py:import/1,2 remains fully functional. Module imports are cached per-interpreter and persist for the lifetime of the interpreter. 
--- c_src/py_nif.c | 198 -------------------------------- c_src/py_nif.h | 6 +- c_src/py_subinterp_pool.c | 26 ----- c_src/py_subinterp_pool.h | 20 ---- src/py.erl | 22 ---- src/py_context.erl | 28 +---- src/py_nif.erl | 26 ----- test/py_import_SUITE.erl | 58 +--------- test/py_import_owngil_SUITE.erl | 22 +--- 9 files changed, 9 insertions(+), 397 deletions(-) diff --git a/c_src/py_nif.c b/c_src/py_nif.c index 8f1a3ba..8adca7e 100644 --- a/c_src/py_nif.c +++ b/c_src/py_nif.c @@ -3224,43 +3224,6 @@ static void owngil_execute_apply_imports(py_context_t *ctx) { ctx->response_ok = true; } -/** - * @brief Execute flush_imports in OWN_GIL context - * - * Clears the per-context module cache optimization layer. - * Does NOT clear sys.modules (that would break Python). - */ -static void owngil_execute_flush_imports(py_context_t *ctx) { - /* Remove specified modules from sys.modules */ - PyObject *sys_modules = PyImport_GetModuleDict(); /* Borrowed ref */ - if (sys_modules != NULL) { - ERL_NIF_TERM head, tail = ctx->request_data; - while (enif_get_list_cell(ctx->shared_env, tail, &head, &tail)) { - ErlNifBinary mod_bin; - if (enif_inspect_binary(ctx->shared_env, head, &mod_bin)) { - char *mod_name = enif_alloc(mod_bin.size + 1); - if (mod_name != NULL) { - memcpy(mod_name, mod_bin.data, mod_bin.size); - mod_name[mod_bin.size] = '\0'; - /* Remove module from sys.modules if present */ - if (PyDict_GetItemString(sys_modules, mod_name) != NULL) { - PyDict_DelItemString(sys_modules, mod_name); - } - enif_free(mod_name); - } - } - } - } - - /* Clear the per-context optimization cache if it exists */ - if (ctx->module_cache != NULL) { - PyDict_Clear(ctx->module_cache); - } - - ctx->response_term = enif_make_atom(ctx->shared_env, "ok"); - ctx->response_ok = true; -} - /** * @brief Execute a request based on its type */ @@ -3299,9 +3262,6 @@ static void owngil_execute_request(py_context_t *ctx) { case CTX_REQ_APPLY_IMPORTS: owngil_execute_apply_imports(ctx); break; - case 
CTX_REQ_FLUSH_IMPORTS: - owngil_execute_flush_imports(ctx); - break; default: ctx->response_term = enif_make_tuple2(ctx->shared_env, enif_make_atom(ctx->shared_env, "error"), @@ -3911,50 +3871,6 @@ static ERL_NIF_TERM dispatch_apply_imports_to_owngil( return result; } -/** - * @brief Dispatch flush_imports to OWN_GIL worker thread - * - * @param env NIF environment - * @param ctx Context resource - * @param modules_list List of module names to remove from sys.modules - * @return ok - */ -static ERL_NIF_TERM dispatch_flush_imports_to_owngil( - ErlNifEnv *env, py_context_t *ctx, ERL_NIF_TERM modules_list -) { - if (!atomic_load(&ctx->thread_running)) { - return make_error(env, "thread_not_running"); - } - - pthread_mutex_lock(&ctx->request_mutex); - - enif_clear_env(ctx->shared_env); - ctx->request_type = CTX_REQ_FLUSH_IMPORTS; - ctx->request_data = enif_make_copy(ctx->shared_env, modules_list); - - pthread_cond_signal(&ctx->request_ready); - - /* Wait for response with timeout */ - struct timespec deadline; - clock_gettime(CLOCK_REALTIME, &deadline); - deadline.tv_sec += OWNGIL_DISPATCH_TIMEOUT_SECS; - - while (ctx->request_type != CTX_REQ_NONE) { - int rc = pthread_cond_timedwait(&ctx->response_ready, &ctx->request_mutex, &deadline); - if (rc == ETIMEDOUT) { - atomic_store(&ctx->thread_running, false); - pthread_mutex_unlock(&ctx->request_mutex); - fprintf(stderr, "OWN_GIL flush_imports dispatch timeout: worker thread unresponsive\n"); - return make_error(env, "worker_timeout"); - } - } - - ERL_NIF_TERM result = enif_make_copy(env, ctx->response_term); - pthread_mutex_unlock(&ctx->request_mutex); - - return result; -} - #endif /* HAVE_SUBINTERPRETERS */ /** @@ -4124,7 +4040,6 @@ static ERL_NIF_TERM nif_context_create(ErlNifEnv *env, int argc, const ERL_NIF_T ctx->globals = NULL; ctx->locals = NULL; ctx->module_cache = NULL; - ctx->cache_generation = 0; /* Create callback pipe for blocking callback responses */ if (pipe(ctx->callback_pipe) < 0) { @@ -4237,7 +4152,6 
@@ static ERL_NIF_TERM nif_context_create(ErlNifEnv *env, int argc, const ERL_NIF_T ctx->globals = PyDict_New(); ctx->locals = PyDict_New(); ctx->module_cache = PyDict_New(); - ctx->cache_generation = import_cache_get_generation(); /* Import __builtins__ into globals */ PyObject *builtins = PyEval_GetBuiltins(); @@ -4370,25 +4284,6 @@ static ERL_NIF_TERM nif_context_destroy(ErlNifEnv *env, int argc, const ERL_NIF_ * Helper function - no mutex needed since context is process-owned. */ static PyObject *context_get_module(py_context_t *ctx, const char *module_name) { - /* Check for stale cache (main interpreter contexts only) */ - if (ctx->module_cache != NULL) { -#ifdef HAVE_SUBINTERPRETERS - /* Pool-backed contexts get fresh interpreters on flush, so only check - * for non-pool contexts (main interpreter or OWN_GIL) */ - bool is_main_interp = (ctx->pool_slot < 0 && !ctx->uses_own_gil); -#else - bool is_main_interp = true; /* All contexts use main interpreter */ -#endif - if (is_main_interp) { - uint64_t current_gen = import_cache_get_generation(); - if (ctx->cache_generation != current_gen) { - /* Cache is stale - clear it and update generation */ - PyDict_Clear(ctx->module_cache); - ctx->cache_generation = current_gen; - } - } - } - /* Check cache first */ if (ctx->module_cache != NULL) { PyObject *cached = PyDict_GetItemString(ctx->module_cache, module_name); @@ -5040,97 +4935,6 @@ static ERL_NIF_TERM nif_interp_apply_imports(ErlNifEnv *env, int argc, const ERL return ATOM_OK; } -/** - * @brief Flush imports from an interpreter's sys.modules - * - * nif_interp_flush_imports(Ref, Modules) -> ok - * - * Removes specified modules from sys.modules and clears the per-context - * module_cache optimization layer. 
- * - * @param Ref Context reference - * @param Modules List of binary module names to remove from sys.modules - */ -static ERL_NIF_TERM nif_interp_flush_imports(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { - (void)argc; - py_context_t *ctx; - - if (!runtime_is_running()) { - return ATOM_OK; /* Nothing to flush if Python not running */ - } - - if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { - return make_error(env, "invalid_context"); - } - - if (ctx->destroyed) { - return ATOM_OK; /* Already destroyed */ - } - -#ifdef HAVE_SUBINTERPRETERS - /* OWN_GIL mode: dispatch to the dedicated thread */ - if (ctx->uses_own_gil) { - return dispatch_flush_imports_to_owngil(env, ctx, argv[1]); - } -#endif - - py_context_guard_t guard = py_context_acquire(ctx); - if (!guard.acquired) { - return ATOM_OK; /* Can't acquire - just return ok */ - } - - /* Remove specified modules from sys.modules */ - PyObject *sys_modules = PyImport_GetModuleDict(); /* Borrowed ref */ - if (sys_modules != NULL) { - ERL_NIF_TERM head, tail = argv[1]; - while (enif_get_list_cell(env, tail, &head, &tail)) { - ErlNifBinary mod_bin; - if (enif_inspect_binary(env, head, &mod_bin)) { - char *mod_name = binary_to_string(&mod_bin); - if (mod_name != NULL) { - /* Remove module from sys.modules if present */ - if (PyDict_GetItemString(sys_modules, mod_name) != NULL) { - PyDict_DelItemString(sys_modules, mod_name); - } - enif_free(mod_name); - } - } - } - } - - /* Clear per-context optimization cache */ - if (ctx->module_cache != NULL) { - PyDict_Clear(ctx->module_cache); - } - - py_context_release(&guard); - return ATOM_OK; -} - -/** - * @brief Flush import generation and mark all pool slots stale - * - * nif_subinterp_pool_flush_generation() -> {ok, NewGeneration} - * - * Increments the global import generation counter and marks all initialized - * pool slots as stale. 
When a stale slot's usage count drops to 0, the - * subinterpreter is automatically destroyed and the slot becomes available - * for a fresh subinterpreter. - * - * This enables flush_imports to work by replacing subinterpreters rather - * than trying to manipulate sys.modules directly (which can crash C extensions). - */ -static ERL_NIF_TERM nif_subinterp_pool_flush_generation(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { - (void)argc; - (void)argv; - - /* Use the unconditional version that works with or without subinterpreters. - * This increments the generation counter (used by main interpreter contexts) - * and also marks pool slots stale when subinterpreters are available. */ - uint64_t new_gen = import_cache_flush_generation(); - return enif_make_tuple2(env, ATOM_OK, enif_make_uint64(env, new_gen)); -} - /** * @brief Execute Python statements using a process-local environment * @@ -7261,8 +7065,6 @@ static ErlNifFunc nif_funcs[] = { {"context_call", 6, nif_context_call_with_env, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"create_local_env", 1, nif_create_local_env, 0}, {"interp_apply_imports", 2, nif_interp_apply_imports, ERL_NIF_DIRTY_JOB_CPU_BOUND}, - {"interp_flush_imports", 2, nif_interp_flush_imports, ERL_NIF_DIRTY_JOB_CPU_BOUND}, - {"subinterp_pool_flush_generation", 0, nif_subinterp_pool_flush_generation, 0}, {"context_call_method", 4, nif_context_call_method, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"context_to_term", 1, nif_context_to_term, 0}, {"context_interp_id", 1, nif_context_interp_id, 0}, diff --git a/c_src/py_nif.h b/c_src/py_nif.h index 471dc6e..c782216 100644 --- a/c_src/py_nif.h +++ b/c_src/py_nif.h @@ -731,8 +731,7 @@ typedef enum { CTX_REQ_EVAL_WITH_ENV, /**< Eval with process-local environment */ CTX_REQ_EXEC_WITH_ENV, /**< Exec with process-local environment */ CTX_REQ_CREATE_LOCAL_ENV, /**< Create process-local env dicts */ - CTX_REQ_APPLY_IMPORTS, /**< Apply imports to module cache */ - CTX_REQ_FLUSH_IMPORTS /**< Flush module cache */ + 
CTX_REQ_APPLY_IMPORTS /**< Apply imports to module cache */ } ctx_request_type_t; /** @@ -888,9 +887,6 @@ typedef struct { /** @brief Module cache (Dict: module_name -> PyModule) */ PyObject *module_cache; - - /** @brief Cache generation for staleness detection (main interpreter contexts) */ - uint64_t cache_generation; } py_context_t; /* ============================================================================ diff --git a/c_src/py_subinterp_pool.c b/c_src/py_subinterp_pool.c index 309cbed..9431948 100644 --- a/c_src/py_subinterp_pool.c +++ b/c_src/py_subinterp_pool.c @@ -540,30 +540,4 @@ uint64_t subinterp_pool_get_generation(void) { return atomic_load(&g_import_generation); } -#else /* !HAVE_SUBINTERPRETERS */ - -/* For systems without subinterpreter support, we still need the generation - * counter for main interpreter context cache invalidation */ -static _Atomic uint64_t g_import_generation_no_subinterp = 0; - #endif /* HAVE_SUBINTERPRETERS */ - -/* ============================================================================ - * Unconditional generation functions (work with or without subinterpreters) - * ============================================================================ */ - -uint64_t import_cache_get_generation(void) { -#ifdef HAVE_SUBINTERPRETERS - return atomic_load(&g_import_generation); -#else - return atomic_load(&g_import_generation_no_subinterp); -#endif -} - -uint64_t import_cache_flush_generation(void) { -#ifdef HAVE_SUBINTERPRETERS - return subinterp_pool_flush_generation(); -#else - return atomic_fetch_add(&g_import_generation_no_subinterp, 1) + 1; -#endif -} diff --git a/c_src/py_subinterp_pool.h b/c_src/py_subinterp_pool.h index 75dde6b..bd00a38 100644 --- a/c_src/py_subinterp_pool.h +++ b/c_src/py_subinterp_pool.h @@ -211,24 +211,4 @@ uint64_t subinterp_pool_get_generation(void); #endif /* HAVE_SUBINTERPRETERS */ -/** - * @brief Get the current import generation (always available) - * - * This function is available regardless of 
subinterpreter support. - * It's used by main interpreter contexts to detect cache staleness. - * - * @return Current generation number (0 if never flushed) - */ -uint64_t import_cache_get_generation(void); - -/** - * @brief Increment the import generation counter (always available) - * - * This function is available regardless of subinterpreter support. - * For systems with subinterpreters, it also marks pool slots stale. - * - * @return The new generation number - */ -uint64_t import_cache_flush_generation(void); - #endif /* PY_SUBINTERP_POOL_H */ diff --git a/src/py.erl b/src/py.erl index ed1f139..939d296 100644 --- a/src/py.erl +++ b/src/py.erl @@ -59,7 +59,6 @@ %% Module import caching import/1, import/2, - flush_imports/0, import_stats/0, import_list/0, %% Import registry (global list applied to all interpreters) @@ -462,27 +461,6 @@ clear_imports() -> end, ok. -%% @doc Flush import caches and trigger subinterpreter replacement. -%% -%% Clears the global import registry and marks all shared-GIL pool -%% subinterpreters as stale. When existing contexts using those -%% subinterpreters are destroyed, Python GC handles cleanup via -%% Py_EndInterpreter. New contexts will get fresh subinterpreters -%% with the updated import registry. -%% -%% OWN_GIL contexts each have their own subinterpreter which is -%% destroyed when the context is destroyed. -%% -%% @returns ok --spec flush_imports() -> ok. -flush_imports() -> - %% Clear the global registry - clear_imports(), - %% Mark all pool slots as stale - they will be destroyed when - %% usage count drops to 0 (all contexts using them are destroyed) - py_nif:subinterp_pool_flush_generation(), - ok. - %% @doc Get import registry statistics. %% %% Returns a map with the count of registered imports. 
diff --git a/src/py_context.erl b/src/py_context.erl index 02bb0af..e58726b 100644 --- a/src/py_context.erl +++ b/src/py_context.erl @@ -53,8 +53,7 @@ get_interp_id/1, is_subinterp/1, create_local_env/1, - get_nif_ref/1, - flush_imports/2 + get_nif_ref/1 ]). %% Internal exports @@ -412,26 +411,6 @@ get_nif_ref(Ctx) when is_pid(Ctx) -> error({context_died, Reason}) end. -%% @doc Flush the module cache from this context's interpreter. -%% -%% Clears all cached modules from the interpreter's module_cache. -%% Removes specified modules from sys.modules in the interpreter. -%% -%% @param Ctx Context process -%% @param Modules List of module names (binaries) to remove -%% @returns ok --spec flush_imports(context(), [binary()]) -> ok. -flush_imports(Ctx, Modules) when is_pid(Ctx), is_list(Modules) -> - MRef = erlang:monitor(process, Ctx), - Ctx ! {flush_imports, self(), MRef, Modules}, - receive - {MRef, ok} -> - erlang:demonitor(MRef, [flush]), - ok; - {'DOWN', MRef, process, Ctx, _Reason} -> - ok - end. - %% ============================================================================ %% Internal functions %% ============================================================================ @@ -616,11 +595,6 @@ loop(#state{ref = Ref, interp_id = InterpId} = State) -> From ! {MRef, Ref}, loop(State); - {flush_imports, From, MRef, Modules} -> - py_nif:interp_flush_imports(Ref, Modules), - From ! {MRef, ok}, - loop(State); - {stop, From, MRef} -> terminate(normal, State), From ! {MRef, ok}; diff --git a/src/py_nif.erl b/src/py_nif.erl index fdc6d1b..3620637 100644 --- a/src/py_nif.erl +++ b/src/py_nif.erl @@ -114,8 +114,6 @@ event_loop_eval/2, %% Per-interpreter import caching NIFs interp_apply_imports/2, - interp_flush_imports/2, - subinterp_pool_flush_generation/0, add_reader/3, remove_reader/2, add_writer/3, @@ -866,30 +864,6 @@ event_loop_eval(_LoopRef, _Expr) -> interp_apply_imports(_Ref, _Imports) -> ?NIF_STUB. -%% @doc Flush the module cache from an interpreter. 
-%% -%% Removes specified modules from sys.modules and clears the module_cache. -%% Used when flushing imports from the global registry. -%% -%% @param Ref Context reference (from context_create/1) -%% @param Modules List of module names (binaries) to remove from sys.modules -%% @returns ok --spec interp_flush_imports(reference(), [binary()]) -> ok. -interp_flush_imports(_Ref, _Modules) -> - ?NIF_STUB. - -%% @doc Flush import generation and mark all pool slots stale. -%% -%% Increments the global import generation counter and marks all initialized -%% pool slots as stale. When a stale slot's usage count drops to 0, the -%% subinterpreter is automatically destroyed. New contexts will get fresh -%% subinterpreters with updated imports from the registry. -%% -%% @returns {ok, NewGeneration} --spec subinterp_pool_flush_generation() -> {ok, non_neg_integer()}. -subinterp_pool_flush_generation() -> - ?NIF_STUB. - %% @doc Register a file descriptor for read monitoring. %% Uses enif_select to register with the Erlang scheduler. -spec add_reader(reference(), integer(), non_neg_integer()) -> diff --git a/test/py_import_SUITE.erl b/test/py_import_SUITE.erl index c51ee01..dea7cca 100644 --- a/test/py_import_SUITE.erl +++ b/test/py_import_SUITE.erl @@ -1,4 +1,4 @@ -%%% @doc Test suite for py:import/1,2 and py:flush_imports/0 +%%% @doc Test suite for py:import/1,2 -module(py_import_SUITE). -include_lib("common_test/include/ct.hrl"). 
@@ -22,7 +22,6 @@ import_nonexistent_module_test/1, import_nonexistent_function_test/1, import_idempotent_test/1, - flush_imports_test/1, import_stats_test/1, import_list_test/1, import_speeds_up_calls_test/1, @@ -33,7 +32,6 @@ import_applied_to_new_context_test/1, clear_imports_test/1, get_imports_test/1, - flush_clears_registry_test/1, %% Per-interpreter sharing tests shared_interpreter_import_test/1, event_loop_pool_import_test/1, @@ -57,7 +55,6 @@ groups() -> import_nonexistent_module_test, import_nonexistent_function_test, import_idempotent_test, - flush_imports_test, import_stats_test, import_list_test, import_speeds_up_calls_test, @@ -68,7 +65,6 @@ groups() -> import_applied_to_new_context_test, clear_imports_test, get_imports_test, - flush_clears_registry_test, %% Per-interpreter sharing tests shared_interpreter_import_test, event_loop_pool_import_test, @@ -99,7 +95,7 @@ end_per_group(_Group, _Config) -> init_per_testcase(_TestCase, Config) -> %% Flush imports before each test for clean state - py:flush_imports(), + py:clear_imports(), Config. end_per_testcase(_TestCase, _Config) -> @@ -190,33 +186,10 @@ import_idempotent_test(_Config) -> %% Still works {ok, _} = py:call(json, dumps, [[1]]). -%% @doc Test flushing imports -flush_imports_test(_Config) -> - %% Import some modules - ok = py:import(json), - ok = py:import(math), - ok = py:import(json, dumps), - - %% Get stats before flush - {ok, StatsBefore} = py:import_stats(), - CountBefore = maps:get(count, StatsBefore, 0), - ?assert(CountBefore >= 3), - - %% Flush - ok = py:flush_imports(), - - %% Stats should show empty cache - {ok, StatsAfter} = py:import_stats(), - CountAfter = maps:get(count, StatsAfter, 0), - ?assertEqual(0, CountAfter), - - %% Calls still work (they re-import) - {ok, _} = py:call(json, dumps, [[1]]). 
- %% @doc Test import stats import_stats_test(_Config) -> %% Start fresh - ok = py:flush_imports(), + ok = py:clear_imports(), %% Check empty stats {ok, Stats0} = py:import_stats(), @@ -244,7 +217,7 @@ import_stats_test(_Config) -> %% @doc Test listing imports import_list_test(_Config) -> %% Start fresh - ok = py:flush_imports(), + ok = py:clear_imports(), %% Empty map {ok, Map0} = py:import_list(), @@ -278,7 +251,7 @@ import_list_test(_Config) -> %% @doc Test that pre-importing speeds up subsequent calls import_speeds_up_calls_test(_Config) -> %% Flush to ensure cold start - ok = py:flush_imports(), + ok = py:clear_imports(), %% Time a cold call (module not imported) %% Using json.dumps since hashlib.md5 needs bytes encoding @@ -504,27 +477,6 @@ get_imports_test(_Config) -> ct:pal("get_imports result: ~p", [Imports]). -%% @doc Test that flush_imports clears the registry -flush_clears_registry_test(_Config) -> - %% Add some imports - ok = py:import(json), - ok = py:import(math), - - %% Verify they're in the registry - Imports1 = py:get_imports(), - ?assert(length(Imports1) >= 2), - - %% Flush imports (clears registry + interpreter caches) - ok = py:flush_imports(), - - %% Verify registry is empty - Imports2 = py:get_imports(), - ?assertEqual([], Imports2), - - %% Verify calls still work (modules are re-imported on demand) - {ok, _} = py:call(json, dumps, [[1]]), - {ok, _} = py:call(math, sqrt, [4.0]). 
- %% ============================================================================ %% Per-Interpreter Sharing Tests %% ============================================================================ diff --git a/test/py_import_owngil_SUITE.erl b/test/py_import_owngil_SUITE.erl index 835da48..81ce4c7 100644 --- a/test/py_import_owngil_SUITE.erl +++ b/test/py_import_owngil_SUITE.erl @@ -26,8 +26,7 @@ owngil_import_function_test/1, owngil_import_main_rejected_test/1, owngil_import_stats_test/1, - owngil_import_list_test/1, - owngil_flush_imports_test/1 + owngil_import_list_test/1 ]). %% Isolation tests @@ -52,8 +51,7 @@ groups() -> owngil_import_function_test, owngil_import_main_rejected_test, owngil_import_stats_test, - owngil_import_list_test, - owngil_flush_imports_test + owngil_import_list_test ]}, {isolation, [sequence], [ owngil_import_isolation_test, @@ -194,22 +192,6 @@ owngil_import_list_test(_Config) -> py_context:destroy(Ctx) end. -%% @doc Test flush imports functionality -owngil_flush_imports_test(_Config) -> - Ctx = create_owngil_context(), - try - %% Import a module - ok = py_context:exec(Ctx, <<"import json">>), - {ok, _} = py_context:call(Ctx, json, dumps, [[1]]), - - %% Module should still work after re-import - ok = py_context:exec(Ctx, <<"import json">>), - {ok, Result} = py_context:call(Ctx, json, dumps, [[2, 3]]), - ?assertEqual(<<"[2, 3]">>, Result) - after - py_context:destroy(Ctx) - end. 
- %% ============================================================================ %% Isolation Tests %% ============================================================================ From 06c6c78aa08c37cd0accdb321e8cf6bdd56a5b05 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Mon, 23 Mar 2026 02:08:20 +0100 Subject: [PATCH 05/18] Rename import functions for clarity - py:import/1,2 -> py:ensure_imported/1,2 - py:get_imports/0 -> py:all_imports/0 - Add py:is_imported/1,2 to check registry status - Update test suite to use new names --- src/py.erl | 63 ++++++++++++++++----- test/py_import_SUITE.erl | 118 +++++++++++++++++++-------------------- 2 files changed, 107 insertions(+), 74 deletions(-) diff --git a/src/py.erl b/src/py.erl index 939d296..9830a61 100644 --- a/src/py.erl +++ b/src/py.erl @@ -57,13 +57,15 @@ stream_eval/1, stream_eval/2, %% Module import caching - import/1, - import/2, + ensure_imported/1, + ensure_imported/2, + is_imported/1, + is_imported/2, import_stats/0, import_list/0, %% Import registry (global list applied to all interpreters) init_import_registry/0, - get_imports/0, + all_imports/0, clear_imports/0, version/0, memory_stats/0, @@ -371,14 +373,14 @@ init_import_registry() -> %% %% Example: %% ``` -%% ok = py:import(json), +%% ok = py:ensure_imported(json), %% {ok, Result} = py:call(json, dumps, [Data]). %% Module imported on first use %% ''' %% %% @param Module Python module name %% @returns ok | {error, Reason} --spec import(py_module()) -> ok | {error, term()}. -import(Module) -> +-spec ensure_imported(py_module()) -> ok | {error, term()}. +ensure_imported(Module) -> ModuleBin = ensure_binary(Module), %% Reject __main__ case ModuleBin of @@ -403,15 +405,15 @@ import(Module) -> %% %% Example: %% ``` -%% ok = py:import(json, dumps), +%% ok = py:ensure_imported(json, dumps), %% {ok, Result} = py:call(json, dumps, [Data]). 
%% Module imported on first use %% ''' %% %% @param Module Python module name %% @param Func Function name to register %% @returns ok | {error, Reason} --spec import(py_module(), py_func()) -> ok | {error, term()}. -import(Module, Func) -> +-spec ensure_imported(py_module(), py_func()) -> ok | {error, term()}. +ensure_imported(Module, Func) -> ModuleBin = ensure_binary(Module), FuncBin = ensure_binary(Func), %% Reject __main__ @@ -427,6 +429,37 @@ import(Module, Func) -> ok end. +%% @doc Check if a module is registered in the import registry. +%% +%% @param Module Python module name +%% @returns true if module is registered, false otherwise +-spec is_imported(py_module()) -> boolean(). +is_imported(Module) -> + ModuleBin = ensure_binary(Module), + case ets:info(?IMPORT_REGISTRY) of + undefined -> false; + _ -> ets:member(?IMPORT_REGISTRY, ModuleBin) + end. + +%% @doc Check if a module/function is registered in the import registry. +%% +%% @param Module Python module name +%% @param Func Function name +%% @returns true if module/function is registered, false otherwise +-spec is_imported(py_module(), py_func()) -> boolean(). +is_imported(Module, Func) -> + ModuleBin = ensure_binary(Module), + FuncBin = ensure_binary(Func), + case ets:info(?IMPORT_REGISTRY) of + undefined -> false; + _ -> + case ets:lookup(?IMPORT_REGISTRY, ModuleBin) of + [{_, all}] -> true; + [{_, FuncBin}] -> true; + _ -> false + end + end. + %% @doc Get all registered imports from the global registry. %% %% Returns a list of {Module, Func | all} tuples representing all @@ -434,14 +467,14 @@ import(Module, Func) -> %% %% Example: %% ``` -%% ok = py:import(json), -%% ok = py:import(math, sqrt), -%% [{<<"json">>, all}, {<<"math">>, <<"sqrt">>}] = py:get_imports(). +%% ok = py:ensure_imported(json), +%% ok = py:ensure_imported(math, sqrt), +%% [{<<"json">>, all}, {<<"math">>, <<"sqrt">>}] = py:all_imports(). 
%% ''' %% %% @returns List of {Module, Func | all} tuples --spec get_imports() -> [{binary(), binary() | all}]. -get_imports() -> +-spec all_imports() -> [{binary(), binary() | all}]. +all_imports() -> case ets:info(?IMPORT_REGISTRY) of undefined -> []; _ -> ets:tab2list(?IMPORT_REGISTRY) @@ -498,7 +531,7 @@ import_stats() -> %% @returns {ok, #{Module => [Func]}} map of modules to functions -spec import_list() -> {ok, #{binary() => [binary()]}} | {error, term()}. import_list() -> - Imports = get_imports(), + Imports = all_imports(), %% Group by module Map = lists:foldl(fun({Module, FuncOrAll}, Acc) -> Existing = maps:get(Module, Acc, []), diff --git a/test/py_import_SUITE.erl b/test/py_import_SUITE.erl index dea7cca..bfdd17f 100644 --- a/test/py_import_SUITE.erl +++ b/test/py_import_SUITE.erl @@ -1,4 +1,4 @@ -%%% @doc Test suite for py:import/1,2 +%%% @doc Test suite for py:ensure_imported/1,2 -module(py_import_SUITE). -include_lib("common_test/include/ct.hrl"). @@ -104,40 +104,40 @@ end_per_testcase(_TestCase, _Config) -> %% @doc Test importing a module import_module_test(_Config) -> %% Import json module - ok = py:import(json), + ok = py:ensure_imported(json), %% Verify it works by calling a function {ok, Result} = py:call(json, dumps, [[1, 2, 3]]), ?assertEqual(<<"[1, 2, 3]">>, Result), %% Import with binary name - ok = py:import(<<"math">>), + ok = py:ensure_imported(<<"math">>), {ok, Pi} = py:call(math, sqrt, [4.0]), ?assertEqual(2.0, Pi). %% @doc Test importing a specific function import_function_test(_Config) -> %% Import json.dumps - ok = py:import(json, dumps), + ok = py:ensure_imported(json, dumps), %% Verify it works {ok, Result} = py:call(json, dumps, [#{a => 1}]), ?assert(is_binary(Result)), %% Import with binary names - ok = py:import(<<"os">>, <<"getcwd">>), + ok = py:ensure_imported(<<"os">>, <<"getcwd">>), {ok, Cwd} = py:call(os, getcwd, []), ?assert(is_binary(Cwd)). 
%% @doc Test that __main__ cannot be imported import_main_rejected_test(_Config) -> %% __main__ should be rejected - {error, main_not_cacheable} = py:import('__main__'), - {error, main_not_cacheable} = py:import(<<"__main__">>), + {error, main_not_cacheable} = py:ensure_imported('__main__'), + {error, main_not_cacheable} = py:ensure_imported(<<"__main__">>), %% Also for function import - {error, main_not_cacheable} = py:import('__main__', some_func), - {error, main_not_cacheable} = py:import(<<"__main__">>, <<"some_func">>). + {error, main_not_cacheable} = py:ensure_imported('__main__', some_func), + {error, main_not_cacheable} = py:ensure_imported(<<"__main__">>, <<"some_func">>). %% @doc Test importing nonexistent module - registry accepts it but call fails %% @@ -145,7 +145,7 @@ import_main_rejected_test(_Config) -> %% when trying to use the module. import_nonexistent_module_test(_Config) -> %% Import succeeds (just adds to registry) - ok = py:import(nonexistent_module_xyz), + ok = py:ensure_imported(nonexistent_module_xyz), %% But trying to use it fails %% Error format is {error, {ExceptionType, Message}} or {error, atom()} @@ -162,7 +162,7 @@ import_nonexistent_module_test(_Config) -> import_nonexistent_function_test(_Config) -> %% Module exists but function doesn't - import still succeeds %% because we're importing the MODULE, not the function - ok = py:import(json, nonexistent_function_xyz), + ok = py:ensure_imported(json, nonexistent_function_xyz), %% The module is imported and usable {ok, _} = py:call(json, dumps, [[1, 2, 3]]), @@ -175,13 +175,13 @@ import_nonexistent_function_test(_Config) -> %% @doc Test that importing same module/function twice is idempotent import_idempotent_test(_Config) -> %% Import multiple times - should all succeed - ok = py:import(json), - ok = py:import(json), - ok = py:import(json), + ok = py:ensure_imported(json), + ok = py:ensure_imported(json), + ok = py:ensure_imported(json), - ok = py:import(json, dumps), - ok = 
py:import(json, dumps), - ok = py:import(json, dumps), + ok = py:ensure_imported(json, dumps), + ok = py:ensure_imported(json, dumps), + ok = py:ensure_imported(json, dumps), %% Still works {ok, _} = py:call(json, dumps, [[1]]). @@ -196,9 +196,9 @@ import_stats_test(_Config) -> ?assertEqual(0, maps:get(count, Stats0, 0)), %% Import some modules - ok = py:import(json), - ok = py:import(math), - ok = py:import(os), + ok = py:ensure_imported(json), + ok = py:ensure_imported(math), + ok = py:ensure_imported(os), %% Check stats {ok, Stats1} = py:import_stats(), @@ -206,8 +206,8 @@ import_stats_test(_Config) -> ?assertEqual(3, Count1), %% Import functions - ok = py:import(json, dumps), - ok = py:import(json, loads), + ok = py:ensure_imported(json, dumps), + ok = py:ensure_imported(json, loads), %% Check updated stats (3 modules + 2 functions = 5) {ok, Stats2} = py:import_stats(), @@ -224,10 +224,10 @@ import_list_test(_Config) -> ?assertEqual(#{}, Map0), %% Import some modules and functions - ok = py:import(json), - ok = py:import(math), - ok = py:import(json, dumps), - ok = py:import(json, loads), + ok = py:ensure_imported(json), + ok = py:ensure_imported(math), + ok = py:ensure_imported(json, dumps), + ok = py:ensure_imported(json, loads), %% Get map {ok, Map1} = py:import_list(), @@ -260,8 +260,8 @@ import_speeds_up_calls_test(_Config) -> end), %% Pre-import the module and function - ok = py:import(json), - ok = py:import(json, dumps), + ok = py:ensure_imported(json), + ok = py:ensure_imported(json, dumps), %% Time a warm call (module already imported) {WarmTime, {ok, _}} = timer:tc(fun() -> @@ -287,26 +287,26 @@ import_multiprocess_test(_Config) -> %% Spawn 3 processes, each importing different modules %% They all contribute to the same global registry Pid1 = spawn_link(fun() -> - ok = py:import(json), - ok = py:import(json, dumps), + ok = py:ensure_imported(json), + ok = py:ensure_imported(json, dumps), %% Verify we can use the import {ok, _} = py:call(json, dumps, 
[[1,2,3]]), Parent ! {self(), done} end), Pid2 = spawn_link(fun() -> - ok = py:import(math), - ok = py:import(math, sqrt), - ok = py:import(math, floor), + ok = py:ensure_imported(math), + ok = py:ensure_imported(math, sqrt), + ok = py:ensure_imported(math, floor), %% Verify we can use the import {ok, _} = py:call(math, sqrt, [16.0]), Parent ! {self(), done} end), Pid3 = spawn_link(fun() -> - ok = py:import(os), - ok = py:import(os, getcwd), - ok = py:import(string), + ok = py:ensure_imported(os), + ok = py:ensure_imported(os, getcwd), + ok = py:ensure_imported(string), %% Verify we can use the import {ok, _} = py:call(os, getcwd, []), Parent ! {self(), done} @@ -359,7 +359,7 @@ import_concurrent_stress_test(_Config) -> Pids = [spawn_link(fun() -> %% Each process imports a random subset of modules MyModules = lists:sublist(Modules, 1 + (N rem length(Modules))), - Results = [{M, py:import(M)} || M <- MyModules], + Results = [{M, py:ensure_imported(M)} || M <- MyModules], %% All imports should succeed AllOk = lists:all(fun({_, R}) -> R =:= ok end, Results), @@ -400,20 +400,20 @@ import_registry_test(_Config) -> ok = py:clear_imports(), %% Verify registry is empty - [] = py:get_imports(), + [] = py:all_imports(), %% Import a module - ok = py:import(json), + ok = py:ensure_imported(json), %% Verify it's in the registry - Imports1 = py:get_imports(), + Imports1 = py:all_imports(), ?assert(lists:member({<<"json">>, all}, Imports1)), %% Import a function - ok = py:import(math, sqrt), + ok = py:ensure_imported(math, sqrt), %% Verify both are in the registry - Imports2 = py:get_imports(), + Imports2 = py:all_imports(), ?assert(lists:member({<<"json">>, all}, Imports2)), ?assert(lists:member({<<"math">>, <<"sqrt">>}, Imports2)), @@ -423,7 +423,7 @@ import_registry_test(_Config) -> import_applied_to_new_context_test(_Config) -> %% Clear and add an import ok = py:clear_imports(), - ok = py:import(json), + ok = py:ensure_imported(json), %% Create a new context {ok, Ctx} = 
py_context:new(#{mode => auto}), @@ -440,30 +440,30 @@ import_applied_to_new_context_test(_Config) -> %% @doc Test clearing all imports from the registry clear_imports_test(_Config) -> %% Add some imports - ok = py:import(json), - ok = py:import(math), - ok = py:import(os), + ok = py:ensure_imported(json), + ok = py:ensure_imported(math), + ok = py:ensure_imported(os), %% Verify they're in the registry - Imports1 = py:get_imports(), + Imports1 = py:all_imports(), ?assert(length(Imports1) >= 3), %% Clear all ok = py:clear_imports(), %% Verify registry is empty - Imports2 = py:get_imports(), + Imports2 = py:all_imports(), ?assertEqual([], Imports2). %% @doc Test get_imports returns the correct format get_imports_test(_Config) -> %% Clear and add imports ok = py:clear_imports(), - ok = py:import(json), - ok = py:import(math, sqrt), + ok = py:ensure_imported(json), + ok = py:ensure_imported(math, sqrt), %% Get imports - Imports = py:get_imports(), + Imports = py:all_imports(), %% Verify format ?assert(is_list(Imports)), @@ -517,14 +517,14 @@ event_loop_pool_import_test(_Config) -> ok = py:clear_imports(), %% Import via py:import (goes to event loop pool's interpreter) - ok = py:import(collections), + ok = py:ensure_imported(collections), %% Verify we can use it via py:call (uses event loop pool) {ok, Result} = py:call(collections, 'Counter', [[a, b, a, c, a, b]]), ?assert(is_map(Result) orelse is_tuple(Result)), %% Import another module - ok = py:import(itertools), + ok = py:ensure_imported(itertools), %% Use it {ok, _} = py:call(itertools, chain, [[[1, 2], [3, 4]]]), @@ -540,7 +540,7 @@ spawn_task_uses_import_test(_Config) -> ok = py:clear_imports(), %% Import base64 module - ok = py:import(base64), + ok = py:ensure_imported(base64), %% Define a simple function that uses base64 Code = <<" @@ -613,7 +613,7 @@ registry_applied_to_subinterp_test(_Config) -> true -> %% Clear registry and add an import ok = py:clear_imports(), - ok = py:import(uuid), + ok = 
py:ensure_imported(uuid), %% Create a new subinterp context {ok, Ctx} = py_context:new(#{mode => subinterp}), @@ -643,7 +643,7 @@ import_in_sys_modules_test(_Config) -> ok = py:clear_imports(), %% Import a module - ok = py:import(decimal), + ok = py:ensure_imported(decimal), %% Verify the import worked by calling a function {ok, _} = py:call(decimal, 'Decimal', [<<"3.14">>]), @@ -668,11 +668,11 @@ registry_import_in_sys_modules_test(_Config) -> ok = py:clear_imports(), %% Add to registry and import - ok = py:import(fractions), - ok = py:import(statistics), + ok = py:ensure_imported(fractions), + ok = py:ensure_imported(statistics), %% Verify ETS registry has the entries - Registry = py:get_imports(), + Registry = py:all_imports(), ?assert(lists:member({<<"fractions">>, all}, Registry)), ?assert(lists:member({<<"statistics">>, all}, Registry)), From 5a3c308ea94ec002cb9d3b1b02a2adcfd1c579c7 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Mon, 23 Mar 2026 02:12:58 +0100 Subject: [PATCH 06/18] Remove dead flush_all_imports function --- src/py_context_router.erl | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/src/py_context_router.erl b/src/py_context_router.erl index 67c40c5..6335d40 100644 --- a/src/py_context_router.erl +++ b/src/py_context_router.erl @@ -76,8 +76,6 @@ start_pool/3, stop_pool/1, pool_started/1, - %% Import cache management - flush_all_imports/1, %% Pool registration (route module/func to specific pools) register_pool/2, register_pool/3, @@ -381,38 +379,6 @@ pool_started(Pool) when is_atom(Pool) -> end end. -%% @doc Flush import caches from all contexts in all pools. -%% -%% Iterates through all pools (default and registered) and removes -%% specified modules from sys.modules in each context's interpreter. -%% -%% @param Modules List of module names (binaries) to remove -%% @returns ok --spec flush_all_imports([binary()]) -> ok. 
-flush_all_imports(Modules) -> - %% Get all pool names from the registry - RegisteredPools = case persistent_term:get(?POOL_REGISTRY_KEY, undefined) of - undefined -> []; - Registry when is_map(Registry) -> - %% Extract unique pool names from registry values - lists:usort(maps:values(Registry)) - end, - %% Always include default pool, plus any registered pools - AllPools = lists:usort([default | RegisteredPools]), - %% Flush each pool - lists:foreach(fun(Pool) -> - case pool_started(Pool) of - true -> - Ctxs = contexts(Pool), - lists:foreach(fun(Ctx) -> - catch py_context:flush_imports(Ctx, Modules) - end, Ctxs); - false -> - ok - end - end, AllPools), - ok. - %% ============================================================================ %% Pool Registration (route module/func to specific pools) %% ============================================================================ From 0788cbc93eae2d5c9a329c7ce592fbc2e7ea313a Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Mon, 23 Mar 2026 07:22:44 +0100 Subject: [PATCH 07/18] Add path registry for sys.path management - py:add_path/1: Add a single path to sys.path in all interpreters - py:add_paths/1: Add multiple paths to sys.path - py:all_paths/0: List all registered paths - py:clear_paths/0: Clear the path registry - py:is_path_added/1: Check if a path is registered Paths are stored in a registry (like imports) and applied to new interpreters/subinterpreters when created. Paths are inserted at the beginning of sys.path to take precedence over system paths. 
--- c_src/py_nif.c | 217 +++++++++++++++++++++++++++++++++++++++++++++ c_src/py_nif.h | 3 +- src/py.erl | 117 +++++++++++++++++++++++- src/py_context.erl | 19 +++- src/py_nif.erl | 13 +++ 5 files changed, 366 insertions(+), 3 deletions(-) diff --git a/c_src/py_nif.c b/c_src/py_nif.c index 8adca7e..7511bd7 100644 --- a/c_src/py_nif.c +++ b/c_src/py_nif.c @@ -3224,6 +3224,77 @@ static void owngil_execute_apply_imports(py_context_t *ctx) { ctx->response_ok = true; } +/** + * @brief Apply paths to sys.path in OWN_GIL context + * + * Paths are inserted at the beginning of sys.path. + */ +static void owngil_execute_apply_paths(py_context_t *ctx) { + /* Get sys.path */ + PyObject *sys_module = PyImport_ImportModule("sys"); + if (sys_module == NULL) { + PyErr_Clear(); + ctx->response_term = enif_make_tuple2(ctx->shared_env, + enif_make_atom(ctx->shared_env, "error"), + enif_make_atom(ctx->shared_env, "sys_import_failed")); + ctx->response_ok = false; + return; + } + + PyObject *sys_path = PyObject_GetAttrString(sys_module, "path"); + Py_DECREF(sys_module); + if (sys_path == NULL || !PyList_Check(sys_path)) { + Py_XDECREF(sys_path); + PyErr_Clear(); + ctx->response_term = enif_make_tuple2(ctx->shared_env, + enif_make_atom(ctx->shared_env, "error"), + enif_make_atom(ctx->shared_env, "sys_path_not_list")); + ctx->response_ok = false; + return; + } + + /* Count paths first */ + ERL_NIF_TERM head, tail = ctx->request_term; + int path_count = 0; + while (enif_get_list_cell(ctx->shared_env, tail, &head, &tail)) { + path_count++; + } + + /* Insert in reverse order so first path ends up first */ + for (int i = 0; i < path_count; i++) { + /* Skip to the i-th element from the end */ + ERL_NIF_TERM current = ctx->request_term; + for (int j = 0; j < path_count - 1 - i; j++) { + enif_get_list_cell(ctx->shared_env, current, &head, ¤t); + } + enif_get_list_cell(ctx->shared_env, current, &head, ¤t); + + ErlNifBinary path_bin; + if (!enif_inspect_binary(ctx->shared_env, head, &path_bin)) { 
+ continue; + } + + /* Convert to Python string */ + PyObject *path_str = PyUnicode_FromStringAndSize((char *)path_bin.data, path_bin.size); + if (path_str == NULL) { + PyErr_Clear(); + continue; + } + + /* Check if already in sys.path */ + int already_present = PySequence_Contains(sys_path, path_str); + if (already_present <= 0) { + /* Insert at position 0 */ + PyList_Insert(sys_path, 0, path_str); + } + Py_DECREF(path_str); + } + + Py_DECREF(sys_path); + ctx->response_term = enif_make_atom(ctx->shared_env, "ok"); + ctx->response_ok = true; +} + /** * @brief Execute a request based on its type */ @@ -3262,6 +3333,9 @@ static void owngil_execute_request(py_context_t *ctx) { case CTX_REQ_APPLY_IMPORTS: owngil_execute_apply_imports(ctx); break; + case CTX_REQ_APPLY_PATHS: + owngil_execute_apply_paths(ctx); + break; default: ctx->response_term = enif_make_tuple2(ctx->shared_env, enif_make_atom(ctx->shared_env, "error"), @@ -3871,6 +3945,50 @@ static ERL_NIF_TERM dispatch_apply_imports_to_owngil( return result; } +/** + * @brief Dispatch apply_paths request to OWN_GIL worker thread + * + * @param env Current NIF environment + * @param ctx OWN_GIL context + * @param paths_term List of path binaries + * @return ok | {error, Reason} + */ +static ERL_NIF_TERM dispatch_apply_paths_to_owngil( + ErlNifEnv *env, py_context_t *ctx, ERL_NIF_TERM paths_term +) { + if (!atomic_load(&ctx->thread_running)) { + return make_error(env, "thread_not_running"); + } + + pthread_mutex_lock(&ctx->request_mutex); + + enif_clear_env(ctx->shared_env); + ctx->request_term = enif_make_copy(ctx->shared_env, paths_term); + ctx->request_type = CTX_REQ_APPLY_PATHS; + + pthread_cond_signal(&ctx->request_ready); + + /* Wait for response with timeout */ + struct timespec deadline; + clock_gettime(CLOCK_REALTIME, &deadline); + deadline.tv_sec += OWNGIL_DISPATCH_TIMEOUT_SECS; + + while (ctx->request_type != CTX_REQ_NONE) { + int rc = pthread_cond_timedwait(&ctx->response_ready, &ctx->request_mutex, 
&deadline); + if (rc == ETIMEDOUT) { + atomic_store(&ctx->thread_running, false); + pthread_mutex_unlock(&ctx->request_mutex); + fprintf(stderr, "OWN_GIL apply_paths dispatch timeout: worker thread unresponsive\n"); + return make_error(env, "worker_timeout"); + } + } + + ERL_NIF_TERM result = enif_make_copy(env, ctx->response_term); + pthread_mutex_unlock(&ctx->request_mutex); + + return result; +} + #endif /* HAVE_SUBINTERPRETERS */ /** @@ -4935,6 +5053,104 @@ static ERL_NIF_TERM nif_interp_apply_imports(ErlNifEnv *env, int argc, const ERL return ATOM_OK; } +/** + * @brief Apply a list of paths to an interpreter's sys.path + * + * nif_interp_apply_paths(Ref, Paths) -> ok | {error, Reason} + * + * Paths: [PathBin, ...] + * Inserts paths at the beginning of sys.path so they take precedence. + */ +static ERL_NIF_TERM nif_interp_apply_paths(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + py_context_t *ctx; + + if (!runtime_is_running()) { + return make_error(env, "python_not_running"); + } + + if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { + return make_error(env, "invalid_context"); + } + + if (ctx->destroyed) { + return make_error(env, "context_destroyed"); + } + +#ifdef HAVE_SUBINTERPRETERS + /* OWN_GIL mode: dispatch to the dedicated thread */ + if (ctx->uses_own_gil) { + return dispatch_apply_paths_to_owngil(env, ctx, argv[1]); + } +#endif + + py_context_guard_t guard = py_context_acquire(ctx); + if (!guard.acquired) { + return make_error(env, "acquire_failed"); + } + + /* Get sys.path */ + PyObject *sys_module = PyImport_ImportModule("sys"); + if (sys_module == NULL) { + py_context_release(&guard); + return make_error(env, "sys_import_failed"); + } + + PyObject *sys_path = PyObject_GetAttrString(sys_module, "path"); + Py_DECREF(sys_module); + if (sys_path == NULL || !PyList_Check(sys_path)) { + Py_XDECREF(sys_path); + py_context_release(&guard); + return make_error(env, "sys_path_not_list"); + } + + /* 
Process each path - insert at beginning in reverse order */ + /* First, collect all paths */ + ERL_NIF_TERM head, tail = argv[1]; + int path_count = 0; + ERL_NIF_TERM paths_list = argv[1]; + + /* Count paths */ + while (enif_get_list_cell(env, tail, &head, &tail)) { + path_count++; + } + + /* Insert in reverse order so first path ends up first */ + tail = paths_list; + for (int i = 0; i < path_count; i++) { + /* Skip to the i-th element from the end */ + ERL_NIF_TERM current = paths_list; + for (int j = 0; j < path_count - 1 - i; j++) { + enif_get_list_cell(env, current, &head, ¤t); + } + enif_get_list_cell(env, current, &head, ¤t); + + ErlNifBinary path_bin; + if (!enif_inspect_binary(env, head, &path_bin)) { + continue; + } + + /* Convert to Python string */ + PyObject *path_str = PyUnicode_FromStringAndSize((char *)path_bin.data, path_bin.size); + if (path_str == NULL) { + PyErr_Clear(); + continue; + } + + /* Check if already in sys.path */ + int already_present = PySequence_Contains(sys_path, path_str); + if (already_present <= 0) { + /* Insert at position 0 */ + PyList_Insert(sys_path, 0, path_str); + } + Py_DECREF(path_str); + } + + Py_DECREF(sys_path); + py_context_release(&guard); + return ATOM_OK; +} + /** * @brief Execute Python statements using a process-local environment * @@ -7065,6 +7281,7 @@ static ErlNifFunc nif_funcs[] = { {"context_call", 6, nif_context_call_with_env, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"create_local_env", 1, nif_create_local_env, 0}, {"interp_apply_imports", 2, nif_interp_apply_imports, ERL_NIF_DIRTY_JOB_CPU_BOUND}, + {"interp_apply_paths", 2, nif_interp_apply_paths, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"context_call_method", 4, nif_context_call_method, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"context_to_term", 1, nif_context_to_term, 0}, {"context_interp_id", 1, nif_context_interp_id, 0}, diff --git a/c_src/py_nif.h b/c_src/py_nif.h index c782216..92989fc 100644 --- a/c_src/py_nif.h +++ b/c_src/py_nif.h @@ -731,7 +731,8 @@ typedef enum { 
CTX_REQ_EVAL_WITH_ENV, /**< Eval with process-local environment */ CTX_REQ_EXEC_WITH_ENV, /**< Exec with process-local environment */ CTX_REQ_CREATE_LOCAL_ENV, /**< Create process-local env dicts */ - CTX_REQ_APPLY_IMPORTS /**< Apply imports to module cache */ + CTX_REQ_APPLY_IMPORTS, /**< Apply imports to module cache */ + CTX_REQ_APPLY_PATHS /**< Apply paths to sys.path */ } ctx_request_type_t; /** diff --git a/src/py.erl b/src/py.erl index 9830a61..6f141f1 100644 --- a/src/py.erl +++ b/src/py.erl @@ -67,6 +67,12 @@ init_import_registry/0, all_imports/0, clear_imports/0, + %% Path registry (sys.path additions applied to all interpreters) + add_path/1, + add_paths/1, + all_paths/0, + clear_paths/0, + is_path_added/1, version/0, memory_stats/0, gc/0, @@ -172,6 +178,9 @@ %% ETS table for global import registry -define(IMPORT_REGISTRY, py_import_registry). +%% ETS table for global path registry (sys.path additions) +-define(PATH_REGISTRY, py_path_registry). + %% @doc Get or create a process-local Python environment for a context. %% %% Each Erlang process can have Python environments per interpreter. @@ -345,7 +354,7 @@ exec(Ctx, Code) when is_pid(Ctx) -> %%% Module Import Caching %%% ============================================================================ -%% @doc Initialize the import registry ETS table. +%% @doc Initialize the import and path registry ETS tables. %% %% This is called automatically during application startup. %% Safe to call multiple times - does nothing if already initialized. @@ -361,6 +370,14 @@ init_import_registry() -> ok; _ -> ok + end, + case ets:info(?PATH_REGISTRY) of + undefined -> + %% Use set type - paths are unique, ordered by insertion + ets:new(?PATH_REGISTRY, [ordered_set, public, named_table]), + ok; + _ -> + ok end. %% @doc Register a module for import in all interpreters. @@ -494,6 +511,104 @@ clear_imports() -> end, ok. 
+%%% ============================================================================ +%%% Path Registry (sys.path additions) +%%% ============================================================================ + +%% @doc Add a path to sys.path in all interpreters. +%% +%% Adds the path to the global path registry. When new interpreters +%% are created, they will automatically have this path in sys.path. +%% The path is inserted at the beginning of sys.path to take precedence. +%% +%% Example: +%% ``` +%% ok = py:add_path("/path/to/my/modules"), +%% {ok, Result} = py:call(mymodule, myfunc, []). +%% ''' +%% +%% @param Path Directory path to add (string, binary, or atom) +%% @returns ok +-spec add_path(string() | binary() | atom()) -> ok. +add_path(Path) -> + PathBin = ensure_binary(Path), + case ets:info(?PATH_REGISTRY) of + undefined -> ok; + _ -> + %% Use monotonic time as key to preserve insertion order + Key = erlang:monotonic_time(), + ets:insert(?PATH_REGISTRY, {Key, PathBin}) + end, + ok. + +%% @doc Add multiple paths to sys.path in all interpreters. +%% +%% Adds all paths to the global path registry. Paths are added in order, +%% so the first path in the list will be first in sys.path. +%% +%% Example: +%% ``` +%% ok = py:add_paths(["/path/to/lib1", "/path/to/lib2"]), +%% {ok, Result} = py:call(mymodule, myfunc, []). +%% ''' +%% +%% @param Paths List of directory paths to add +%% @returns ok +-spec add_paths([string() | binary() | atom()]) -> ok. +add_paths(Paths) when is_list(Paths) -> + lists:foreach(fun add_path/1, Paths), + ok. + +%% @doc Get all registered paths from the global registry. +%% +%% Returns a list of paths in the order they were added. +%% +%% Example: +%% ``` +%% ok = py:add_path("/path/to/modules"), +%% [<<"/path/to/modules">>] = py:all_paths(). +%% ''' +%% +%% @returns List of paths as binaries +-spec all_paths() -> [binary()]. 
+all_paths() -> + case ets:info(?PATH_REGISTRY) of + undefined -> []; + _ -> + %% ordered_set returns in key order (monotonic time = insertion order) + [Path || {_Key, Path} <- ets:tab2list(?PATH_REGISTRY)] + end. + +%% @doc Clear all registered paths from the global registry. +%% +%% Removes all entries from the path registry. +%% Does not affect already-running interpreters. +%% +%% @returns ok +-spec clear_paths() -> ok. +clear_paths() -> + case ets:info(?PATH_REGISTRY) of + undefined -> ok; + _ -> ets:delete_all_objects(?PATH_REGISTRY) + end, + ok. + +%% @doc Check if a path is registered in the path registry. +%% +%% @param Path Directory path to check +%% @returns true if path is registered, false otherwise +-spec is_path_added(string() | binary() | atom()) -> boolean(). +is_path_added(Path) -> + PathBin = ensure_binary(Path), + case ets:info(?PATH_REGISTRY) of + undefined -> false; + _ -> + case ets:match(?PATH_REGISTRY, {'_', PathBin}) of + [] -> false; + _ -> true + end + end. + %% @doc Get import registry statistics. %% %% Returns a map with the count of registered imports. diff --git a/src/py_context.erl b/src/py_context.erl index e58726b..e1b9fcf 100644 --- a/src/py_context.erl +++ b/src/py_context.erl @@ -420,8 +420,9 @@ init(Parent, Id, Mode) -> process_flag(trap_exit, true), case create_context(Mode) of {ok, Ref, InterpId} -> - %% Apply all registered imports to this interpreter + %% Apply all registered imports and paths to this interpreter apply_registered_imports(Ref), + apply_registered_paths(Ref), %% For subinterpreters, create a dedicated event worker EventState = setup_event_worker(Ref, InterpId), %% For thread-model subinterpreters, spawn a dedicated callback handler @@ -514,6 +515,22 @@ apply_registered_imports(Ref) -> end end. +%% @private Apply all paths from the global registry to this interpreter. +%% +%% Called when a new interpreter is created to add all registered paths +%% to sys.path. 
+apply_registered_paths(Ref) -> + case ets:info(py_path_registry) of + undefined -> ok; + _ -> + %% Extract just the path values (keys are monotonic timestamps) + Paths = [Path || {_Key, Path} <- ets:tab2list(py_path_registry)], + case Paths of + [] -> ok; + _ -> py_nif:interp_apply_paths(Ref, Paths) + end + end. + %% @private create_context(auto) -> case py_nif:subinterp_supported() of diff --git a/src/py_nif.erl b/src/py_nif.erl index 3620637..1542685 100644 --- a/src/py_nif.erl +++ b/src/py_nif.erl @@ -114,6 +114,7 @@ event_loop_eval/2, %% Per-interpreter import caching NIFs interp_apply_imports/2, + interp_apply_paths/2, add_reader/3, remove_reader/2, add_writer/3, @@ -864,6 +865,18 @@ event_loop_eval(_LoopRef, _Expr) -> interp_apply_imports(_Ref, _Imports) -> ?NIF_STUB. +%% @doc Apply a list of paths to an interpreter's sys.path. +%% +%% Paths are inserted at the beginning of sys.path to take precedence +%% over system paths. Called when a new context is created. +%% +%% @param Ref Context reference (from context_create/1) +%% @param Paths List of path binaries +%% @returns ok | {error, Reason} +-spec interp_apply_paths(reference(), [binary()]) -> ok | {error, term()}. +interp_apply_paths(_Ref, _Paths) -> + ?NIF_STUB. + %% @doc Register a file descriptor for read monitoring. %% Uses enif_select to register with the Erlang scheduler. -spec add_reader(reference(), integer(), non_neg_integer()) -> From a7673462c4a010280dfee599bcbafad4cb1aeb50 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Mon, 23 Mar 2026 07:46:08 +0100 Subject: [PATCH 08/18] Extract import/path registry into py_import module Move import and path registry functions from py.erl to new py_import module for better separation of concerns. The py_import module now provides init/0, ensure_imported/1,2, is_imported/1,2, all_imports/0, clear_imports/0, import_stats/0, import_list/0, add_path/1, add_paths/1, all_paths/0, clear_paths/0, and is_path_added/1. 
This is a breaking change - users must now use py_import: instead of py: for these functions. --- src/erlang_python_app.erl | 4 +- src/py.erl | 334 --------------------------- src/py_context.erl | 25 +- src/py_import.erl | 388 ++++++++++++++++++++++++++++++++ test/py_import_SUITE.erl | 176 +++++++-------- test/py_import_owngil_SUITE.erl | 4 +- 6 files changed, 487 insertions(+), 444 deletions(-) create mode 100644 src/py_import.erl diff --git a/src/erlang_python_app.erl b/src/erlang_python_app.erl index 01e443e..e012539 100644 --- a/src/erlang_python_app.erl +++ b/src/erlang_python_app.erl @@ -20,8 +20,8 @@ -export([start/2, stop/1]). start(_StartType, _StartArgs) -> - %% Initialize the import registry ETS table - py:init_import_registry(), + %% Initialize the import registry ETS tables + py_import:init(), erlang_python_sup:start_link(). stop(_State) -> diff --git a/src/py.erl b/src/py.erl index 6f141f1..b78b2b8 100644 --- a/src/py.erl +++ b/src/py.erl @@ -56,23 +56,6 @@ stream/4, stream_eval/1, stream_eval/2, - %% Module import caching - ensure_imported/1, - ensure_imported/2, - is_imported/1, - is_imported/2, - import_stats/0, - import_list/0, - %% Import registry (global list applied to all interpreters) - init_import_registry/0, - all_imports/0, - clear_imports/0, - %% Path registry (sys.path additions applied to all interpreters) - add_path/1, - add_paths/1, - all_paths/0, - clear_paths/0, - is_path_added/1, version/0, memory_stats/0, gc/0, @@ -175,12 +158,6 @@ %% Process dictionary key for local Python environment -define(LOCAL_ENV_KEY, py_local_env). -%% ETS table for global import registry --define(IMPORT_REGISTRY, py_import_registry). - -%% ETS table for global path registry (sys.path additions) --define(PATH_REGISTRY, py_path_registry). - %% @doc Get or create a process-local Python environment for a context. %% %% Each Erlang process can have Python environments per interpreter. 
@@ -350,317 +327,6 @@ exec(Ctx, Code) when is_pid(Ctx) -> EnvRef = get_local_env(Ctx), py_context:exec(Ctx, Code, EnvRef). -%%% ============================================================================ -%%% Module Import Caching -%%% ============================================================================ - -%% @doc Initialize the import and path registry ETS tables. -%% -%% This is called automatically during application startup. -%% Safe to call multiple times - does nothing if already initialized. -%% -%% @returns ok --spec init_import_registry() -> ok. -init_import_registry() -> - case ets:info(?IMPORT_REGISTRY) of - undefined -> - %% Use bag type to allow multiple entries with same module name - %% e.g., {<<"json">>, all} and {<<"json">>, <<"dumps">>} - ets:new(?IMPORT_REGISTRY, [bag, public, named_table]), - ok; - _ -> - ok - end, - case ets:info(?PATH_REGISTRY) of - undefined -> - %% Use set type - paths are unique, ordered by insertion - ets:new(?PATH_REGISTRY, [ordered_set, public, named_table]), - ok; - _ -> - ok - end. - -%% @doc Register a module for import in all interpreters. -%% -%% Adds the module to the global import registry. When new interpreters -%% are created, they will automatically import all registered modules. -%% The module will be imported lazily when first used. -%% -%% The `__main__' module is never cached. -%% -%% Example: -%% ``` -%% ok = py:ensure_imported(json), -%% {ok, Result} = py:call(json, dumps, [Data]). %% Module imported on first use -%% ''' -%% -%% @param Module Python module name -%% @returns ok | {error, Reason} --spec ensure_imported(py_module()) -> ok | {error, term()}. 
-ensure_imported(Module) -> - ModuleBin = ensure_binary(Module), - %% Reject __main__ - case ModuleBin of - <<"__main__">> -> - {error, main_not_cacheable}; - _ -> - %% Add to global registry - module will be imported lazily - case ets:info(?IMPORT_REGISTRY) of - undefined -> ok; - _ -> ets:insert(?IMPORT_REGISTRY, {ModuleBin, all}) - end, - ok - end. - -%% @doc Register a module/function for import in all interpreters. -%% -%% Adds the module/function to the global import registry. When new -%% interpreters are created, they will automatically import the module. -%% The module will be imported lazily when first used. -%% -%% The `__main__' module is never cached. -%% -%% Example: -%% ``` -%% ok = py:ensure_imported(json, dumps), -%% {ok, Result} = py:call(json, dumps, [Data]). %% Module imported on first use -%% ''' -%% -%% @param Module Python module name -%% @param Func Function name to register -%% @returns ok | {error, Reason} --spec ensure_imported(py_module(), py_func()) -> ok | {error, term()}. -ensure_imported(Module, Func) -> - ModuleBin = ensure_binary(Module), - FuncBin = ensure_binary(Func), - %% Reject __main__ - case ModuleBin of - <<"__main__">> -> - {error, main_not_cacheable}; - _ -> - %% Add to global registry - module will be imported lazily - case ets:info(?IMPORT_REGISTRY) of - undefined -> ok; - _ -> ets:insert(?IMPORT_REGISTRY, {ModuleBin, FuncBin}) - end, - ok - end. - -%% @doc Check if a module is registered in the import registry. -%% -%% @param Module Python module name -%% @returns true if module is registered, false otherwise --spec is_imported(py_module()) -> boolean(). -is_imported(Module) -> - ModuleBin = ensure_binary(Module), - case ets:info(?IMPORT_REGISTRY) of - undefined -> false; - _ -> ets:member(?IMPORT_REGISTRY, ModuleBin) - end. - -%% @doc Check if a module/function is registered in the import registry. 
-%% -%% @param Module Python module name -%% @param Func Function name -%% @returns true if module/function is registered, false otherwise --spec is_imported(py_module(), py_func()) -> boolean(). -is_imported(Module, Func) -> - ModuleBin = ensure_binary(Module), - FuncBin = ensure_binary(Func), - case ets:info(?IMPORT_REGISTRY) of - undefined -> false; - _ -> - case ets:lookup(?IMPORT_REGISTRY, ModuleBin) of - [{_, all}] -> true; - [{_, FuncBin}] -> true; - _ -> false - end - end. - -%% @doc Get all registered imports from the global registry. -%% -%% Returns a list of {Module, Func | all} tuples representing all -%% modules/functions registered for automatic import. -%% -%% Example: -%% ``` -%% ok = py:ensure_imported(json), -%% ok = py:ensure_imported(math, sqrt), -%% [{<<"json">>, all}, {<<"math">>, <<"sqrt">>}] = py:all_imports(). -%% ''' -%% -%% @returns List of {Module, Func | all} tuples --spec all_imports() -> [{binary(), binary() | all}]. -all_imports() -> - case ets:info(?IMPORT_REGISTRY) of - undefined -> []; - _ -> ets:tab2list(?IMPORT_REGISTRY) - end. - -%% @doc Clear all registered imports from the global registry. -%% -%% Removes all entries from the registry. -%% Does not affect already-running interpreters. -%% -%% @returns ok --spec clear_imports() -> ok. -clear_imports() -> - case ets:info(?IMPORT_REGISTRY) of - undefined -> ok; - _ -> ets:delete_all_objects(?IMPORT_REGISTRY) - end, - ok. - -%%% ============================================================================ -%%% Path Registry (sys.path additions) -%%% ============================================================================ - -%% @doc Add a path to sys.path in all interpreters. -%% -%% Adds the path to the global path registry. When new interpreters -%% are created, they will automatically have this path in sys.path. -%% The path is inserted at the beginning of sys.path to take precedence. 
-%% -%% Example: -%% ``` -%% ok = py:add_path("/path/to/my/modules"), -%% {ok, Result} = py:call(mymodule, myfunc, []). -%% ''' -%% -%% @param Path Directory path to add (string, binary, or atom) -%% @returns ok --spec add_path(string() | binary() | atom()) -> ok. -add_path(Path) -> - PathBin = ensure_binary(Path), - case ets:info(?PATH_REGISTRY) of - undefined -> ok; - _ -> - %% Use monotonic time as key to preserve insertion order - Key = erlang:monotonic_time(), - ets:insert(?PATH_REGISTRY, {Key, PathBin}) - end, - ok. - -%% @doc Add multiple paths to sys.path in all interpreters. -%% -%% Adds all paths to the global path registry. Paths are added in order, -%% so the first path in the list will be first in sys.path. -%% -%% Example: -%% ``` -%% ok = py:add_paths(["/path/to/lib1", "/path/to/lib2"]), -%% {ok, Result} = py:call(mymodule, myfunc, []). -%% ''' -%% -%% @param Paths List of directory paths to add -%% @returns ok --spec add_paths([string() | binary() | atom()]) -> ok. -add_paths(Paths) when is_list(Paths) -> - lists:foreach(fun add_path/1, Paths), - ok. - -%% @doc Get all registered paths from the global registry. -%% -%% Returns a list of paths in the order they were added. -%% -%% Example: -%% ``` -%% ok = py:add_path("/path/to/modules"), -%% [<<"/path/to/modules">>] = py:all_paths(). -%% ''' -%% -%% @returns List of paths as binaries --spec all_paths() -> [binary()]. -all_paths() -> - case ets:info(?PATH_REGISTRY) of - undefined -> []; - _ -> - %% ordered_set returns in key order (monotonic time = insertion order) - [Path || {_Key, Path} <- ets:tab2list(?PATH_REGISTRY)] - end. - -%% @doc Clear all registered paths from the global registry. -%% -%% Removes all entries from the path registry. -%% Does not affect already-running interpreters. -%% -%% @returns ok --spec clear_paths() -> ok. -clear_paths() -> - case ets:info(?PATH_REGISTRY) of - undefined -> ok; - _ -> ets:delete_all_objects(?PATH_REGISTRY) - end, - ok. 
- -%% @doc Check if a path is registered in the path registry. -%% -%% @param Path Directory path to check -%% @returns true if path is registered, false otherwise --spec is_path_added(string() | binary() | atom()) -> boolean(). -is_path_added(Path) -> - PathBin = ensure_binary(Path), - case ets:info(?PATH_REGISTRY) of - undefined -> false; - _ -> - case ets:match(?PATH_REGISTRY, {'_', PathBin}) of - [] -> false; - _ -> true - end - end. - -%% @doc Get import registry statistics. -%% -%% Returns a map with the count of registered imports. -%% -%% Example: -%% ``` -%% {ok, #{count => 5}} = py:import_stats(). -%% ''' -%% -%% @returns {ok, Stats} where Stats is a map with registry metrics --spec import_stats() -> {ok, map()} | {error, term()}. -import_stats() -> - Count = case ets:info(?IMPORT_REGISTRY) of - undefined -> 0; - _ -> ets:info(?IMPORT_REGISTRY, size) - end, - {ok, #{count => Count}}. - -%% @doc List all registered imports. -%% -%% Returns a map of modules to their registered functions. -%% Module names are binary keys, function lists are the values. -%% An empty list means only the module is registered (no specific functions). -%% -%% Example: -%% ``` -%% ok = py:import(json), -%% ok = py:import(json, dumps), -%% ok = py:import(json, loads), -%% ok = py:import(math), -%% {ok, #{<<"json">> => [<<"dumps">>, <<"loads">>], -%% <<"math">> => []}} = py:import_list(). -%% ''' -%% -%% @returns {ok, #{Module => [Func]}} map of modules to functions --spec import_list() -> {ok, #{binary() => [binary()]}} | {error, term()}. -import_list() -> - Imports = all_imports(), - %% Group by module - Map = lists:foldl(fun({Module, FuncOrAll}, Acc) -> - Existing = maps:get(Module, Acc, []), - case FuncOrAll of - all -> - %% Module-level import, don't add to function list - maps:put(Module, Existing, Acc); - Func -> - %% Function-level import - maps:put(Module, [Func | Existing], Acc) - end - end, #{}, Imports), - {ok, Map}. 
- %%% ============================================================================ %%% Asynchronous API %%% ============================================================================ diff --git a/src/py_context.erl b/src/py_context.erl index e1b9fcf..3f41e5c 100644 --- a/src/py_context.erl +++ b/src/py_context.erl @@ -503,16 +503,11 @@ extend_erlang_module_in_context(Ref) -> %% @private Apply all imports from the global registry to this interpreter. %% %% Called when a new interpreter is created to pre-warm the module cache -%% with all modules registered via py:import/1,2. +%% with all modules registered via py_import:ensure_imported/1,2. apply_registered_imports(Ref) -> - case ets:info(py_import_registry) of - undefined -> ok; - _ -> - Imports = ets:tab2list(py_import_registry), - case Imports of - [] -> ok; - _ -> py_nif:interp_apply_imports(Ref, Imports) - end + case py_import:all_imports() of + [] -> ok; + Imports -> py_nif:interp_apply_imports(Ref, Imports) end. %% @private Apply all paths from the global registry to this interpreter. @@ -520,15 +515,9 @@ apply_registered_imports(Ref) -> %% Called when a new interpreter is created to add all registered paths %% to sys.path. apply_registered_paths(Ref) -> - case ets:info(py_path_registry) of - undefined -> ok; - _ -> - %% Extract just the path values (keys are monotonic timestamps) - Paths = [Path || {_Key, Path} <- ets:tab2list(py_path_registry)], - case Paths of - [] -> ok; - _ -> py_nif:interp_apply_paths(Ref, Paths) - end + case py_import:all_paths() of + [] -> ok; + Paths -> py_nif:interp_apply_paths(Ref, Paths) end. %% @private diff --git a/src/py_import.erl b/src/py_import.erl new file mode 100644 index 0000000..8fcaaf2 --- /dev/null +++ b/src/py_import.erl @@ -0,0 +1,388 @@ +%% Copyright 2026 Benoit Chesneau +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. 
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +%%% @doc Import and path registry for Python interpreters. +%%% +%%% This module manages the global import and path registries that are +%%% applied to all Python interpreters. When new interpreters are created, +%%% they automatically get all registered imports and paths applied. +%%% +%%% == Examples == +%%% +%%% ``` +%%% %% Register modules for import in all interpreters +%%% ok = py_import:ensure_imported(json). +%%% ok = py_import:ensure_imported(math, sqrt). +%%% +%%% %% Add paths to sys.path in all interpreters +%%% ok = py_import:add_path("/path/to/my/modules"). +%%% +%%% %% Check registry contents +%%% Imports = py_import:all_imports(). +%%% Paths = py_import:all_paths(). +%%% ''' +-module(py_import). + +-export([ + init/0, + %% Import registry + ensure_imported/1, + ensure_imported/2, + is_imported/1, + is_imported/2, + all_imports/0, + clear_imports/0, + import_stats/0, + import_list/0, + %% Path registry + add_path/1, + add_paths/1, + all_paths/0, + clear_paths/0, + is_path_added/1 +]). + +-type py_module() :: atom() | binary() | string(). +-type py_func() :: atom() | binary() | string(). + +-export_type([py_module/0, py_func/0]). + +%% ETS table for global import registry +-define(IMPORT_REGISTRY, py_import_registry). + +%% ETS table for global path registry (sys.path additions) +-define(PATH_REGISTRY, py_path_registry). 
+ +%%% ============================================================================ +%%% Initialization +%%% ============================================================================ + +%% @doc Initialize the import and path registry ETS tables. +%% +%% This is called automatically during application startup. +%% Safe to call multiple times - does nothing if already initialized. +%% +%% @returns ok +-spec init() -> ok. +init() -> + case ets:info(?IMPORT_REGISTRY) of + undefined -> + %% Use bag type to allow multiple entries with same module name + %% e.g., {<<"json">>, all} and {<<"json">>, <<"dumps">>} + ets:new(?IMPORT_REGISTRY, [bag, public, named_table]), + ok; + _ -> + ok + end, + case ets:info(?PATH_REGISTRY) of + undefined -> + %% Use set type - paths are unique, ordered by insertion + ets:new(?PATH_REGISTRY, [ordered_set, public, named_table]), + ok; + _ -> + ok + end. + +%%% ============================================================================ +%%% Module Import Registry +%%% ============================================================================ + +%% @doc Register a module for import in all interpreters. +%% +%% Adds the module to the global import registry. When new interpreters +%% are created, they will automatically import all registered modules. +%% The module will be imported lazily when first used. +%% +%% The `__main__' module is never cached. +%% +%% Example: +%% ``` +%% ok = py_import:ensure_imported(json), +%% {ok, Result} = py:call(json, dumps, [Data]). %% Module imported on first use +%% ''' +%% +%% @param Module Python module name +%% @returns ok | {error, Reason} +-spec ensure_imported(py_module()) -> ok | {error, term()}. 
+ensure_imported(Module) -> + ModuleBin = ensure_binary(Module), + %% Reject __main__ + case ModuleBin of + <<"__main__">> -> + {error, main_not_cacheable}; + _ -> + %% Add to global registry - module will be imported lazily + case ets:info(?IMPORT_REGISTRY) of + undefined -> ok; + _ -> ets:insert(?IMPORT_REGISTRY, {ModuleBin, all}) + end, + ok + end. + +%% @doc Register a module/function for import in all interpreters. +%% +%% Adds the module/function to the global import registry. When new +%% interpreters are created, they will automatically import the module. +%% The module will be imported lazily when first used. +%% +%% The `__main__' module is never cached. +%% +%% Example: +%% ``` +%% ok = py_import:ensure_imported(json, dumps), +%% {ok, Result} = py:call(json, dumps, [Data]). %% Module imported on first use +%% ''' +%% +%% @param Module Python module name +%% @param Func Function name to register +%% @returns ok | {error, Reason} +-spec ensure_imported(py_module(), py_func()) -> ok | {error, term()}. +ensure_imported(Module, Func) -> + ModuleBin = ensure_binary(Module), + FuncBin = ensure_binary(Func), + %% Reject __main__ + case ModuleBin of + <<"__main__">> -> + {error, main_not_cacheable}; + _ -> + %% Add to global registry - module will be imported lazily + case ets:info(?IMPORT_REGISTRY) of + undefined -> ok; + _ -> ets:insert(?IMPORT_REGISTRY, {ModuleBin, FuncBin}) + end, + ok + end. + +%% @doc Check if a module is registered in the import registry. +%% +%% @param Module Python module name +%% @returns true if module is registered, false otherwise +-spec is_imported(py_module()) -> boolean(). +is_imported(Module) -> + ModuleBin = ensure_binary(Module), + case ets:info(?IMPORT_REGISTRY) of + undefined -> false; + _ -> ets:member(?IMPORT_REGISTRY, ModuleBin) + end. + +%% @doc Check if a module/function is registered in the import registry. 
+%%
+%% @param Module Python module name
+%% @param Func Function name
+%% @returns true if module/function is registered, false otherwise
+-spec is_imported(py_module(), py_func()) -> boolean().
+is_imported(Module, Func) ->
+    ModuleBin = ensure_binary(Module),
+    FuncBin = ensure_binary(Func),
+    case ets:info(?IMPORT_REGISTRY) of
+        undefined -> false;
+        _ ->
+            Objects = ets:lookup(?IMPORT_REGISTRY, ModuleBin),
+            %% Bag table: several entries may share the module key
+            %% (e.g. {M, all} and {M, Func}), so scan them all instead
+            %% of pattern-matching a single-element lookup result.
+            lists:any(fun({_, F}) -> F =:= all orelse F =:= FuncBin end,
+                      Objects)
+    end.
+
+%% @doc Get all registered imports from the global registry.
+%%
+%% Returns a list of {Module, Func | all} tuples representing all
+%% modules/functions registered for automatic import.
+%%
+%% Example:
+%% ```
+%% ok = py_import:ensure_imported(json),
+%% ok = py_import:ensure_imported(math, sqrt),
+%% [{<<"json">>, all}, {<<"math">>, <<"sqrt">>}] = py_import:all_imports().
+%% '''
+%%
+%% @returns List of {Module, Func | all} tuples
+-spec all_imports() -> [{binary(), binary() | all}].
+all_imports() ->
+    case ets:info(?IMPORT_REGISTRY) of
+        undefined -> [];
+        _ -> ets:tab2list(?IMPORT_REGISTRY)
+    end.
+
+%% @doc Clear all registered imports from the global registry.
+%%
+%% Removes all entries from the registry.
+%% Does not affect already-running interpreters.
+%%
+%% @returns ok
+-spec clear_imports() -> ok.
+clear_imports() ->
+    case ets:info(?IMPORT_REGISTRY) of
+        undefined -> ok;
+        _ -> ets:delete_all_objects(?IMPORT_REGISTRY)
+    end,
+    ok.
+
+%% @doc Get import registry statistics.
+%%
+%% Returns a map with the count of registered imports.
+%%
+%% Example:
+%% ```
+%% {ok, #{count => 5}} = py_import:import_stats().
+%% '''
+%%
+%% @returns {ok, Stats} where Stats is a map with registry metrics
+-spec import_stats() -> {ok, map()} | {error, term()}.
+import_stats() ->
+    Count = case ets:info(?IMPORT_REGISTRY) of
+        undefined -> 0;
+        _ -> ets:info(?IMPORT_REGISTRY, size)
+    end,
+    {ok, #{count => Count}}.
+
+%% @doc List all registered imports.
+%% +%% Returns a map of modules to their registered functions. +%% Module names are binary keys, function lists are the values. +%% An empty list means only the module is registered (no specific functions). +%% +%% Example: +%% ``` +%% ok = py_import:ensure_imported(json), +%% ok = py_import:ensure_imported(json, dumps), +%% ok = py_import:ensure_imported(json, loads), +%% ok = py_import:ensure_imported(math), +%% {ok, #{<<"json">> => [<<"dumps">>, <<"loads">>], +%% <<"math">> => []}} = py_import:import_list(). +%% ''' +%% +%% @returns {ok, #{Module => [Func]}} map of modules to functions +-spec import_list() -> {ok, #{binary() => [binary()]}} | {error, term()}. +import_list() -> + Imports = all_imports(), + %% Group by module + Map = lists:foldl(fun({Module, FuncOrAll}, Acc) -> + Existing = maps:get(Module, Acc, []), + case FuncOrAll of + all -> + %% Module-level import, don't add to function list + maps:put(Module, Existing, Acc); + Func -> + %% Function-level import + maps:put(Module, [Func | Existing], Acc) + end + end, #{}, Imports), + {ok, Map}. + +%%% ============================================================================ +%%% Path Registry (sys.path additions) +%%% ============================================================================ + +%% @doc Add a path to sys.path in all interpreters. +%% +%% Adds the path to the global path registry. When new interpreters +%% are created, they will automatically have this path in sys.path. +%% The path is inserted at the beginning of sys.path to take precedence. +%% +%% Example: +%% ``` +%% ok = py_import:add_path("/path/to/my/modules"), +%% {ok, Result} = py:call(mymodule, myfunc, []). +%% ''' +%% +%% @param Path Directory path to add (string, binary, or atom) +%% @returns ok +-spec add_path(string() | binary() | atom()) -> ok. 
+add_path(Path) -> + PathBin = ensure_binary(Path), + case ets:info(?PATH_REGISTRY) of + undefined -> ok; + _ -> + %% Use monotonic time as key to preserve insertion order + Key = erlang:monotonic_time(), + ets:insert(?PATH_REGISTRY, {Key, PathBin}) + end, + ok. + +%% @doc Add multiple paths to sys.path in all interpreters. +%% +%% Adds all paths to the global path registry. Paths are added in order, +%% so the first path in the list will be first in sys.path. +%% +%% Example: +%% ``` +%% ok = py_import:add_paths(["/path/to/lib1", "/path/to/lib2"]), +%% {ok, Result} = py:call(mymodule, myfunc, []). +%% ''' +%% +%% @param Paths List of directory paths to add +%% @returns ok +-spec add_paths([string() | binary() | atom()]) -> ok. +add_paths(Paths) when is_list(Paths) -> + lists:foreach(fun add_path/1, Paths), + ok. + +%% @doc Get all registered paths from the global registry. +%% +%% Returns a list of paths in the order they were added. +%% +%% Example: +%% ``` +%% ok = py_import:add_path("/path/to/modules"), +%% [<<"/path/to/modules">>] = py_import:all_paths(). +%% ''' +%% +%% @returns List of paths as binaries +-spec all_paths() -> [binary()]. +all_paths() -> + case ets:info(?PATH_REGISTRY) of + undefined -> []; + _ -> + %% ordered_set returns in key order (monotonic time = insertion order) + [Path || {_Key, Path} <- ets:tab2list(?PATH_REGISTRY)] + end. + +%% @doc Clear all registered paths from the global registry. +%% +%% Removes all entries from the path registry. +%% Does not affect already-running interpreters. +%% +%% @returns ok +-spec clear_paths() -> ok. +clear_paths() -> + case ets:info(?PATH_REGISTRY) of + undefined -> ok; + _ -> ets:delete_all_objects(?PATH_REGISTRY) + end, + ok. + +%% @doc Check if a path is registered in the path registry. +%% +%% @param Path Directory path to check +%% @returns true if path is registered, false otherwise +-spec is_path_added(string() | binary() | atom()) -> boolean(). 
+is_path_added(Path) -> + PathBin = ensure_binary(Path), + case ets:info(?PATH_REGISTRY) of + undefined -> false; + _ -> + case ets:match(?PATH_REGISTRY, {'_', PathBin}) of + [] -> false; + _ -> true + end + end. + +%%% ============================================================================ +%%% Internal functions +%%% ============================================================================ + +%% @private +ensure_binary(S) -> + py_util:to_binary(S). diff --git a/test/py_import_SUITE.erl b/test/py_import_SUITE.erl index bfdd17f..45adf17 100644 --- a/test/py_import_SUITE.erl +++ b/test/py_import_SUITE.erl @@ -1,4 +1,4 @@ -%%% @doc Test suite for py:ensure_imported/1,2 +%%% @doc Test suite for py_import:ensure_imported/1,2 -module(py_import_SUITE). -include_lib("common_test/include/ct.hrl"). @@ -84,7 +84,7 @@ init_per_suite(Config) -> end_per_suite(_Config) -> %% Clean up imports to avoid affecting subsequent test suites - py:clear_imports(), + py_import:clear_imports(), ok. init_per_group(_Group, Config) -> @@ -95,7 +95,7 @@ end_per_group(_Group, _Config) -> init_per_testcase(_TestCase, Config) -> %% Flush imports before each test for clean state - py:clear_imports(), + py_import:clear_imports(), Config. end_per_testcase(_TestCase, _Config) -> @@ -104,40 +104,40 @@ end_per_testcase(_TestCase, _Config) -> %% @doc Test importing a module import_module_test(_Config) -> %% Import json module - ok = py:ensure_imported(json), + ok = py_import:ensure_imported(json), %% Verify it works by calling a function {ok, Result} = py:call(json, dumps, [[1, 2, 3]]), ?assertEqual(<<"[1, 2, 3]">>, Result), %% Import with binary name - ok = py:ensure_imported(<<"math">>), + ok = py_import:ensure_imported(<<"math">>), {ok, Pi} = py:call(math, sqrt, [4.0]), ?assertEqual(2.0, Pi). 
%% @doc Test importing a specific function import_function_test(_Config) -> %% Import json.dumps - ok = py:ensure_imported(json, dumps), + ok = py_import:ensure_imported(json, dumps), %% Verify it works {ok, Result} = py:call(json, dumps, [#{a => 1}]), ?assert(is_binary(Result)), %% Import with binary names - ok = py:ensure_imported(<<"os">>, <<"getcwd">>), + ok = py_import:ensure_imported(<<"os">>, <<"getcwd">>), {ok, Cwd} = py:call(os, getcwd, []), ?assert(is_binary(Cwd)). %% @doc Test that __main__ cannot be imported import_main_rejected_test(_Config) -> %% __main__ should be rejected - {error, main_not_cacheable} = py:ensure_imported('__main__'), - {error, main_not_cacheable} = py:ensure_imported(<<"__main__">>), + {error, main_not_cacheable} = py_import:ensure_imported('__main__'), + {error, main_not_cacheable} = py_import:ensure_imported(<<"__main__">>), %% Also for function import - {error, main_not_cacheable} = py:ensure_imported('__main__', some_func), - {error, main_not_cacheable} = py:ensure_imported(<<"__main__">>, <<"some_func">>). + {error, main_not_cacheable} = py_import:ensure_imported('__main__', some_func), + {error, main_not_cacheable} = py_import:ensure_imported(<<"__main__">>, <<"some_func">>). %% @doc Test importing nonexistent module - registry accepts it but call fails %% @@ -145,7 +145,7 @@ import_main_rejected_test(_Config) -> %% when trying to use the module. 
import_nonexistent_module_test(_Config) -> %% Import succeeds (just adds to registry) - ok = py:ensure_imported(nonexistent_module_xyz), + ok = py_import:ensure_imported(nonexistent_module_xyz), %% But trying to use it fails %% Error format is {error, {ExceptionType, Message}} or {error, atom()} @@ -162,7 +162,7 @@ import_nonexistent_module_test(_Config) -> import_nonexistent_function_test(_Config) -> %% Module exists but function doesn't - import still succeeds %% because we're importing the MODULE, not the function - ok = py:ensure_imported(json, nonexistent_function_xyz), + ok = py_import:ensure_imported(json, nonexistent_function_xyz), %% The module is imported and usable {ok, _} = py:call(json, dumps, [[1, 2, 3]]), @@ -175,13 +175,13 @@ import_nonexistent_function_test(_Config) -> %% @doc Test that importing same module/function twice is idempotent import_idempotent_test(_Config) -> %% Import multiple times - should all succeed - ok = py:ensure_imported(json), - ok = py:ensure_imported(json), - ok = py:ensure_imported(json), + ok = py_import:ensure_imported(json), + ok = py_import:ensure_imported(json), + ok = py_import:ensure_imported(json), - ok = py:ensure_imported(json, dumps), - ok = py:ensure_imported(json, dumps), - ok = py:ensure_imported(json, dumps), + ok = py_import:ensure_imported(json, dumps), + ok = py_import:ensure_imported(json, dumps), + ok = py_import:ensure_imported(json, dumps), %% Still works {ok, _} = py:call(json, dumps, [[1]]). 
@@ -189,48 +189,48 @@ import_idempotent_test(_Config) -> %% @doc Test import stats import_stats_test(_Config) -> %% Start fresh - ok = py:clear_imports(), + ok = py_import:clear_imports(), %% Check empty stats - {ok, Stats0} = py:import_stats(), + {ok, Stats0} = py_import:import_stats(), ?assertEqual(0, maps:get(count, Stats0, 0)), %% Import some modules - ok = py:ensure_imported(json), - ok = py:ensure_imported(math), - ok = py:ensure_imported(os), + ok = py_import:ensure_imported(json), + ok = py_import:ensure_imported(math), + ok = py_import:ensure_imported(os), %% Check stats - {ok, Stats1} = py:import_stats(), + {ok, Stats1} = py_import:import_stats(), Count1 = maps:get(count, Stats1, 0), ?assertEqual(3, Count1), %% Import functions - ok = py:ensure_imported(json, dumps), - ok = py:ensure_imported(json, loads), + ok = py_import:ensure_imported(json, dumps), + ok = py_import:ensure_imported(json, loads), %% Check updated stats (3 modules + 2 functions = 5) - {ok, Stats2} = py:import_stats(), + {ok, Stats2} = py_import:import_stats(), Count2 = maps:get(count, Stats2, 0), ?assertEqual(5, Count2). 
%% @doc Test listing imports import_list_test(_Config) -> %% Start fresh - ok = py:clear_imports(), + ok = py_import:clear_imports(), %% Empty map - {ok, Map0} = py:import_list(), + {ok, Map0} = py_import:import_list(), ?assertEqual(#{}, Map0), %% Import some modules and functions - ok = py:ensure_imported(json), - ok = py:ensure_imported(math), - ok = py:ensure_imported(json, dumps), - ok = py:ensure_imported(json, loads), + ok = py_import:ensure_imported(json), + ok = py_import:ensure_imported(math), + ok = py_import:ensure_imported(json, dumps), + ok = py_import:ensure_imported(json, loads), %% Get map - {ok, Map1} = py:import_list(), + {ok, Map1} = py_import:import_list(), %% Check structure: should have json and math as keys ?assert(maps:is_key(<<"json">>, Map1)), @@ -251,7 +251,7 @@ import_list_test(_Config) -> %% @doc Test that pre-importing speeds up subsequent calls import_speeds_up_calls_test(_Config) -> %% Flush to ensure cold start - ok = py:clear_imports(), + ok = py_import:clear_imports(), %% Time a cold call (module not imported) %% Using json.dumps since hashlib.md5 needs bytes encoding @@ -260,8 +260,8 @@ import_speeds_up_calls_test(_Config) -> end), %% Pre-import the module and function - ok = py:ensure_imported(json), - ok = py:ensure_imported(json, dumps), + ok = py_import:ensure_imported(json), + ok = py_import:ensure_imported(json, dumps), %% Time a warm call (module already imported) {WarmTime, {ok, _}} = timer:tc(fun() -> @@ -282,31 +282,31 @@ import_multiprocess_test(_Config) -> Parent = self(), %% Clear registry first - ok = py:clear_imports(), + ok = py_import:clear_imports(), %% Spawn 3 processes, each importing different modules %% They all contribute to the same global registry Pid1 = spawn_link(fun() -> - ok = py:ensure_imported(json), - ok = py:ensure_imported(json, dumps), + ok = py_import:ensure_imported(json), + ok = py_import:ensure_imported(json, dumps), %% Verify we can use the import {ok, _} = py:call(json, dumps, [[1,2,3]]), 
Parent ! {self(), done} end), Pid2 = spawn_link(fun() -> - ok = py:ensure_imported(math), - ok = py:ensure_imported(math, sqrt), - ok = py:ensure_imported(math, floor), + ok = py_import:ensure_imported(math), + ok = py_import:ensure_imported(math, sqrt), + ok = py_import:ensure_imported(math, floor), %% Verify we can use the import {ok, _} = py:call(math, sqrt, [16.0]), Parent ! {self(), done} end), Pid3 = spawn_link(fun() -> - ok = py:ensure_imported(os), - ok = py:ensure_imported(os, getcwd), - ok = py:ensure_imported(string), + ok = py_import:ensure_imported(os), + ok = py_import:ensure_imported(os, getcwd), + ok = py_import:ensure_imported(string), %% Verify we can use the import {ok, _} = py:call(os, getcwd, []), Parent ! {self(), done} @@ -323,8 +323,8 @@ import_multiprocess_test(_Config) -> ?assertEqual([done, done, done], Results), %% Now verify the GLOBAL registry has all entries - {ok, Stats} = py:import_stats(), - {ok, List} = py:import_list(), + {ok, Stats} = py_import:import_stats(), + {ok, List} = py_import:import_list(), %% Total entries: json, json.dumps, math, math.sqrt, math.floor, os, os.getcwd, string = 8 ?assertEqual(8, maps:get(count, Stats)), @@ -353,13 +353,13 @@ import_concurrent_stress_test(_Config) -> Modules = [json, math, os, string, re, base64, collections, functools, itertools, operator], %% Clear registry first - ok = py:clear_imports(), + ok = py_import:clear_imports(), %% Spawn many processes that all try to import at the same time Pids = [spawn_link(fun() -> %% Each process imports a random subset of modules MyModules = lists:sublist(Modules, 1 + (N rem length(Modules))), - Results = [{M, py:ensure_imported(M)} || M <- MyModules], + Results = [{M, py_import:ensure_imported(M)} || M <- MyModules], %% All imports should succeed AllOk = lists:all(fun({_, R}) -> R =:= ok end, Results), @@ -383,7 +383,7 @@ import_concurrent_stress_test(_Config) -> end, Results), %% Verify the global registry has all modules - {ok, Stats} = 
py:import_stats(), + {ok, Stats} = py_import:import_stats(), Count = maps:get(count, Stats), %% Should have all 10 modules (some may have been imported multiple times but ETS dedupes) ?assertEqual(10, Count), @@ -397,23 +397,23 @@ import_concurrent_stress_test(_Config) -> %% @doc Test that imports are added to the global registry import_registry_test(_Config) -> %% Clear any existing registry entries - ok = py:clear_imports(), + ok = py_import:clear_imports(), %% Verify registry is empty - [] = py:all_imports(), + [] = py_import:all_imports(), %% Import a module - ok = py:ensure_imported(json), + ok = py_import:ensure_imported(json), %% Verify it's in the registry - Imports1 = py:all_imports(), + Imports1 = py_import:all_imports(), ?assert(lists:member({<<"json">>, all}, Imports1)), %% Import a function - ok = py:ensure_imported(math, sqrt), + ok = py_import:ensure_imported(math, sqrt), %% Verify both are in the registry - Imports2 = py:all_imports(), + Imports2 = py_import:all_imports(), ?assert(lists:member({<<"json">>, all}, Imports2)), ?assert(lists:member({<<"math">>, <<"sqrt">>}, Imports2)), @@ -422,8 +422,8 @@ import_registry_test(_Config) -> %% @doc Test that imports are automatically applied to new contexts import_applied_to_new_context_test(_Config) -> %% Clear and add an import - ok = py:clear_imports(), - ok = py:ensure_imported(json), + ok = py_import:clear_imports(), + ok = py_import:ensure_imported(json), %% Create a new context {ok, Ctx} = py_context:new(#{mode => auto}), @@ -435,35 +435,35 @@ import_applied_to_new_context_test(_Config) -> %% Clean up py_context:destroy(Ctx), - ok = py:clear_imports(). + ok = py_import:clear_imports(). 
%% @doc Test clearing all imports from the registry clear_imports_test(_Config) -> %% Add some imports - ok = py:ensure_imported(json), - ok = py:ensure_imported(math), - ok = py:ensure_imported(os), + ok = py_import:ensure_imported(json), + ok = py_import:ensure_imported(math), + ok = py_import:ensure_imported(os), %% Verify they're in the registry - Imports1 = py:all_imports(), + Imports1 = py_import:all_imports(), ?assert(length(Imports1) >= 3), %% Clear all - ok = py:clear_imports(), + ok = py_import:clear_imports(), %% Verify registry is empty - Imports2 = py:all_imports(), + Imports2 = py_import:all_imports(), ?assertEqual([], Imports2). %% @doc Test get_imports returns the correct format get_imports_test(_Config) -> %% Clear and add imports - ok = py:clear_imports(), - ok = py:ensure_imported(json), - ok = py:ensure_imported(math, sqrt), + ok = py_import:clear_imports(), + ok = py_import:ensure_imported(json), + ok = py_import:ensure_imported(math, sqrt), %% Get imports - Imports = py:all_imports(), + Imports = py_import:all_imports(), %% Verify format ?assert(is_list(Imports)), @@ -488,7 +488,7 @@ get_imports_test(_Config) -> %% see the module in sys.modules. shared_interpreter_import_test(_Config) -> %% Clear registry - ok = py:clear_imports(), + ok = py_import:clear_imports(), %% Create two worker-mode contexts (they share the main interpreter) {ok, Ctx1} = py_context:new(#{mode => worker}), @@ -514,17 +514,17 @@ shared_interpreter_import_test(_Config) -> %% Event loop pool workers using the main interpreter should see these imports. 
event_loop_pool_import_test(_Config) -> %% Clear registry - ok = py:clear_imports(), + ok = py_import:clear_imports(), %% Import via py:import (goes to event loop pool's interpreter) - ok = py:ensure_imported(collections), + ok = py_import:ensure_imported(collections), %% Verify we can use it via py:call (uses event loop pool) {ok, Result} = py:call(collections, 'Counter', [[a, b, a, c, a, b]]), ?assert(is_map(Result) orelse is_tuple(Result)), %% Import another module - ok = py:ensure_imported(itertools), + ok = py_import:ensure_imported(itertools), %% Use it {ok, _} = py:call(itertools, chain, [[[1, 2], [3, 4]]]), @@ -537,10 +537,10 @@ event_loop_pool_import_test(_Config) -> %% to use them since they're in the interpreter's sys.modules. spawn_task_uses_import_test(_Config) -> %% Clear registry - ok = py:clear_imports(), + ok = py_import:clear_imports(), %% Import base64 module - ok = py:ensure_imported(base64), + ok = py_import:ensure_imported(base64), %% Define a simple function that uses base64 Code = <<" @@ -574,7 +574,7 @@ subinterp_isolation_test(_Config) -> {skip, "Subinterpreters not supported"}; true -> %% Clear registry so new contexts don't get pre-imported modules - ok = py:clear_imports(), + ok = py_import:clear_imports(), %% Create two OWN_GIL contexts (each has its own interpreter) {ok, Ctx1} = py_context:new(#{mode => owngil}), @@ -612,8 +612,8 @@ registry_applied_to_subinterp_test(_Config) -> {skip, "Subinterpreters not supported"}; true -> %% Clear registry and add an import - ok = py:clear_imports(), - ok = py:ensure_imported(uuid), + ok = py_import:clear_imports(), + ok = py_import:ensure_imported(uuid), %% Create a new subinterp context {ok, Ctx} = py_context:new(#{mode => subinterp}), @@ -624,7 +624,7 @@ registry_applied_to_subinterp_test(_Config) -> %% Clean up py_context:destroy(Ctx), - ok = py:clear_imports(), + ok = py_import:clear_imports(), ct:pal("Registry imports successfully applied to new subinterpreter") end. 
@@ -640,10 +640,10 @@ registry_applied_to_subinterp_test(_Config) -> %% a function from the module works (which requires it to be imported). import_in_sys_modules_test(_Config) -> %% Clear registry - ok = py:clear_imports(), + ok = py_import:clear_imports(), %% Import a module - ok = py:ensure_imported(decimal), + ok = py_import:ensure_imported(decimal), %% Verify the import worked by calling a function {ok, _} = py:call(decimal, 'Decimal', [<<"3.14">>]), @@ -665,14 +665,14 @@ _test_decimal_in_sys = 'decimal' in sys.modules %% contains the actual imported modules. registry_import_in_sys_modules_test(_Config) -> %% Clear registry - ok = py:clear_imports(), + ok = py_import:clear_imports(), %% Add to registry and import - ok = py:ensure_imported(fractions), - ok = py:ensure_imported(statistics), + ok = py_import:ensure_imported(fractions), + ok = py_import:ensure_imported(statistics), %% Verify ETS registry has the entries - Registry = py:all_imports(), + Registry = py_import:all_imports(), ?assert(lists:member({<<"fractions">>, all}, Registry)), ?assert(lists:member({<<"statistics">>, all}, Registry)), @@ -706,7 +706,7 @@ _sys_modules_keys = list(sys.modules.keys()) %% the interpreter's sys.modules. context_import_in_sys_modules_test(_Config) -> %% Clear registry - ok = py:clear_imports(), + ok = py_import:clear_imports(), %% Create a context {ok, Ctx} = py_context:new(#{mode => auto}), diff --git a/test/py_import_owngil_SUITE.erl b/test/py_import_owngil_SUITE.erl index 81ce4c7..3186658 100644 --- a/test/py_import_owngil_SUITE.erl +++ b/test/py_import_owngil_SUITE.erl @@ -1,4 +1,4 @@ -%%% @doc Test suite for py:import with OWN_GIL subinterpreters. +%%% @doc Test suite for py_import with OWN_GIL subinterpreters. %%% %%% Tests the import caching functionality with Python 3.12+ OWN_GIL mode, %%% which creates dedicated pthreads with independent Python GILs. 
@@ -67,7 +67,7 @@ init_per_suite(Config) -> timer:sleep(500), %% Clear any imports from previous test suites to avoid %% importing C extensions that crash in OWN_GIL subinterpreters - py:clear_imports(), + py_import:clear_imports(), %% Then check if subinterpreters are supported case py_nif:subinterp_supported() of true -> From 13e59be72d527e4df8f441068462495288dd3c8f Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Mon, 23 Mar 2026 07:51:42 +0100 Subject: [PATCH 09/18] Fix ASAN crash by avoiding decimal module in import tests The _decimal C extension has global state (mpd_setminalloc) that cannot handle being loaded in multiple interpreters. When the import registry contained decimal, it would be applied to subinterpreters causing memory corruption detected by AddressSanitizer. Replace decimal with textwrap (pure Python) in import_in_sys_modules_test. --- test/py_import_SUITE.erl | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/test/py_import_SUITE.erl b/test/py_import_SUITE.erl index 45adf17..3d9a2f2 100644 --- a/test/py_import_SUITE.erl +++ b/test/py_import_SUITE.erl @@ -638,26 +638,30 @@ registry_applied_to_subinterp_test(_Config) -> %% After calling py:import, the module should be in the interpreter's %% sys.modules dictionary. We verify this by checking that calling %% a function from the module works (which requires it to be imported). +%% +%% Note: We use textwrap (pure Python) instead of decimal because the +%% _decimal C extension has global state that crashes in subinterpreters. 
import_in_sys_modules_test(_Config) -> %% Clear registry ok = py_import:clear_imports(), - %% Import a module - ok = py_import:ensure_imported(decimal), + %% Import a pure Python module (avoid C extensions like decimal + %% which have global state that crashes in subinterpreters) + ok = py_import:ensure_imported(textwrap), %% Verify the import worked by calling a function - {ok, _} = py:call(decimal, 'Decimal', [<<"3.14">>]), + {ok, _} = py:call(textwrap, fill, [<<"Hello world">>, 5]), %% Now check sys.modules using the same process (important!) %% We use exec to define a helper, then eval to check ok = py:exec(<<" import sys -_test_decimal_in_sys = 'decimal' in sys.modules +_test_textwrap_in_sys = 'textwrap' in sys.modules ">>), - {ok, InSysModules} = py:eval(<<"_test_decimal_in_sys">>), + {ok, InSysModules} = py:eval(<<"_test_textwrap_in_sys">>), ?assertEqual(true, InSysModules), - ct:pal("py:import correctly adds module to sys.modules"). + ct:pal("py_import:ensure_imported correctly adds module to sys.modules"). %% @doc Test that ETS registry and sys.modules stay in sync %% From bd55baf590e623fd4e51541734aeb1195a4ebc19 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Mon, 23 Mar 2026 07:58:16 +0100 Subject: [PATCH 10/18] Remove dead subinterpreter pool staleness code Remove usage_count, marked_stale, generation tracking and related functions that were designed for the removed flush_imports feature. Since module unloading is not supported, this complexity is no longer needed. Removed: - subinterp_pool_acquire/release functions - subinterp_pool_flush_generation/get_generation functions - usage_count, marked_stale, generation fields from subinterp_slot_t - g_import_generation global Also fix ASAN crash by using textwrap instead of decimal in tests, since _decimal C extension has global state that crashes in subinterpreters. 
--- c_src/py_nif.c | 14 +-- c_src/py_subinterp_pool.c | 195 -------------------------------------- c_src/py_subinterp_pool.h | 46 --------- 3 files changed, 1 insertion(+), 254 deletions(-) diff --git a/c_src/py_nif.c b/c_src/py_nif.c index 7511bd7..b33c643 100644 --- a/c_src/py_nif.c +++ b/c_src/py_nif.c @@ -441,9 +441,6 @@ static void context_destructor(ErlNifEnv *env, void *obj) { ctx->globals = NULL; ctx->locals = NULL; - /* Release usage count - if slot is stale and count drops to 0, - * the subinterpreter will be destroyed automatically */ - subinterp_pool_release(ctx->pool_slot); subinterp_pool_free(ctx->pool_slot); ctx->pool_slot = -1; ctx->destroyed = true; @@ -4194,13 +4191,9 @@ static ERL_NIF_TERM nif_context_create(ErlNifEnv *env, int argc, const ERL_NIF_T ctx->pool_slot = slot; - /* Acquire usage count for the pool slot */ - subinterp_pool_acquire(slot); - /* Get the pool slot for interpreter access */ subinterp_slot_t *pool_slot = subinterp_pool_get(slot); if (pool_slot == NULL || !pool_slot->initialized) { - subinterp_pool_release(slot); /* Release usage count */ subinterp_pool_free(slot); close(ctx->callback_pipe[0]); close(ctx->callback_pipe[1]); @@ -4224,7 +4217,6 @@ static ERL_NIF_TERM nif_context_create(ErlNifEnv *env, int argc, const ERL_NIF_T Py_XDECREF(ctx->module_cache); PyThreadState_Swap(saved); PyGILState_Release(gstate); - subinterp_pool_release(slot); /* Release usage count */ subinterp_pool_free(slot); close(ctx->callback_pipe[0]); close(ctx->callback_pipe[1]); @@ -4355,11 +4347,7 @@ static ERL_NIF_TERM nif_context_destroy(ErlNifEnv *env, int argc, const ERL_NIF_ ctx->locals = NULL; ctx->module_cache = NULL; - /* Release the pool slot back to the pool. - * subinterp_pool_release decrements usage count and may destroy - * the subinterpreter if it's marked stale and count drops to 0. - * subinterp_pool_free marks the slot as available for new contexts. 
*/ - subinterp_pool_release(ctx->pool_slot); + /* Release the pool slot back to the pool */ subinterp_pool_free(ctx->pool_slot); ctx->pool_slot = -1; diff --git a/c_src/py_subinterp_pool.c b/c_src/py_subinterp_pool.c index 9431948..41b9875 100644 --- a/c_src/py_subinterp_pool.c +++ b/c_src/py_subinterp_pool.c @@ -47,9 +47,6 @@ static int g_pool_size = 0; /** @brief Pool initialized flag */ static _Atomic bool g_pool_initialized = false; -/** @brief Global import generation counter */ -static _Atomic uint64_t g_import_generation = 0; - /* ============================================================================ * Forward declarations for functions defined in other modules * ============================================================================ */ @@ -199,9 +196,6 @@ int subinterp_pool_init(int size) { } slot->initialized = true; - atomic_store(&slot->usage_count, 0); - atomic_store(&slot->marked_stale, false); - slot->generation = atomic_load(&g_import_generation); /* Swap back to main thread state before creating next subinterpreter */ PyThreadState_Swap(main_tstate); @@ -351,193 +345,4 @@ bool subinterp_pool_is_initialized(void) { return atomic_load(&g_pool_initialized); } -void subinterp_pool_acquire(int slot) { - if (slot < 0 || slot >= g_pool_size) { - return; - } - - subinterp_slot_t *s = &g_subinterp_pool[slot]; - if (!s->initialized) { - return; - } - - atomic_fetch_add(&s->usage_count, 1); -} - -/** - * @brief Reinitialize a pool slot with a fresh subinterpreter - * - * Called after destroying a stale subinterpreter to create a fresh one. - * Assumes GIL is already held. 
- * - * @param slot_idx Slot index - * @return 0 on success, -1 on failure - */ -static int subinterp_pool_reinit_slot(int slot_idx) { - subinterp_slot_t *slot = &g_subinterp_pool[slot_idx]; - - /* Save main thread state */ - PyThreadState *main_tstate = PyThreadState_Get(); - - /* Configure subinterpreter with SHARED GIL */ - PyInterpreterConfig config = { - .use_main_obmalloc = 0, - .allow_fork = 0, - .allow_exec = 0, - .allow_threads = 1, - .allow_daemon_threads = 0, - .check_multi_interp_extensions = 1, - .gil = PyInterpreterConfig_SHARED_GIL, - }; - - PyThreadState *tstate = NULL; - PyStatus status = Py_NewInterpreterFromConfig(&tstate, &config); - - if (PyStatus_Exception(status) || tstate == NULL) { - fprintf(stderr, "subinterp_pool_reinit_slot: failed to create subinterp for slot %d\n", slot_idx); - PyThreadState_Swap(main_tstate); - return -1; - } - - /* tstate is now the current thread state for the new interpreter */ - slot->interp = PyThreadState_GetInterpreter(tstate); - slot->tstate = tstate; - - /* Initialize globals/locals */ - slot->globals = PyDict_New(); - slot->locals = PyDict_New(); - slot->module_cache = PyDict_New(); - - if (slot->globals == NULL || slot->locals == NULL || slot->module_cache == NULL) { - Py_XDECREF(slot->module_cache); - Py_XDECREF(slot->globals); - Py_XDECREF(slot->locals); - Py_EndInterpreter(tstate); - PyThreadState_Swap(main_tstate); - return -1; - } - - /* Import __builtins__ into globals */ - PyObject *builtins = PyEval_GetBuiltins(); - PyDict_SetItemString(slot->globals, "__builtins__", builtins); - - /* Create erlang module in this subinterpreter */ - if (create_erlang_module() >= 0) { - /* Register ReactorBuffer and PyBuffer */ - ReactorBuffer_register_with_reactor(); - PyBuffer_register_with_module(); - - /* Import erlang module into globals */ - PyObject *erlang_module = PyImport_ImportModule("erlang"); - if (erlang_module != NULL) { - PyDict_SetItemString(slot->globals, "erlang", erlang_module); - 
Py_DECREF(erlang_module); - } - } - - /* Initialize event loop for this subinterpreter */ - init_subinterpreter_event_loop(NULL); - - slot->initialized = true; - atomic_store(&slot->usage_count, 0); - atomic_store(&slot->marked_stale, false); - slot->generation = atomic_load(&g_import_generation); - - /* Swap back to main thread state */ - PyThreadState_Swap(main_tstate); - -#ifdef DEBUG - fprintf(stderr, "subinterp_pool_reinit_slot: reinitialized slot %d\n", slot_idx); -#endif - - return 0; -} - -void subinterp_pool_release(int slot) { - if (slot < 0 || slot >= g_pool_size) { - return; - } - - subinterp_slot_t *s = &g_subinterp_pool[slot]; - if (!s->initialized) { - return; - } - - int prev_count = atomic_fetch_sub(&s->usage_count, 1); - - /* If usage drops to 0 and slot is marked stale, destroy and reinitialize - * the subinterpreter. We need the GIL to safely do Python operations. */ - if (prev_count == 1 && atomic_load(&s->marked_stale)) { - /* Acquire the GIL */ - PyGILState_STATE gstate = PyGILState_Ensure(); - - /* Save current thread state and swap to the subinterpreter */ - PyThreadState *saved = PyThreadState_Swap(s->tstate); - - /* Clean up Python objects */ - Py_XDECREF(s->module_cache); - Py_XDECREF(s->globals); - Py_XDECREF(s->locals); - - /* End the interpreter - this frees s->tstate */ - Py_EndInterpreter(s->tstate); - - /* Clear slot state */ - s->interp = NULL; - s->tstate = NULL; - s->globals = NULL; - s->locals = NULL; - s->module_cache = NULL; - s->initialized = false; - atomic_store(&s->marked_stale, false); - - /* Swap back to saved thread state (may be NULL if we didn't have one) */ - PyThreadState_Swap(saved); - - /* Reinitialize the slot with a fresh subinterpreter */ - if (subinterp_pool_reinit_slot(slot) < 0) { - /* Reinit failed - clear allocation bit so slot is skipped */ - uint64_t mask = ~(1ULL << slot); - atomic_fetch_and(&g_pool_allocation, mask); - fprintf(stderr, "subinterp_pool_release: failed to reinit slot %d\n", slot); - } - 
/* If reinit succeeded, slot stays allocated but now has fresh interp */ - - /* Release the GIL */ - PyGILState_Release(gstate); - -#ifdef DEBUG - fprintf(stderr, "subinterp_pool_release: recycled stale slot %d\n", slot); -#endif - } -} - -uint64_t subinterp_pool_flush_generation(void) { - if (!atomic_load(&g_pool_initialized)) { - return 0; - } - - /* Increment generation */ - uint64_t new_gen = atomic_fetch_add(&g_import_generation, 1) + 1; - - /* Mark all initialized slots as stale */ - for (int i = 0; i < g_pool_size; i++) { - subinterp_slot_t *slot = &g_subinterp_pool[i]; - if (slot->initialized) { - atomic_store(&slot->marked_stale, true); - } - } - -#ifdef DEBUG - fprintf(stderr, "subinterp_pool_flush_generation: incremented to %llu, marked all slots stale\n", - (unsigned long long)new_gen); -#endif - - return new_gen; -} - -uint64_t subinterp_pool_get_generation(void) { - return atomic_load(&g_import_generation); -} - #endif /* HAVE_SUBINTERPRETERS */ diff --git a/c_src/py_subinterp_pool.h b/c_src/py_subinterp_pool.h index bd00a38..35ee917 100644 --- a/c_src/py_subinterp_pool.h +++ b/c_src/py_subinterp_pool.h @@ -84,15 +84,6 @@ typedef struct { /** @brief Whether this slot is initialized and ready for use */ bool initialized; - - /** @brief Number of contexts currently using this slot */ - _Atomic int usage_count; - - /** @brief Marked for destruction when usage_count reaches 0 */ - _Atomic bool marked_stale; - - /** @brief Import generation when slot was created */ - uint64_t generation; } subinterp_slot_t; /** @@ -172,43 +163,6 @@ void subinterp_pool_shutdown(void); */ bool subinterp_pool_is_initialized(void); -/** - * @brief Increment usage count for a slot - * - * Called when a context starts using a pool slot. - * - * @param slot Slot index - */ -void subinterp_pool_acquire(int slot); - -/** - * @brief Decrement usage count for a slot - * - * Called when a context stops using a pool slot. 
- * If the slot is marked stale and usage_count reaches 0, - * the subinterpreter is destroyed and the slot is cleared. - * - * @param slot Slot index - */ -void subinterp_pool_release(int slot); - -/** - * @brief Increment import generation and mark all pool slots stale - * - * Called by flush_imports to trigger subinterpreter replacement. - * Stale slots will be destroyed when their usage_count reaches 0. - * - * @return The new generation number - */ -uint64_t subinterp_pool_flush_generation(void); - -/** - * @brief Get the current import generation - * - * @return Current generation number - */ -uint64_t subinterp_pool_get_generation(void); - #endif /* HAVE_SUBINTERPRETERS */ #endif /* PY_SUBINTERP_POOL_H */ From e30edfcb226edecf119d62b847b6a2cf12e2964f Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Mon, 23 Mar 2026 08:02:08 +0100 Subject: [PATCH 11/18] Remove py_import:import_stats/0 --- src/py_import.erl | 19 ------------------- test/py_import_SUITE.erl | 36 ++---------------------------------- 2 files changed, 2 insertions(+), 53 deletions(-) diff --git a/src/py_import.erl b/src/py_import.erl index 8fcaaf2..f1d56a0 100644 --- a/src/py_import.erl +++ b/src/py_import.erl @@ -43,7 +43,6 @@ is_imported/2, all_imports/0, clear_imports/0, - import_stats/0, import_list/0, %% Path registry add_path/1, @@ -229,24 +228,6 @@ clear_imports() -> end, ok. -%% @doc Get import registry statistics. -%% -%% Returns a map with the count of registered imports. -%% -%% Example: -%% ``` -%% {ok, #{count => 5}} = py_import:import_stats(). -%% ''' -%% -%% @returns {ok, Stats} where Stats is a map with registry metrics --spec import_stats() -> {ok, map()} | {error, term()}. -import_stats() -> - Count = case ets:info(?IMPORT_REGISTRY) of - undefined -> 0; - _ -> ets:info(?IMPORT_REGISTRY, size) - end, - {ok, #{count => Count}}. - %% @doc List all registered imports. %% %% Returns a map of modules to their registered functions. 
diff --git a/test/py_import_SUITE.erl b/test/py_import_SUITE.erl index 3d9a2f2..9f7a581 100644 --- a/test/py_import_SUITE.erl +++ b/test/py_import_SUITE.erl @@ -22,7 +22,6 @@ import_nonexistent_module_test/1, import_nonexistent_function_test/1, import_idempotent_test/1, - import_stats_test/1, import_list_test/1, import_speeds_up_calls_test/1, import_multiprocess_test/1, @@ -55,7 +54,6 @@ groups() -> import_nonexistent_module_test, import_nonexistent_function_test, import_idempotent_test, - import_stats_test, import_list_test, import_speeds_up_calls_test, import_multiprocess_test, @@ -186,34 +184,6 @@ import_idempotent_test(_Config) -> %% Still works {ok, _} = py:call(json, dumps, [[1]]). -%% @doc Test import stats -import_stats_test(_Config) -> - %% Start fresh - ok = py_import:clear_imports(), - - %% Check empty stats - {ok, Stats0} = py_import:import_stats(), - ?assertEqual(0, maps:get(count, Stats0, 0)), - - %% Import some modules - ok = py_import:ensure_imported(json), - ok = py_import:ensure_imported(math), - ok = py_import:ensure_imported(os), - - %% Check stats - {ok, Stats1} = py_import:import_stats(), - Count1 = maps:get(count, Stats1, 0), - ?assertEqual(3, Count1), - - %% Import functions - ok = py_import:ensure_imported(json, dumps), - ok = py_import:ensure_imported(json, loads), - - %% Check updated stats (3 modules + 2 functions = 5) - {ok, Stats2} = py_import:import_stats(), - Count2 = maps:get(count, Stats2, 0), - ?assertEqual(5, Count2). 
- %% @doc Test listing imports import_list_test(_Config) -> %% Start fresh @@ -323,11 +293,10 @@ import_multiprocess_test(_Config) -> ?assertEqual([done, done, done], Results), %% Now verify the GLOBAL registry has all entries - {ok, Stats} = py_import:import_stats(), {ok, List} = py_import:import_list(), %% Total entries: json, json.dumps, math, math.sqrt, math.floor, os, os.getcwd, string = 8 - ?assertEqual(8, maps:get(count, Stats)), + ?assertEqual(8, length(py_import:all_imports())), %% Verify all modules are in the shared registry ?assert(maps:is_key(<<"json">>, List)), @@ -383,8 +352,7 @@ import_concurrent_stress_test(_Config) -> end, Results), %% Verify the global registry has all modules - {ok, Stats} = py_import:import_stats(), - Count = maps:get(count, Stats), + Count = length(py_import:all_imports()), %% Should have all 10 modules (some may have been imported multiple times but ETS dedupes) ?assertEqual(10, Count), From 3b81fffe71b3f1996338c02f03768c45d8a53176 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Mon, 23 Mar 2026 08:12:01 +0100 Subject: [PATCH 12/18] Add test for py_import:add_path/1 --- test/py_import_SUITE.erl | 60 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/test/py_import_SUITE.erl b/test/py_import_SUITE.erl index 9f7a581..ded1793 100644 --- a/test/py_import_SUITE.erl +++ b/test/py_import_SUITE.erl @@ -40,7 +40,9 @@ %% sys.modules verification tests import_in_sys_modules_test/1, registry_import_in_sys_modules_test/1, - context_import_in_sys_modules_test/1 + context_import_in_sys_modules_test/1, + %% Path registry tests + add_path_test/1 ]). all() -> @@ -72,7 +74,9 @@ groups() -> %% sys.modules verification tests import_in_sys_modules_test, registry_import_in_sys_modules_test, - context_import_in_sys_modules_test + context_import_in_sys_modules_test, + %% Path registry tests + add_path_test ]}]. 
init_per_suite(Config) -> @@ -705,3 +709,55 @@ _sys_keys = list(sys.modules.keys()) py_context:destroy(Ctx), ct:pal("Context imports correctly populate sys.modules"). + +%% ============================================================================ +%% Path Registry Tests +%% ============================================================================ + +%% @doc Test that add_path registers a path and makes modules importable +%% +%% This test creates a custom module in priv_dir, adds its path via add_path, +%% then verifies the module can be imported and called. +add_path_test(Config) -> + %% Clear any existing paths + ok = py_import:clear_paths(), + + %% Create test module in priv_dir (guaranteed writable during tests) + PrivDir = ?config(priv_dir, Config), + ModuleDir = filename:join(PrivDir, "custom_modules"), + ok = filelib:ensure_dir(filename:join(ModuleDir, "dummy")), + + %% Write a simple Python module + ModulePath = filename:join(ModuleDir, "sample_module.py"), + ModuleContent = <<"def greet(name):\n" + " return f\"Hello, {name}!\"\n" + "\n" + "def add(a, b):\n" + " return a + b\n" + "\n" + "VERSION = \"1.0.0\"\n">>, + ok = file:write_file(ModulePath, ModuleContent), + + %% Add path + ok = py_import:add_path(ModuleDir), + + %% Verify path is registered + ?assert(py_import:is_path_added(ModuleDir)), + Paths = py_import:all_paths(), + ?assertEqual(1, length(Paths)), + + %% Create a new context to apply paths + {ok, Ctx} = py_context:new(#{mode => auto}), + + %% Import and call the sample module + {ok, Greeting} = py_context:call(Ctx, sample_module, greet, [<<"World">>], #{}), + ?assertEqual(<<"Hello, World!">>, Greeting), + + {ok, Sum} = py_context:call(Ctx, sample_module, add, [2, 3], #{}), + ?assertEqual(5, Sum), + + %% Clean up + py_context:destroy(Ctx), + ok = py_import:clear_paths(), + + ct:pal("add_path successfully registers paths and enables module imports"). 
From d42b9039a3b45abbe5fea90ab03a3102d80cb418 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Mon, 23 Mar 2026 08:13:55 +0100 Subject: [PATCH 13/18] Guard socket operations against closed file descriptors --- priv/_erlang_impl/_transport.py | 59 +++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/priv/_erlang_impl/_transport.py b/priv/_erlang_impl/_transport.py index 4b53c65..8f61000 100644 --- a/priv/_erlang_impl/_transport.py +++ b/priv/_erlang_impl/_transport.py @@ -80,12 +80,23 @@ def _read_ready(self): if self._conn_lost: return + # Guard against closed socket (fd == -1) + if self._sock.fileno() == -1: + return + for _ in range(self._max_reads_per_call): try: data = self._sock.recv(self.max_size) except (BlockingIOError, InterruptedError): # EAGAIN - no more data available return + except OSError as exc: + # Handle bad file descriptor (socket closed) + if exc.errno == errno.EBADF: + self._conn_lost += 1 + return + self._fatal_error(exc, 'Fatal read error') + return except Exception as exc: self._fatal_error(exc, 'Fatal read error') return @@ -140,6 +151,11 @@ def _write_ready_cb(self): Drains buffer until EAGAIN with a budget to avoid starvation. 
""" + # Guard against closed socket (fd == -1) + if self._sock.fileno() == -1: + self._conn_lost += 1 + return + for _ in range(self._max_writes_per_call): remaining = len(self._buffer) - self._buffer_offset if remaining <= 0: @@ -155,6 +171,14 @@ def _write_ready_cb(self): except (BlockingIOError, InterruptedError): # EAGAIN - socket buffer full return + except OSError as exc: + # Handle bad file descriptor (socket closed) + if exc.errno == errno.EBADF: + self._conn_lost += 1 + return + self._loop.remove_writer(self._fileno) + self._fatal_error(exc, 'Fatal write error') + return except Exception as exc: self._loop.remove_writer(self._fileno) self._fatal_error(exc, 'Fatal write error') @@ -198,10 +222,17 @@ def close(self): def _call_connection_lost(self, exc): """Call protocol.connection_lost().""" + # Guard against double cleanup + if self._conn_lost > 1: + return + self._conn_lost += 1 try: self._protocol.connection_lost(exc) finally: - self._sock.close() + try: + self._sock.close() + except OSError: + pass def _fatal_error(self, exc, message='Fatal error'): """Handle fatal errors.""" @@ -310,6 +341,10 @@ def _read_ready(self): if self._conn_lost: return + # Guard against closed socket (fd == -1) + if self._sock.fileno() == -1: + return + for _ in range(self._max_reads_per_call): try: data, addr = self._sock.recvfrom(self.max_size) @@ -317,6 +352,10 @@ def _read_ready(self): # EAGAIN - no more data available return except OSError as exc: + # Handle bad file descriptor (socket closed) + if exc.errno == errno.EBADF: + self._conn_lost += 1 + return self._protocol.error_received(exc) return except Exception as exc: @@ -362,6 +401,11 @@ def sendto(self, data, addr=None): def _write_ready(self): """Called when socket is ready for writing.""" + # Guard against closed socket (fd == -1) + if self._sock.fileno() == -1: + self._conn_lost += 1 + return + while self._buffer: data, addr = self._buffer[0] try: @@ -375,6 +419,10 @@ def _write_ready(self): except 
(BlockingIOError, InterruptedError): return except OSError as exc: + # Handle bad file descriptor (socket closed) + if exc.errno == errno.EBADF: + self._conn_lost += 1 + return self._buffer.popleft() self._protocol.error_received(exc) return @@ -400,10 +448,17 @@ def close(self): def _call_connection_lost(self, exc): """Call protocol.connection_lost().""" + # Guard against double cleanup + if self._conn_lost > 1: + return + self._conn_lost += 1 try: self._protocol.connection_lost(exc) finally: - self._sock.close() + try: + self._sock.close() + except OSError: + pass def _fatal_error(self, exc, message='Fatal error on datagram transport'): """Handle fatal errors.""" From a2d50bb2e8c55f3a6c32afd00313b6fe43061a2e Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Mon, 23 Mar 2026 08:59:25 +0100 Subject: [PATCH 14/18] Fix context routing and avoid _decimal in tests - Auto-bind context on first access to ensure same process uses same context - Check if bound context is alive to handle router restart - Replace statistics with json in test (avoids _decimal subinterpreter issues) --- src/py_context_router.erl | 16 ++++++++++++++-- test/py_import_SUITE.erl | 18 +++++++++++------- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/src/py_context_router.erl b/src/py_context_router.erl index 6335d40..2cd1910 100644 --- a/src/py_context_router.erl +++ b/src/py_context_router.erl @@ -184,11 +184,23 @@ get_context(N) when is_integer(N), N > 0 -> persistent_term:get(?POOL_CONTEXT_KEY(?DEFAULT_POOL, N)); get_context(Pool) when is_atom(Pool) -> %% Get context from named pool + %% Auto-bind on first access to ensure consistent context for a process case get(?BOUND_CONTEXT_KEY(Pool)) of undefined -> - select_by_scheduler(Pool); + Ctx = select_by_scheduler(Pool), + put(?BOUND_CONTEXT_KEY(Pool), Ctx), + Ctx; Ctx -> - Ctx + %% Verify bound context is still alive + case is_process_alive(Ctx) of + true -> + Ctx; + false -> + %% Re-bind to a new context + NewCtx = 
select_by_scheduler(Pool), + put(?BOUND_CONTEXT_KEY(Pool), NewCtx), + NewCtx + end end. %% @doc Bind a context to the current process for the default pool. diff --git a/test/py_import_SUITE.erl b/test/py_import_SUITE.erl index ded1793..0342aab 100644 --- a/test/py_import_SUITE.erl +++ b/test/py_import_SUITE.erl @@ -639,40 +639,44 @@ _test_textwrap_in_sys = 'textwrap' in sys.modules %% %% The ETS registry tracks what should be imported, and sys.modules %% contains the actual imported modules. +%% +%% Note: Avoid modules that import C extensions with global state issues +%% (e.g., statistics imports _decimal). Use json which has proper +%% subinterpreter support. See https://github.com/python/cpython/issues/106078 registry_import_in_sys_modules_test(_Config) -> %% Clear registry ok = py_import:clear_imports(), %% Add to registry and import ok = py_import:ensure_imported(fractions), - ok = py_import:ensure_imported(statistics), + ok = py_import:ensure_imported(json), %% Verify ETS registry has the entries Registry = py_import:all_imports(), ?assert(lists:member({<<"fractions">>, all}, Registry)), - ?assert(lists:member({<<"statistics">>, all}, Registry)), + ?assert(lists:member({<<"json">>, all}, Registry)), %% Use the modules to ensure they're imported {ok, _} = py:call(fractions, 'Fraction', [1, 3]), - {ok, _} = py:call(statistics, mean, [[1, 2, 3, 4, 5]]), + {ok, _} = py:call(json, dumps, [[1, 2, 3]]), %% Verify both are in sys.modules by checking from Python ok = py:exec(<<" import sys _fractions_in_sys = 'fractions' in sys.modules -_statistics_in_sys = 'statistics' in sys.modules +_json_in_sys = 'json' in sys.modules _sys_modules_keys = list(sys.modules.keys()) ">>), {ok, FractionsInSys} = py:eval(<<"_fractions_in_sys">>), - {ok, StatsInSys} = py:eval(<<"_statistics_in_sys">>), + {ok, JsonInSys} = py:eval(<<"_json_in_sys">>), ?assertEqual(true, FractionsInSys), - ?assertEqual(true, StatsInSys), + ?assertEqual(true, JsonInSys), %% Get the list of modules in 
sys.modules that match our registry {ok, SysModulesList} = py:eval(<<"_sys_modules_keys">>), ?assert(lists:member(<<"fractions">>, SysModulesList)), - ?assert(lists:member(<<"statistics">>, SysModulesList)), + ?assert(lists:member(<<"json">>, SysModulesList)), ct:pal("ETS registry and sys.modules are in sync"). From ba5f4feb623a77dca7c7f9fda2ead131baccb705 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Mon, 23 Mar 2026 09:44:27 +0100 Subject: [PATCH 15/18] Require Python 3.14+ for OWN_GIL mode OWN_GIL mode is disabled on Python < 3.14 due to C extension global state bugs (e.g., _decimal). See https://github.com/python/cpython/issues/106078 Changes: - Add HAVE_OWNGIL compile flag (Python 3.14+) - Add py_nif:owngil_supported/0 NIF - Update py_context and py_event_loop_pool to check owngil_supported - Update OWN_GIL test suites to skip on Python < 3.14 - Add Python 3.14 to CI matrix and ASAN tests --- .github/workflows/ci.yml | 8 +++++++- c_src/CMakeLists.txt | 14 ++++++++++++++ c_src/py_nif.c | 18 ++++++++++++++++++ src/py_context.erl | 6 +++--- src/py_event_loop_pool.erl | 5 +++-- src/py_nif.erl | 8 ++++++++ test/py_context_owngil_SUITE.erl | 4 ++-- test/py_import_SUITE.erl | 6 +++--- test/py_import_owngil_SUITE.erl | 6 +++--- test/py_owngil_features_SUITE.erl | 4 ++-- 10 files changed, 63 insertions(+), 16 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a42752c..9b7206a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,6 +22,9 @@ jobs: - os: ubuntu-24.04 otp: "27.0" python: "3.13" + - os: ubuntu-24.04 + otp: "27.0" + python: "3.14" # macOS - os: macos-15 otp: "27" @@ -29,6 +32,9 @@ jobs: - os: macos-15 otp: "27" python: "3.13" + - os: macos-15 + otp: "27" + python: "3.14" steps: - name: Checkout @@ -194,7 +200,7 @@ jobs: strategy: fail-fast: false matrix: - python: ["3.12", "3.13"] + python: ["3.12", "3.13", "3.14"] steps: - name: Checkout diff --git a/c_src/CMakeLists.txt b/c_src/CMakeLists.txt 
index c110207..48ac631 100644 --- a/c_src/CMakeLists.txt +++ b/c_src/CMakeLists.txt @@ -159,12 +159,23 @@ int main(void) { if(HAVE_SUBINTERPRETERS) message(STATUS "Subinterpreter API detected (PyInterpreterConfig_OWN_GIL available)") + # OWN_GIL mode requires Python 3.14+ due to C extension global state bugs + # in 3.12/3.13 (e.g., _decimal). See https://github.com/python/cpython/issues/106078 + if(Python3_VERSION VERSION_GREATER_EQUAL "3.14") + set(HAVE_OWNGIL TRUE) + message(STATUS "Python ${Python3_VERSION} >= 3.14, OWN_GIL mode enabled") + else() + set(HAVE_OWNGIL FALSE) + message(STATUS "Python ${Python3_VERSION} < 3.14, OWN_GIL mode disabled (C extension bugs)") + endif() else() message(STATUS "Subinterpreter API compile test failed, using shared GIL fallback") + set(HAVE_OWNGIL FALSE) endif() else() message(STATUS "Python ${Python3_VERSION} < 3.12, subinterpreter API not available") set(HAVE_SUBINTERPRETERS FALSE) + set(HAVE_OWNGIL FALSE) endif() # Check for free-threaded Python (Python 3.13+ with --disable-gil / nogil build) @@ -201,6 +212,9 @@ endif() if(HAVE_SUBINTERPRETERS) target_compile_definitions(py_nif PRIVATE HAVE_SUBINTERPRETERS=1) endif() +if(HAVE_OWNGIL) + target_compile_definitions(py_nif PRIVATE HAVE_OWNGIL=1) +endif() if(HAVE_FREE_THREADED) target_compile_definitions(py_nif PRIVATE HAVE_FREE_THREADED=1) endif() diff --git a/c_src/py_nif.c b/c_src/py_nif.c index b33c643..07e4c95 100644 --- a/c_src/py_nif.c +++ b/c_src/py_nif.c @@ -1970,6 +1970,23 @@ static ERL_NIF_TERM nif_subinterp_supported(ErlNifEnv *env, int argc, const ERL_ #endif } +/** + * @brief Check if OWN_GIL mode is supported (Python 3.14+) + * + * OWN_GIL requires Python 3.14+ due to C extension global state bugs + * in earlier versions (e.g., _decimal). See gh-106078. 
+ */ +static ERL_NIF_TERM nif_owngil_supported(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + (void)argv; + +#ifdef HAVE_OWNGIL + return ATOM_TRUE; +#else + return ATOM_FALSE; +#endif +} + #ifdef HAVE_SUBINTERPRETERS static ERL_NIF_TERM nif_subinterp_worker_new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { @@ -7130,6 +7147,7 @@ static ErlNifFunc nif_funcs[] = { /* Sub-interpreter support (shared GIL pool model) */ {"subinterp_supported", 0, nif_subinterp_supported, 0}, + {"owngil_supported", 0, nif_owngil_supported, 0}, {"subinterp_worker_new", 0, nif_subinterp_worker_new, 0}, {"subinterp_worker_destroy", 1, nif_subinterp_worker_destroy, 0}, {"subinterp_call", 5, nif_subinterp_call, ERL_NIF_DIRTY_JOB_CPU_BOUND}, diff --git a/src/py_context.erl b/src/py_context.erl index 3f41e5c..335fed4 100644 --- a/src/py_context.erl +++ b/src/py_context.erl @@ -531,10 +531,10 @@ create_context(subinterp) -> create_context(worker) -> py_nif:context_create(worker); create_context(owngil) -> - %% OWN_GIL mode requires Python 3.12+ - case py_nif:subinterp_supported() of + %% OWN_GIL mode requires Python 3.14+ due to C extension bugs in earlier versions + case py_nif:owngil_supported() of true -> py_nif:context_create(owngil); - false -> {error, owngil_requires_python312} + false -> {error, owngil_requires_python314} end. 
%% @private diff --git a/src/py_event_loop_pool.erl b/src/py_event_loop_pool.erl index e9a5996..339203e 100644 --- a/src/py_event_loop_pool.erl +++ b/src/py_event_loop_pool.erl @@ -370,8 +370,9 @@ run_async(Request) -> init([NumLoops]) -> process_flag(trap_exit, true), - %% Check if OWN_GIL mode is enabled - OwnGilEnabled = application:get_env(erlang_python, event_loop_pool_owngil, false), + %% Check if OWN_GIL mode is enabled and supported (Python 3.14+) + OwnGilConfigured = application:get_env(erlang_python, event_loop_pool_owngil, false), + OwnGilEnabled = OwnGilConfigured andalso py_nif:owngil_supported(), %% Initialize OWN_GIL infrastructure if enabled {Sessions, OwnGilReady} = case OwnGilEnabled of diff --git a/src/py_nif.erl b/src/py_nif.erl index 1542685..dd12c28 100644 --- a/src/py_nif.erl +++ b/src/py_nif.erl @@ -54,6 +54,7 @@ async_stream/6, %% Sub-interpreters (Python 3.12+) - shared GIL pool model subinterp_supported/0, + owngil_supported/0, subinterp_worker_new/0, subinterp_worker_destroy/1, subinterp_call/5, @@ -490,6 +491,13 @@ async_stream(_WorkerRef, _Module, _Func, _Args, _Kwargs, _CallerPid) -> subinterp_supported() -> ?NIF_STUB. +%% @doc Check if OWN_GIL mode is supported (Python 3.14+). +%% OWN_GIL requires Python 3.14+ due to C extension global state bugs +%% in earlier versions (e.g., _decimal). See gh-106078. +-spec owngil_supported() -> boolean(). +owngil_supported() -> + ?NIF_STUB. + %% @doc Create a new sub-interpreter worker with its own GIL. %% Returns an opaque reference to be used with subinterp functions. -spec subinterp_worker_new() -> {ok, reference()} | {error, term()}. diff --git a/test/py_context_owngil_SUITE.erl b/test/py_context_owngil_SUITE.erl index 918ba09..66f7c2d 100644 --- a/test/py_context_owngil_SUITE.erl +++ b/test/py_context_owngil_SUITE.erl @@ -96,12 +96,12 @@ groups() -> ]}]. 
init_per_suite(Config) -> - case py_nif:subinterp_supported() of + case py_nif:owngil_supported() of true -> {ok, _} = application:ensure_all_started(erlang_python), Config; false -> - {skip, "Requires Python 3.12+"} + {skip, "OWN_GIL requires Python 3.14+"} end. end_per_suite(_Config) -> diff --git a/test/py_import_SUITE.erl b/test/py_import_SUITE.erl index 0342aab..8bd7575 100644 --- a/test/py_import_SUITE.erl +++ b/test/py_import_SUITE.erl @@ -540,10 +540,10 @@ def encode_test(data): %% OWN_GIL contexts each have their own interpreter, so imports in one %% should NOT be visible in another (different sys.modules). subinterp_isolation_test(_Config) -> - %% Skip if subinterpreters not supported - case py_nif:subinterp_supported() of + %% Skip if OWN_GIL not supported (requires Python 3.14+) + case py_nif:owngil_supported() of false -> - {skip, "Subinterpreters not supported"}; + {skip, "OWN_GIL requires Python 3.14+"}; true -> %% Clear registry so new contexts don't get pre-imported modules ok = py_import:clear_imports(), diff --git a/test/py_import_owngil_SUITE.erl b/test/py_import_owngil_SUITE.erl index 3186658..7bee5fc 100644 --- a/test/py_import_owngil_SUITE.erl +++ b/test/py_import_owngil_SUITE.erl @@ -68,12 +68,12 @@ init_per_suite(Config) -> %% Clear any imports from previous test suites to avoid %% importing C extensions that crash in OWN_GIL subinterpreters py_import:clear_imports(), - %% Then check if subinterpreters are supported - case py_nif:subinterp_supported() of + %% Check if OWN_GIL is supported (requires Python 3.14+) + case py_nif:owngil_supported() of true -> Config; false -> - {skip, "Requires Python 3.12+"} + {skip, "OWN_GIL requires Python 3.14+"} end. end_per_suite(_Config) -> diff --git a/test/py_owngil_features_SUITE.erl b/test/py_owngil_features_SUITE.erl index 82545b5..2a45720 100644 --- a/test/py_owngil_features_SUITE.erl +++ b/test/py_owngil_features_SUITE.erl @@ -220,7 +220,7 @@ groups() -> ]}]. 
init_per_suite(Config) -> - case py_nif:subinterp_supported() of + case py_nif:owngil_supported() of true -> {ok, _} = application:ensure_all_started(erlang_python), %% Add test directory to Python path @@ -228,7 +228,7 @@ init_per_suite(Config) -> TestDir = filename:join(filename:dirname(PrivDir), "test"), Config ++ [{test_dir, TestDir}]; false -> - {skip, "Requires Python 3.12+"} + {skip, "OWN_GIL requires Python 3.14+"} end. end_per_suite(_Config) -> From 89a7bed3ad6d1e15c1fff38ddd5d3066e50d2dbc Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Mon, 23 Mar 2026 09:58:47 +0100 Subject: [PATCH 16/18] Use worker mode for auto context on Python < 3.14 Auto mode now uses worker mode (not subinterpreters) on Python 3.12/3.13 to avoid crashes from C extensions like _decimal that have global state bugs in subinterpreters. On Python 3.14+ where these bugs are fixed, auto mode uses subinterpreters. Users can still explicitly use subinterp mode if needed, but must be aware of the C extension limitations on older Python versions. --- src/py_context.erl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/py_context.erl b/src/py_context.erl index 335fed4..0f5c5ad 100644 --- a/src/py_context.erl +++ b/src/py_context.erl @@ -521,8 +521,12 @@ apply_registered_paths(Ref) -> end. %% @private +%% Auto mode uses subinterpreters only on Python 3.14+ where C extension +%% global state bugs (e.g., _decimal) are fixed. On Python 3.12/3.13, +%% use worker mode to avoid crashes from C extensions like _decimal. +%% Users can still explicitly use subinterp mode if needed. 
create_context(auto) -> - case py_nif:subinterp_supported() of + case py_nif:owngil_supported() of true -> create_context(subinterp); false -> create_context(worker) end; From 6fd9229c5b039934b150bee45052b64a595e50f3 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Mon, 23 Mar 2026 10:14:16 +0100 Subject: [PATCH 17/18] Remove auto mode, use worker as default - Remove auto context mode, worker is now the default - subinterp and owngil must be explicitly requested - This avoids C extension crashes (_decimal) on Python 3.12/3.13 - subinterp requires Python 3.12+, owngil requires Python 3.14+ --- src/erlang_python_sup.erl | 2 +- src/py.erl | 5 ++--- src/py_context.erl | 27 ++++++++------------------- src/py_context_init.erl | 6 +++--- src/py_context_router.erl | 11 +++++------ src/py_context_sup.erl | 2 +- src/py_reactor_context.erl | 16 +++------------- test/py_import_SUITE.erl | 6 +++--- 8 files changed, 26 insertions(+), 49 deletions(-) diff --git a/src/erlang_python_sup.erl b/src/erlang_python_sup.erl index c0da692..3d1de97 100644 --- a/src/erlang_python_sup.erl +++ b/src/erlang_python_sup.erl @@ -34,7 +34,7 @@ start_link() -> init([]) -> NumContexts = application:get_env(erlang_python, num_contexts, erlang:system_info(schedulers)), - ContextMode = application:get_env(erlang_python, context_mode, auto), + ContextMode = application:get_env(erlang_python, context_mode, worker), NumAsyncWorkers = application:get_env(erlang_python, num_async_workers, 2), %% Default executors: 4 (benchmarked sweet spot for most workloads) diff --git a/src/py.erl b/src/py.erl index b78b2b8..3e9060e 100644 --- a/src/py.erl +++ b/src/py.erl @@ -1319,8 +1319,7 @@ clear_traces() -> %% @doc Start the process-per-context system with default settings. %% -%% Creates one context per scheduler, using auto mode (subinterp on -%% Python 3.12+, worker otherwise). +%% Creates one context per scheduler using worker mode. 
%% %% @returns {ok, [Context]} | {error, Reason} -spec start_contexts() -> {ok, [pid()]} | {error, term()}. @@ -1331,7 +1330,7 @@ start_contexts() -> %% %% Options: %% - `contexts' - Number of contexts to create (default: number of schedulers) -%% - `mode' - Context mode: `auto', `subinterp', or `worker' (default: `auto') +%% - `mode' - Context mode: `worker', `subinterp', or `owngil' (default: `worker') %% %% @param Opts Start options %% @returns {ok, [Context]} | {error, Reason} diff --git a/src/py_context.erl b/src/py_context.erl index 0f5c5ad..1b37e0e 100644 --- a/src/py_context.erl +++ b/src/py_context.erl @@ -62,7 +62,7 @@ %% Exported for py_reactor_context -export([extend_erlang_module_in_context/1]). --type context_mode() :: auto | subinterp | worker | owngil. +-type context_mode() :: worker | subinterp | owngil. -type context() :: pid(). -export_type([context_mode/0, context/0]). @@ -82,14 +82,12 @@ %% @doc Start a new py_context process. %% %% The process creates a Python context based on the mode: -%% - `auto' - Detect best mode (subinterp on Python 3.12+, worker otherwise) -%% - `subinterp' - Create a sub-interpreter with shared GIL (uses pool) %% - `worker' - Create a thread-state worker (main interpreter namespace) -%% - `owngil' - Create a sub-interpreter with its own GIL (true parallelism) +%% - `subinterp' - Create a sub-interpreter with shared GIL (Python 3.12+) +%% - `owngil' - Create a sub-interpreter with its own GIL (Python 3.14+) %% %% The `owngil' mode creates a dedicated pthread for each context, allowing -%% true parallel Python execution. This is useful for CPU-bound workloads. -%% Requires Python 3.12+. +%% true parallel Python execution. Requires Python 3.14+. %% %% @param Id Unique identifier for this context %% @param Mode Context mode @@ -128,13 +126,13 @@ stop(Ctx) when is_pid(Ctx) -> %% @doc Create a new context with options map. 
%% %% Options: -%% - `mode' - Context mode (auto | subinterp | worker | owngil), default: auto +%% - `mode' - Context mode (worker | subinterp | owngil), default: worker %% %% @param Opts Options map %% @returns {ok, Pid} | {error, Reason} -spec new(map()) -> {ok, context()} | {error, term()}. new(Opts) when is_map(Opts) -> - Mode = maps:get(mode, Opts, auto), + Mode = maps:get(mode, Opts, worker), Id = erlang:unique_integer([positive]), start_link(Id, Mode). @@ -521,19 +519,10 @@ apply_registered_paths(Ref) -> end. %% @private -%% Auto mode uses subinterpreters only on Python 3.14+ where C extension -%% global state bugs (e.g., _decimal) are fixed. On Python 3.12/3.13, -%% use worker mode to avoid crashes from C extensions like _decimal. -%% Users can still explicitly use subinterp mode if needed. -create_context(auto) -> - case py_nif:owngil_supported() of - true -> create_context(subinterp); - false -> create_context(worker) - end; -create_context(subinterp) -> - py_nif:context_create(subinterp); create_context(worker) -> py_nif:context_create(worker); +create_context(subinterp) -> + py_nif:context_create(subinterp); create_context(owngil) -> %% OWN_GIL mode requires Python 3.14+ due to C extension bugs in earlier versions case py_nif:owngil_supported() of diff --git a/src/py_context_init.erl b/src/py_context_init.erl index 6d7d5a4..7de5a7f 100644 --- a/src/py_context_init.erl +++ b/src/py_context_init.erl @@ -29,7 +29,7 @@ %%% {erlang_python, [ %%% {default_pool_size, 4}, % Number of contexts (default: schedulers) %%% {io_pool_size, 10}, % I/O pool size (default: 10) -%%% {io_pool_mode, worker} % Mode for io pool (default: auto) +%%% {io_pool_mode, worker} % Mode for io pool (default: worker) %%% ]}. 
%%% ''' %%% @private @@ -49,13 +49,13 @@ start_link(Opts) -> %% Start default pool DefaultSize = maps:get(contexts, Opts, erlang:system_info(schedulers)), - DefaultMode = maps:get(mode, Opts, auto), + DefaultMode = maps:get(mode, Opts, worker), case py_context_router:start_pool(default, DefaultSize, DefaultMode) of {ok, _DefaultContexts} -> %% Start I/O pool if configured IoSize = application:get_env(erlang_python, io_pool_size, 10), - IoMode = application:get_env(erlang_python, io_pool_mode, auto), + IoMode = application:get_env(erlang_python, io_pool_mode, worker), case py_context_router:start_pool(io, IoSize, IoMode) of {ok, _IoContexts} -> %% The contexts are supervised by py_context_sup diff --git a/src/py_context_router.erl b/src/py_context_router.erl index 2cd1910..f27814a 100644 --- a/src/py_context_router.erl +++ b/src/py_context_router.erl @@ -124,8 +124,7 @@ %% @doc Start the context router with default settings. %% -%% Creates one context per scheduler, using auto mode (subinterp on -%% Python 3.12+, worker otherwise). +%% Creates one context per scheduler using worker mode. %% %% @returns {ok, [Context]} | {error, Reason} -spec start() -> {ok, [pid()]} | {error, term()}. @@ -136,14 +135,14 @@ start() -> %% %% Options: %% - `contexts' - Number of contexts to create (default: number of schedulers) -%% - `mode' - Context mode: `auto', `subinterp', or `worker' (default: `auto') +%% - `mode' - Context mode: `worker', `subinterp', or `owngil' (default: `worker') %% %% @param Opts Start options %% @returns {ok, [Context]} | {error, Reason} -spec start(start_opts()) -> {ok, [pid()]} | {error, term()}. start(Opts) -> NumContexts = maps:get(contexts, Opts, erlang:system_info(schedulers)), - Mode = maps:get(mode, Opts, auto), + Mode = maps:get(mode, Opts, worker), %% Delegate to start_pool for the default pool start_pool(?DEFAULT_POOL, NumContexts, Mode). 
@@ -274,13 +273,13 @@ contexts(Pool) when is_atom(Pool) -> %% @returns {ok, [Context]} | {error, Reason} -spec start_pool(pool_name(), pos_integer()) -> {ok, [pid()]} | {error, term()}. start_pool(Pool, Size) -> - start_pool(Pool, Size, auto). + start_pool(Pool, Size, worker). %% @doc Start a named pool with given size and mode. %% %% @param Pool Pool name (default, io, or custom) %% @param Size Number of contexts in the pool -%% @param Mode Context mode (auto, subinterp, worker) +%% @param Mode Context mode (worker, subinterp, owngil) %% @returns {ok, [Context]} | {error, Reason} -spec start_pool(pool_name(), pos_integer(), py_context:context_mode()) -> {ok, [pid()]} | {error, term()}. diff --git a/src/py_context_sup.erl b/src/py_context_sup.erl index bea6026..97001ac 100644 --- a/src/py_context_sup.erl +++ b/src/py_context_sup.erl @@ -44,7 +44,7 @@ start_link() -> %% @doc Start a new py_context under this supervisor. %% %% @param Id Unique identifier for the context (integer or {Pool, N} tuple) -%% @param Mode Context mode (auto | subinterp | worker) +%% @param Mode Context mode (worker | subinterp | owngil) %% @returns {ok, Pid} | {error, Reason} -spec start_context(term(), py_context:context_mode()) -> {ok, pid()} | {error, term()}. diff --git a/src/py_reactor_context.erl b/src/py_reactor_context.erl index 6ed4b49..6576eee 100644 --- a/src/py_reactor_context.erl +++ b/src/py_reactor_context.erl @@ -79,7 +79,7 @@ %% @doc Start a new py_reactor_context process. %% %% @param Id Unique identifier for this context -%% @param Mode Context mode (auto, subinterp, worker) +%% @param Mode Context mode (worker, subinterp, owngil) %% @returns {ok, Pid} | {error, Reason} -spec start_link(pos_integer(), atom()) -> {ok, pid()} | {error, term()}. 
start_link(Id, Mode) -> @@ -95,7 +95,7 @@ start_link(Id, Mode) -> %% (useful for setting up protocol factory) %% %% @param Id Unique identifier for this context -%% @param Mode Context mode (auto, subinterp, worker) +%% @param Mode Context mode (worker, subinterp, owngil) %% @param Opts Options map %% @returns {ok, Pid} | {error, Reason} -spec start_link(pos_integer(), atom(), map()) -> {ok, pid()} | {error, term()}. @@ -180,18 +180,8 @@ handoff(Ctx, Fd, ClientInfo) when is_pid(Ctx), is_integer(Fd), is_map(ClientInfo init(Parent, Id, Mode, Opts) -> process_flag(trap_exit, true), - %% Determine mode - ActualMode = case Mode of - auto -> - case py_nif:subinterp_supported() of - true -> subinterp; - false -> worker - end; - _ -> Mode - end, - %% Create Python context - case py_nif:context_create(ActualMode) of + case py_nif:context_create(Mode) of {ok, Ref, _InterpId} -> %% Set up callback handler py_nif:context_set_callback_handler(Ref, self()), diff --git a/test/py_import_SUITE.erl b/test/py_import_SUITE.erl index 8bd7575..2aab86f 100644 --- a/test/py_import_SUITE.erl +++ b/test/py_import_SUITE.erl @@ -398,7 +398,7 @@ import_applied_to_new_context_test(_Config) -> ok = py_import:ensure_imported(json), %% Create a new context - {ok, Ctx} = py_context:new(#{mode => auto}), + {ok, Ctx} = py_context:new(#{mode => worker}), %% The json module should already be cached in the new context %% We can verify by calling a function from it @@ -689,7 +689,7 @@ context_import_in_sys_modules_test(_Config) -> ok = py_import:clear_imports(), %% Create a context - {ok, Ctx} = py_context:new(#{mode => auto}), + {ok, Ctx} = py_context:new(#{mode => worker}), %% Call a function from a module (this imports it) {ok, _} = py_context:call(Ctx, textwrap, fill, [<<"Hello world this is a test">>, 10], #{}), @@ -751,7 +751,7 @@ add_path_test(Config) -> ?assertEqual(1, length(Paths)), %% Create a new context to apply paths - {ok, Ctx} = py_context:new(#{mode => auto}), + {ok, Ctx} = 
py_context:new(#{mode => worker}), %% Import and call the sample module {ok, Greeting} = py_context:call(Ctx, sample_module, greet, [<<"World">>], #{}), From 11eeaac3b0a26be78e856bf7d06a718706c548ee Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Mon, 23 Mar 2026 10:27:13 +0100 Subject: [PATCH 18/18] Document subinterpreter modes and Python version requirements - Document four execution paths: SHARED_GIL (3.12+), OWN_GIL (3.14+), free-threaded (3.13+), and BEAM processes - Clarify worker mode is now the default - Explain why OWN_GIL requires Python 3.14+ (C extension global state bugs) - Note that SHARED_GIL isolation improves in Python 3.14+ - Update getting-started.md with context mode examples --- README.md | 54 ++++++++++++++++++++++++++++++++-------- docs/getting-started.md | 30 +++++++++++++++++++++- docs/owngil_internals.md | 18 ++++++++------ 3 files changed, 83 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 60b73dd..3c6d60a 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,10 @@ erlang_python embeds Python into the BEAM VM, letting you call Python functions, evaluate expressions, and stream from generators - all without blocking Erlang schedulers. -**Three paths to parallelism:** -- **Sub-interpreters** (Python 3.12+) - Each interpreter has its own GIL -- **Free-threaded Python** (3.13+) - No GIL at all +**Parallelism options:** +- **Worker mode** (default, recommended) - Works with any Python version. 
With free-threaded Python (3.13t+), provides true parallelism automatically +- **SHARED_GIL sub-interpreters** (Python 3.12+) - Isolated namespaces, shared GIL (isolation improves in 3.14+) +- **OWN_GIL sub-interpreters** (Python 3.14+) - Each interpreter has its own GIL, true parallelism - **BEAM processes** - Fan out work across lightweight Erlang processes Key features: @@ -313,10 +314,11 @@ Ref = py:async_call(aiohttp, get, [<<"https://api.example.com/data">>]), ## Parallel Execution with Sub-interpreters -True parallelism without GIL contention using Python 3.12+ sub-interpreters: +True parallelism without GIL contention using Python 3.14+ OWN_GIL sub-interpreters: ```erlang -%% Execute multiple calls in parallel across sub-interpreters +%% Execute multiple calls in parallel across OWN_GIL sub-interpreters +%% Requires Python 3.14+ {ok, Results} = py:parallel([ {math, factorial, [100]}, {math, factorial, [200]}, @@ -326,6 +328,8 @@ True parallelism without GIL contention using Python 3.12+ sub-interpreters: %% Each call runs in its own interpreter with its own GIL ``` +For Python 3.12/3.13, use SHARED_GIL sub-interpreters (`mode => subinterp`) for namespace isolation, but note that parallelism is limited by the shared GIL. + ## Parallel Processing with BEAM Processes Leverage Erlang's lightweight processes for massive parallelism: @@ -595,19 +599,47 @@ ok = py:clear_traces(). 
 ## Execution Modes
 
-The library auto-detects the best execution mode:
+### Context Modes
 
-| Mode | Python Version | Parallelism |
+When creating Python contexts, you can choose the execution mode:
+
+| Mode | Python Version | Description |
 |------|----------------|-------------|
-| Free-threaded | 3.13+ (nogil) | True parallel, no GIL |
-| Sub-interpreter | 3.12+ | Per-interpreter GIL |
-| Multi-executor | Any | GIL contention |
+| `worker` | Any | Main interpreter, shared namespace (default, recommended) |
+| `subinterp` | 3.12+ | SHARED_GIL sub-interpreter, isolated namespace |
+| `owngil` | 3.14+ | OWN_GIL sub-interpreter, true parallelism |
+
+```erlang
+%% Default: worker mode (recommended)
+%% With free-threaded Python (3.13t+), provides true parallelism automatically
+{ok, CtxDefault} = py_context:new(#{}).
+
+%% Explicit subinterpreter with shared GIL (Python 3.12+)
+%% Provides namespace isolation but no parallelism
+{ok, CtxShared} = py_context:new(#{mode => subinterp}).
+
+%% OWN_GIL mode for true parallelism (Python 3.14+ required)
+%% Each context runs in its own pthread with independent GIL
+{ok, CtxOwnGil} = py_context:new(#{mode => owngil}).
+```
+
+**Worker mode is recommended** because it works with any Python version and automatically benefits from free-threaded Python (3.13t+) when available.
+
+**Why OWN_GIL requires Python 3.14+**: Some C extensions (e.g., `_decimal`, `numpy`) have global state bugs in sub-interpreters on Python 3.12/3.13. These are fixed in Python 3.14. SHARED_GIL mode works on 3.12+ but with caveats for C extensions with global state.
 
-Check current mode:
+### Runtime Detection
+
+Check the current execution mode:
 
 ```erlang
 py:execution_mode().
 %% => free_threaded | subinterp | multi_executor
 ```
+
+| Mode | Python Version | Parallelism |
+|------|----------------|-------------|
+| Free-threaded | 3.13+ (nogil) | True parallel, no GIL |
+| Sub-interpreter | 3.12+ | Shared GIL (own GIL on 3.14+) |
+| Multi-executor | Any | GIL contention |
+
 ## Error Handling
 
 ```erlang
diff --git a/docs/getting-started.md b/docs/getting-started.md
index e950178..2b76751 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -262,7 +262,35 @@ ok = py:activate_venv(<<"/path/to/venv">>).
 ok = py:deactivate_venv().
 ```
 
-## Execution Mode and Scalability
+## Execution Modes and Context Types
+
+### Context Modes
+
+When creating explicit contexts, you can choose different execution modes:
+
+```erlang
+%% Worker mode (default, recommended) - main interpreter
+%% With free-threaded Python (3.13t+), provides true parallelism automatically
+{ok, CtxWorker} = py_context:new(#{mode => worker}).
+
+%% SHARED_GIL sub-interpreter (Python 3.12+) - isolated namespace
+{ok, CtxShared} = py_context:new(#{mode => subinterp}).
+
+%% OWN_GIL sub-interpreter (Python 3.14+) - true parallelism
+{ok, CtxOwnGil} = py_context:new(#{mode => owngil}).
+```
+
+| Mode | Python | Description |
+|------|--------|-------------|
+| `worker` | Any | Main interpreter, shared namespace (default, recommended) |
+| `subinterp` | 3.12+ | SHARED_GIL sub-interpreter, isolated namespace |
+| `owngil` | 3.14+ | OWN_GIL sub-interpreter, each has own GIL |
+
+**Worker mode is recommended** because it works with any Python version and automatically benefits from free-threaded Python (3.13t+) when available.
+
+**Why OWN_GIL requires Python 3.14+**: C extensions like `_decimal`, `numpy` have global state bugs in sub-interpreters on Python 3.12/3.13. These are fixed in Python 3.14. SHARED_GIL mode works on 3.12+ but some C extensions may have issues.
+ +### Runtime Detection Check the current execution mode: diff --git a/docs/owngil_internals.md b/docs/owngil_internals.md index d6599a1..2819c4a 100644 --- a/docs/owngil_internals.md +++ b/docs/owngil_internals.md @@ -2,12 +2,14 @@ ## Overview -OWN_GIL mode provides true parallel Python execution using Python 3.12+ per-interpreter GIL (`PyInterpreterConfig_OWN_GIL`). Each OWN_GIL context runs in a dedicated pthread with its own subinterpreter and GIL. +OWN_GIL mode provides true parallel Python execution using Python 3.14+ per-interpreter GIL (`PyInterpreterConfig_OWN_GIL`). Each OWN_GIL context runs in a dedicated pthread with its own subinterpreter and GIL. + +**Note**: OWN_GIL requires Python 3.14+ due to C extension global state bugs in earlier versions (e.g., `_decimal`, `numpy`). For Python 3.12/3.13, use SHARED_GIL sub-interpreters (`mode => subinterp`) which provide namespace isolation but share the GIL. ## Quick Start ```erlang -%% Create an OWN_GIL context (requires Python 3.12+) +%% Create an OWN_GIL context (requires Python 3.14+) {ok, Ctx} = py_context:start_link(1, owngil), %% Basic operations work the same as other modes @@ -79,11 +81,13 @@ All major erlang_python features work with OWN_GIL mode: ## Comparison with Other Modes -| Mode | Thread Model | GIL | Parallelism | -|------|-------------|-----|-------------| -| `worker` | Dirty scheduler | Main interpreter GIL | None | -| `subinterp` | Dirty scheduler | Shared GIL | None (isolated namespaces) | -| `owngil` | Dedicated pthread | Per-interpreter GIL | True parallel | +| Mode | Python Version | Thread Model | GIL | Parallelism | +|------|----------------|--------------|-----|-------------| +| `worker` | Any | Dirty scheduler | Main interpreter GIL | None | +| `subinterp` | 3.12+ | Dirty scheduler | Shared GIL | None (isolated namespaces) | +| `owngil` | 3.14+ | Dedicated pthread | Per-interpreter GIL | True parallel | + +**Why version requirements differ**: The `subinterp` mode (SHARED_GIL) 
works on Python 3.12+ for namespace isolation. However, `owngil` mode requires Python 3.14+ because C extensions like `_decimal`, `numpy` have global state that crashes in OWN_GIL sub-interpreters on earlier versions. Python 3.14 includes fixes for these issues (see [cpython#106078](https://github.com/python/cpython/issues/106078)). ## Key Data Structures