Skip to content

Commit 84325aa

Browse files
committed
Add lazy module import caching API
Adds explicit import caching for Python modules and functions.

API:
- py:import/1 - Import and cache a module
- py:import/2 - Import a module and cache a specific function
- py:flush_imports/0 - Clear all import caches
- py:import_stats/0 - Get cache entry count
- py:import_list/0 - List cached imports as #{Module => [Funcs]}

Key behaviors:
- __main__ is never cached (returns error)
- Caching is per-process namespace (via PID affinity)
- All Python execution commands benefit from the cache

Tests:
- py_import_SUITE: 12 tests for basic import functionality
- py_import_owngil_SUITE: 9 tests for OWN_GIL mode (Python 3.12+)
- Multi-process and concurrent stress tests included
1 parent 1dc888e commit 84325aa

File tree

8 files changed

+1541
-1
lines changed

8 files changed

+1541
-1
lines changed

c_src/py_event_loop.c

Lines changed: 497 additions & 0 deletions
Large diffs are not rendered by default.

c_src/py_event_loop.h

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -690,6 +690,62 @@ ERL_NIF_TERM nif_process_ready_tasks(ErlNifEnv *env, int argc,
690690
ERL_NIF_TERM nif_event_loop_set_py_loop(ErlNifEnv *env, int argc,
691691
const ERL_NIF_TERM argv[]);
692692

693+
/* ============================================================================
694+
* Module Import Caching
695+
* ============================================================================ */
696+
697+
/**
698+
* @brief Import and cache a module in the event loop's interpreter
699+
*
700+
* Pre-imports the module and caches it for faster subsequent calls.
701+
* The __main__ module is never cached (returns error).
702+
*
703+
* NIF: loop_import_module(LoopRef, Module) -> ok | {error, Reason}
704+
*/
705+
ERL_NIF_TERM nif_loop_import_module(ErlNifEnv *env, int argc,
706+
const ERL_NIF_TERM argv[]);
707+
708+
/**
709+
* @brief Import a module and cache a specific function
710+
*
711+
* Pre-imports the module and caches the function reference.
712+
* The __main__ module is never cached (returns error).
713+
*
714+
* NIF: loop_import_function(LoopRef, Module, Func) -> ok | {error, Reason}
715+
*/
716+
ERL_NIF_TERM nif_loop_import_function(ErlNifEnv *env, int argc,
717+
const ERL_NIF_TERM argv[]);
718+
719+
/**
720+
* @brief Flush the import cache for an event loop's interpreter
721+
*
722+
* Clears the module/function cache for all namespaces in this loop.
723+
*
724+
* NIF: loop_flush_import_cache(LoopRef) -> ok
725+
*/
726+
ERL_NIF_TERM nif_loop_flush_import_cache(ErlNifEnv *env, int argc,
727+
const ERL_NIF_TERM argv[]);
728+
729+
/**
730+
* @brief Get import cache statistics for the calling process's namespace
731+
*
732+
* Returns a map with count of cached entries.
733+
*
734+
* NIF: loop_import_stats(LoopRef) -> {ok, #{count => N}} | {error, Reason}
735+
*/
736+
ERL_NIF_TERM nif_loop_import_stats(ErlNifEnv *env, int argc,
737+
const ERL_NIF_TERM argv[]);
738+
739+
/**
740+
* @brief List all cached imports in the calling process's namespace
741+
*
742+
* Returns a map from each cached module name to the list of its cached function names.
743+
*
744+
* NIF: loop_import_list(LoopRef) -> {ok, #{Module => [Func]}} | {error, Reason}
745+
*/
746+
ERL_NIF_TERM nif_loop_import_list(ErlNifEnv *env, int argc,
747+
const ERL_NIF_TERM argv[]);
748+
693749
/* ============================================================================
694750
* Internal Helper Functions
695751
* ============================================================================ */

c_src/py_nif.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6764,6 +6764,12 @@ static ErlNifFunc nif_funcs[] = {
67646764
/* Per-process namespace NIFs */
67656765
{"event_loop_exec", 2, nif_event_loop_exec, ERL_NIF_DIRTY_JOB_IO_BOUND},
67666766
{"event_loop_eval", 2, nif_event_loop_eval, ERL_NIF_DIRTY_JOB_IO_BOUND},
6767+
/* Module import caching NIFs */
6768+
{"loop_import_module", 2, nif_loop_import_module, ERL_NIF_DIRTY_JOB_IO_BOUND},
6769+
{"loop_import_function", 3, nif_loop_import_function, ERL_NIF_DIRTY_JOB_IO_BOUND},
6770+
{"loop_flush_import_cache", 1, nif_loop_flush_import_cache, 0},
6771+
{"loop_import_stats", 1, nif_loop_import_stats, 0},
6772+
{"loop_import_list", 1, nif_loop_import_list, 0},
67676773
{"add_reader", 3, nif_add_reader, 0},
67686774
{"remove_reader", 2, nif_remove_reader, 0},
67696775
{"add_writer", 3, nif_add_writer, 0},

src/py.erl

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,12 @@
5656
stream/4,
5757
stream_eval/1,
5858
stream_eval/2,
59+
%% Module import caching
60+
import/1,
61+
import/2,
62+
flush_imports/0,
63+
import_stats/0,
64+
import_list/0,
5965
version/0,
6066
memory_stats/0,
6167
gc/0,
@@ -327,6 +333,93 @@ exec(Ctx, Code) when is_pid(Ctx) ->
327333
EnvRef = get_local_env(Ctx),
328334
py_context:exec(Ctx, Code, EnvRef).
329335

336+
%%% ============================================================================
337+
%%% Module Import Caching
338+
%%% ============================================================================
339+
340+
%% @doc Import and cache a module in the current interpreter.
341+
%%
342+
%% The module is imported in the interpreter handling this process (via affinity).
343+
%% The `__main__' module is never cached in the interpreter cache.
344+
%%
345+
%% This is useful for pre-warming imports before making calls, ensuring the
346+
%% first call doesn't pay the import penalty.
347+
%%
348+
%% Example:
349+
%% ```
350+
%% ok = py:import(json),
351+
%% {ok, Result} = py:call(json, dumps, [Data]). %% Uses cached module
352+
%% '''
353+
%%
354+
%% @param Module Python module name
355+
%% @returns ok | {error, Reason}
356+
-spec import(py_module()) -> ok | {error, term()}.
357+
import(Module) ->
358+
py_event_loop_pool:import(Module).
359+
360+
%% @doc Import and cache a module function in the current interpreter.
361+
%%
362+
%% Pre-imports the module and caches the function reference for faster
363+
%% subsequent calls. The `__main__' module is never cached.
364+
%%
365+
%% Example:
366+
%% ```
367+
%% ok = py:import(json, dumps),
368+
%% {ok, Result} = py:call(json, dumps, [Data]). %% Uses cached function
369+
%% '''
370+
%%
371+
%% @param Module Python module name
372+
%% @param Func Function name to cache
373+
%% @returns ok | {error, Reason}
374+
-spec import(py_module(), py_func()) -> ok | {error, term()}.
375+
import(Module, Func) ->
376+
py_event_loop_pool:import(Module, Func).
377+
378+
%% @doc Flush import caches across all interpreters.
379+
%%
380+
%% Clears the module/function cache in all interpreters. Use this after
381+
%% modifying Python modules on disk to force re-import.
382+
%%
383+
%% @returns ok
384+
-spec flush_imports() -> ok.
385+
flush_imports() ->
386+
py_event_loop_pool:flush_imports().
387+
388+
%% @doc Get import cache statistics for the current interpreter.
389+
%%
390+
%% Returns a map with cache metrics for the interpreter handling this process.
391+
%%
392+
%% Example:
393+
%% ```
394+
%% {ok, #{count := 5}} = py:import_stats().
395+
%% '''
396+
%%
397+
%% @returns {ok, Stats} where Stats is a map with cache metrics
398+
-spec import_stats() -> {ok, map()} | {error, term()}.
399+
import_stats() ->
400+
py_event_loop_pool:import_stats().
401+
402+
%% @doc List all cached imports in the current interpreter.
403+
%%
404+
%% Returns a map of modules to their cached functions.
405+
%% Module names are binary keys, function lists are the values.
406+
%% An empty list means only the module is cached (no specific functions).
407+
%%
408+
%% Example:
409+
%% ```
410+
%% ok = py:import(json),
411+
%% ok = py:import(json, dumps),
412+
%% ok = py:import(json, loads),
413+
%% ok = py:import(math),
414+
%% {ok, #{<<"json">> := [<<"dumps">>, <<"loads">>],
415+
%% <<"math">> := []}} = py:import_list().
416+
%% '''
417+
%%
418+
%% @returns {ok, #{Module => [Func]}} map of modules to functions
419+
-spec import_list() -> {ok, #{binary() => [binary()]}} | {error, term()}.
420+
import_list() ->
421+
py_event_loop_pool:import_list().
422+
330423
%%% ============================================================================
331424
%%% Asynchronous API
332425
%%% ============================================================================

src/py_event_loop_pool.erl

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,13 @@
4343
await/1, await/2,
4444
%% Per-process namespace API
4545
exec/1, exec/2,
46-
eval/1, eval/2
46+
eval/1, eval/2,
47+
%% Module import caching
48+
import/1, import/2,
49+
flush_imports/0,
50+
import_stats/0,
51+
import_list/0,
52+
get_all_loops/0
4753
]).
4854

4955
%% Legacy API
@@ -332,6 +338,95 @@ eval(Expr) ->
332338
eval(LoopRef, Expr) ->
333339
py_nif:event_loop_eval(LoopRef, Expr).
334340

341+
%%% ============================================================================
342+
%%% Module Import Caching
343+
%%% ============================================================================
344+
345+
%% @doc Pre-import a Python module and cache it in the current interpreter.
%%
%% The event loop for the calling process is obtained from `get_loop/0'
%% (the interpreter assigned to this process via PID hash affinity).
%% `Module' is converted with `py_util:to_binary/1' and passed to
%% `py_nif:loop_import_module/2', whose result is returned unchanged.
%% The `__main__' module is never cached.
%%
%% Example:
%% <pre>
%% ok = py_event_loop_pool:import(json),
%% Ref = py_event_loop_pool:create_task(json, dumps, [[1,2,3]])
%% </pre>
%%
%% Returns `{error, event_loop_not_available}' when `get_loop/0' reports
%% `{error, not_available}'.
-spec import(Module :: atom() | binary()) -> ok | {error, term()}.
import(Module) ->
    case get_loop() of
        {error, not_available} ->
            {error, event_loop_not_available};
        {ok, Loop} ->
            py_nif:loop_import_module(Loop, py_util:to_binary(Module))
    end.
364+
365+
%% @doc Pre-import a Python module and cache one of its functions.
%%
%% Both names are converted with `py_util:to_binary/1' (module first, then
%% function) and passed, together with the loop from `get_loop/0', to
%% `py_nif:loop_import_function/3'. The NIF result is returned unchanged.
%% The `__main__' module is never cached.
%%
%% Returns `{error, event_loop_not_available}' when `get_loop/0' reports
%% `{error, not_available}'.
-spec import(Module :: atom() | binary(), Func :: atom() | binary()) -> ok | {error, term()}.
import(Module, Func) ->
    case get_loop() of
        {error, not_available} ->
            {error, event_loop_not_available};
        {ok, Loop} ->
            ModName = py_util:to_binary(Module),
            FunName = py_util:to_binary(Func),
            py_nif:loop_import_function(Loop, ModName, FunName)
    end.
379+
380+
%% @doc Flush the import cache of every event loop interpreter in the pool.
%%
%% Calls `py_nif:loop_flush_import_cache/1' once for each `{LoopRef, _}'
%% entry returned by `get_all_loops/0'. Use this after modifying Python
%% modules on disk to force a re-import.
%%
%% Always returns `ok': the individual NIF results are discarded, and an
%% `{error, _}' from `get_all_loops/0' is treated as "nothing to flush".
-spec flush_imports() -> ok.
flush_imports() ->
    case get_all_loops() of
        {error, _} ->
            ok;
        {ok, Entries} ->
            lists:foreach(
                fun
                    ({Loop, _}) -> py_nif:loop_flush_import_cache(Loop);
                    %% Entries of any other shape are skipped rather than
                    %% crashing the flush.
                    (_) -> ok
                end,
                Entries
            )
    end.
393+
394+
%% @doc Fetch import cache statistics for the current interpreter.
%%
%% Delegates to `py_nif:loop_import_stats/1' with the loop obtained from
%% `get_loop/0' and returns its result (a map of cache metrics) unchanged.
%%
%% Returns `{error, event_loop_not_available}' when `get_loop/0' reports
%% `{error, not_available}'.
-spec import_stats() -> {ok, map()} | {error, term()}.
import_stats() ->
    case get_loop() of
        {error, not_available} ->
            {error, event_loop_not_available};
        {ok, Loop} ->
            py_nif:loop_import_stats(Loop)
    end.
405+
406+
%% @doc List all cached imports in the current interpreter.
%%
%% Returns `{ok, Map}' where every key is a cached module name (binary) and
%% the value is the list of function names (binaries) cached for that module.
%% An empty list means only the module itself is cached.
%%
%% The result of `py_nif:loop_import_list/1' is returned unchanged, so this
%% spec mirrors the NIF's spec (a map, not a flat list of names).
%%
%% Returns `{error, event_loop_not_available}' when `get_loop/0' reports
%% `{error, not_available}'.
-spec import_list() -> {ok, #{binary() => [binary()]}} | {error, term()}.
import_list() ->
    case get_loop() of
        {ok, LoopRef} ->
            py_nif:loop_import_list(LoopRef);
        {error, not_available} ->
            {error, event_loop_not_available}
    end.
417+
418+
%% @doc Return every event loop entry held by the pool.
%%
%% Reads the loops tuple stored under `?PT_LOOPS' in `persistent_term' and
%% extracts elements `1..pool_size()' in order. Each entry is a
%% `{LoopRef, WorkerPid}' tuple.
%%
%% Returns `{error, not_available}' when `pool_size/0' is `0'.
-spec get_all_loops() -> {ok, [{reference(), pid()}]} | {error, not_available}.
get_all_loops() ->
    case pool_size() of
        0 ->
            {error, not_available};
        Size ->
            LoopTuple = persistent_term:get(?PT_LOOPS),
            Entries = lists:map(
                fun(Pos) -> element(Pos, LoopTuple) end,
                lists:seq(1, Size)
            ),
            {ok, Entries}
    end.
429+
335430
%%% ============================================================================
336431
%%% Legacy API
337432
%%% ============================================================================

src/py_nif.erl

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,12 @@
112112
%% Per-process namespace NIFs
113113
event_loop_exec/2,
114114
event_loop_eval/2,
115+
%% Module import caching NIFs
116+
loop_import_module/2,
117+
loop_import_function/3,
118+
loop_flush_import_cache/1,
119+
loop_import_stats/1,
120+
loop_import_list/1,
115121
add_reader/3,
116122
remove_reader/2,
117123
add_writer/3,
@@ -846,6 +852,67 @@ event_loop_exec(_LoopRef, _Code) ->
846852
event_loop_eval(_LoopRef, _Expr) ->
847853
?NIF_STUB.
848854

855+
%%% ============================================================================
856+
%%% Module Import Caching
857+
%%% ============================================================================
858+
859+
%% @doc Import and cache a module in the event loop's interpreter.
860+
%%
861+
%% Pre-imports the module and caches it for faster subsequent calls.
862+
%% The `__main__' module is never cached (returns error).
863+
%%
864+
%% @param LoopRef Event loop reference
865+
%% @param Module Module name as binary
866+
%% @returns ok | {error, Reason}
867+
-spec loop_import_module(reference(), binary()) -> ok | {error, term()}.
868+
loop_import_module(_LoopRef, _Module) ->
869+
?NIF_STUB.
870+
871+
%% @doc Import a module and cache a specific function.
872+
%%
873+
%% Pre-imports the module and caches the function reference for faster
874+
%% subsequent calls. The `__main__' module is never cached (returns error).
875+
%%
876+
%% @param LoopRef Event loop reference
877+
%% @param Module Module name as binary
878+
%% @param Func Function name as binary
879+
%% @returns ok | {error, Reason}
880+
-spec loop_import_function(reference(), binary(), binary()) -> ok | {error, term()}.
881+
loop_import_function(_LoopRef, _Module, _Func) ->
882+
?NIF_STUB.
883+
884+
%% @doc Flush the import cache for an event loop's interpreter.
885+
%%
886+
%% Clears the module/function cache. Use this after modifying Python
887+
%% modules on disk to force re-import.
888+
%%
889+
%% @param LoopRef Event loop reference
890+
%% @returns ok
891+
-spec loop_flush_import_cache(reference()) -> ok.
892+
loop_flush_import_cache(_LoopRef) ->
893+
?NIF_STUB.
894+
895+
%% @doc Get import cache statistics for an event loop's interpreter.
896+
%%
897+
%% Returns a map with cache metrics for the calling process's namespace.
898+
%%
899+
%% @param LoopRef Event loop reference
900+
%% @returns {ok, Stats} where Stats is a map with count
901+
-spec loop_import_stats(reference()) -> {ok, map()} | {error, term()}.
902+
loop_import_stats(_LoopRef) ->
903+
?NIF_STUB.
904+
905+
%% @doc List all cached imports in an event loop's interpreter.
906+
%%
907+
%% Returns a map of modules to their cached functions for the calling
908+
%% process's namespace.
909+
%%
910+
%% @param LoopRef Event loop reference
911+
%% @returns {ok, #{Module => [Func]}} map of modules to functions
912+
-spec loop_import_list(reference()) -> {ok, #{binary() => [binary()]}} | {error, term()}.
913+
loop_import_list(_LoopRef) ->
914+
?NIF_STUB.
915+
849916
%% @doc Register a file descriptor for read monitoring.
850917
%% Uses enif_select to register with the Erlang scheduler.
851918
-spec add_reader(reference(), integer(), non_neg_integer()) ->

0 commit comments

Comments
 (0)