Skip to content

Commit d58a485

Browse files
committed
Make rapidsmpf py_executor configurable
This adds a new configuration option to control the number of threads passed to the ThreadPoolExecutor used as rapidsmpf's `py_executor`. This pairs with rapidsai/rapidsmpf#858, which starts using the user-provided `py_executor` for `asyncio.to_thread` calls.
1 parent e323fd8 commit d58a485

3 files changed

Lines changed: 48 additions & 1 deletion

File tree

python/cudf_polars/cudf_polars/experimental/rapidsmpf/core.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,10 @@ def evaluate_pipeline(
     )

     # Run the network
-    executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="cpse")
+    executor = ThreadPoolExecutor(
+        max_workers=config_options.executor.rapidsmpf_py_executor_max_workers,
+        thread_name_prefix="cpse",
+    )
     run_streaming_pipeline(nodes=nodes, py_executor=executor)

     # Extract/return the concatenated result.

python/cudf_polars/cudf_polars/utils/config.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -704,6 +704,10 @@ class StreamingExecutor:
         or use regular pageable host memory. Pinned host memory offers higher
         bandwidth and lower latency for device to host transfers compared to
         regular pageable host memory.
+    rapidsmpf_py_executor_max_workers
+        Maximum number of workers for the Python ThreadPoolExecutor used by
+        the rapidsmpf runtime. Default is None, which uses ThreadPoolExecutor's
+        default behavior. This option is only used by the "rapidsmpf" runtime.

     Notes
     -----
@@ -812,6 +816,11 @@ class StreamingExecutor:
             f"{_env_prefix}__SPILL_TO_PINNED_MEMORY", bool, default=False
         )
     )
+    rapidsmpf_py_executor_max_workers: int | None = dataclasses.field(
+        default_factory=_make_default_factory(
+            f"{_env_prefix}__RAPIDSMPF_PY_EXECUTOR_MAX_WORKERS", int, default=None
+        )
+    )

     def __post_init__(self) -> None:  # noqa: D105
         # Check for rapidsmpf runtime
@@ -959,6 +968,8 @@ def __post_init__(self) -> None:  # noqa: D105
             raise TypeError("max_io_threads must be an int")
         if not isinstance(self.spill_to_pinned_memory, bool):
             raise TypeError("spill_to_pinned_memory must be bool")
+        if not isinstance(self.rapidsmpf_py_executor_max_workers, (int, type(None))):
+            raise TypeError("rapidsmpf_py_executor_max_workers must be int or None")

         # RapidsMPF spill is only supported for distributed clusters for now.
         # This is because the spilling API is still within the RMPF-Dask integration.

python/cudf_polars/tests/test_config.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,7 @@ def test_validate_shuffle_insertion_method() -> None:
         "client_device_threshold",
         "max_io_threads",
         "spill_to_pinned_memory",
+        "rapidsmpf_py_executor_max_workers",
     ],
 )
 def test_validate_streaming_executor_options(option: str) -> None:
@@ -1010,3 +1011,35 @@ def test_rapidsmpf_distributed_warns(monkeypatch: pytest.MonkeyPatch) -> None:
             },
         )
     )
+
+
+def test_rapidsmpf_py_executor_max_workers_default() -> None:
+    config = ConfigOptions.from_polars_engine(
+        pl.GPUEngine(
+            executor="streaming",
+        )
+    )
+    assert config.executor.name == "streaming"
+    assert config.executor.rapidsmpf_py_executor_max_workers is None
+
+
+def test_rapidsmpf_py_executor_max_workers_from_executor_options() -> None:
+    config = ConfigOptions.from_polars_engine(
+        pl.GPUEngine(
+            executor="streaming",
+            executor_options={"rapidsmpf_py_executor_max_workers": 4},
+        )
+    )
+    assert config.executor.name == "streaming"
+    assert config.executor.rapidsmpf_py_executor_max_workers == 4
+
+
+def test_rapidsmpf_py_executor_max_workers_from_env(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    with monkeypatch.context() as m:
+        m.setenv("CUDF_POLARS__EXECUTOR__RAPIDSMPF_PY_EXECUTOR_MAX_WORKERS", "8")
+        engine = pl.GPUEngine(executor="streaming")
+        config = ConfigOptions.from_polars_engine(engine)
+        assert config.executor.name == "streaming"
+        assert config.executor.rapidsmpf_py_executor_max_workers == 8

0 commit comments

Comments
 (0)