diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py index ab50c37de4..d4112f9dfa 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py @@ -3,6 +3,8 @@ # SPDX-License-Identifier: Apache-2.0 import json +import sys +from collections.abc import Sequence from cuda.pathfinder._dynamic_libs.lib_descriptor import LIB_DESCRIPTORS from cuda.pathfinder._dynamic_libs.load_dl_common import DynamicLibNotFoundError, LoadedDL @@ -27,3 +29,15 @@ def _probe_canary_abs_path(libname: str) -> str | None: def probe_canary_abs_path_and_print_json(libname: str) -> None: print(json.dumps(_probe_canary_abs_path(libname))) + + +def main(argv: Sequence[str] | None = None) -> int: + args = list(sys.argv[1:] if argv is None else argv) + if len(args) != 1: + raise SystemExit("Usage: python -m cuda.pathfinder._dynamic_libs.canary_probe_subprocess ") + probe_canary_abs_path_and_print_json(args[0]) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py index c40dffde15..f1d82f3e71 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py @@ -6,10 +6,11 @@ import functools import json import struct +import subprocess import sys +from pathlib import Path from typing import TYPE_CHECKING -from cuda.pathfinder._dynamic_libs.canary_probe_subprocess import probe_canary_abs_path_and_print_json from cuda.pathfinder._dynamic_libs.lib_descriptor import LIB_DESCRIPTORS from cuda.pathfinder._dynamic_libs.load_dl_common import ( DynamicLibNotAvailableError, @@ -28,7 +29,6 @@ run_find_steps, ) from 
cuda.pathfinder._utils.platform_aware import IS_WINDOWS -from cuda.pathfinder._utils.spawned_process_runner import run_in_spawned_child_process if TYPE_CHECKING: from cuda.pathfinder._dynamic_libs.lib_descriptor import LibDescriptor @@ -40,6 +40,9 @@ name for name, desc in LIB_DESCRIPTORS.items() if (desc.windows_dlls if IS_WINDOWS else desc.linux_sonames) ) _PLATFORM_NAME = "Windows" if IS_WINDOWS else "Linux" +_CANARY_PROBE_MODULE = "cuda.pathfinder._dynamic_libs.canary_probe_subprocess" +_CANARY_PROBE_TIMEOUT_SECONDS = 10.0 +_CANARY_PROBE_IMPORT_ROOT = Path(__file__).resolve().parents[3] # Driver libraries: shipped with the NVIDIA display driver, always on the # system linker path. These skip all CTK search steps (site-packages, @@ -67,15 +70,47 @@ def _load_driver_lib_no_cache(desc: LibDescriptor) -> LoadedDL: ) +def _coerce_subprocess_output(output: str | bytes | None) -> str: + if isinstance(output, bytes): + return output.decode(errors="replace") + return "" if output is None else output + + +def _raise_canary_probe_child_process_error( + *, + returncode: int | None = None, + timeout: float | None = None, + stderr: str | bytes | None = None, +) -> None: + if timeout is None: + error_line = f"Canary probe child process exited with code {returncode}." + else: + error_line = f"Canary probe child process timed out after {timeout} seconds." 
+ raise ChildProcessError( + f"{error_line}\n" + "--- stderr-from-child-process ---\n" + f"{_coerce_subprocess_output(stderr)}" + "\n" + ) + + @functools.cache def _resolve_system_loaded_abs_path_in_subprocess(libname: str) -> str | None: - """Resolve a canary library's absolute path in a spawned child process.""" - result = run_in_spawned_child_process( - probe_canary_abs_path_and_print_json, - args=(libname,), - timeout=10.0, - rethrow=True, - ) + """Resolve a canary library's absolute path in a fresh Python subprocess.""" + try: + result = subprocess.run( # noqa: S603 - trusted argv: current interpreter + internal probe module + [sys.executable, "-m", _CANARY_PROBE_MODULE, libname], + capture_output=True, + text=True, + timeout=_CANARY_PROBE_TIMEOUT_SECONDS, + check=False, + cwd=_CANARY_PROBE_IMPORT_ROOT, + ) + except subprocess.TimeoutExpired as exc: + _raise_canary_probe_child_process_error(timeout=exc.timeout, stderr=exc.stderr) + + if result.returncode != 0: + _raise_canary_probe_child_process_error(returncode=result.returncode, stderr=result.stderr) # Use the final non-empty line in case earlier output lines are emitted. lines = [line for line in result.stdout.splitlines() if line.strip()] diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py index b6118db64d..6e2ae100d8 100644 --- a/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py +++ b/cuda_pathfinder/cuda/pathfinder/_headers/find_nvidia_headers.py @@ -115,7 +115,7 @@ def find_via_ctk_root_canary(desc: HeaderDescriptor) -> LocatedHeaderDir | None: """Try CTK header lookup via CTK-root canary probing. Skips immediately if the descriptor does not opt in (``use_ctk_root_canary``). 
- Otherwise, system-loads ``cudart`` in a spawned child process, derives + Otherwise, system-loads ``cudart`` in a fully isolated Python subprocess, derives CTK root from the resolved library path, and searches the expected include layout under that root. """ diff --git a/cuda_pathfinder/docs/source/release/1.4.3-notes.rst b/cuda_pathfinder/docs/source/release/1.4.3-notes.rst new file mode 100644 index 0000000000..bb75fd4e75 --- /dev/null +++ b/cuda_pathfinder/docs/source/release/1.4.3-notes.rst @@ -0,0 +1,22 @@ +.. SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: Apache-2.0 + +.. py:currentmodule:: cuda.pathfinder + +``cuda-pathfinder`` 1.4.3 Release notes +======================================= + +Highlights +---------- + +* Fix CTK canary probing when it is reached from plain Python scripts that do + not use an ``if __name__ == "__main__"`` guard. The canary now runs in a + fully isolated Python subprocess without re-entering the caller's script + during child-process startup. + (`PR #1768 <https://github.com/NVIDIA/cuda-python/pull/1768>`_) + +* Make the canary subprocess resolve ``cuda.pathfinder`` from the same import + root as the parent process. This avoids mixed source-tree versus wheel imports + in wheel-based test environments and keeps the probe behavior consistent + across source and installed-package workflows. 
def test_subprocess_probe_returns_abs_path_on_string_payload(mocker):
    """A JSON string on stdout is returned as the resolved absolute path."""
    result = subprocess.CompletedProcess(
        args=[],
        returncode=0,
        stdout='"/usr/local/cuda/lib64/libcudart.so.13"\n',
        stderr="",
    )
    run_mock = mocker.patch(f"{_MODULE}.subprocess.run", return_value=result)

    assert _resolve_system_loaded_abs_path_in_subprocess("cudart") == "/usr/local/cuda/lib64/libcudart.so.13"
    run_mock.assert_called_once_with(
        [sys.executable, "-m", "cuda.pathfinder._dynamic_libs.canary_probe_subprocess", "cudart"],
        capture_output=True,
        text=True,
        timeout=10.0,
        check=False,
        cwd=_PACKAGE_ROOT,
    )


def test_subprocess_probe_returns_none_on_null_payload(mocker):
    """A JSON ``null`` on stdout is returned as ``None``."""
    result = subprocess.CompletedProcess(args=[], returncode=0, stdout="null\n", stderr="")
    mocker.patch(f"{_MODULE}.subprocess.run", return_value=result)

    assert _resolve_system_loaded_abs_path_in_subprocess("cudart") is None


def test_subprocess_probe_raises_on_child_failure(mocker):
    """A non-zero exit status raises ``ChildProcessError`` carrying the child's stderr."""
    result = subprocess.CompletedProcess(args=[], returncode=1, stdout="", stderr="child failed\n")
    mocker.patch(f"{_MODULE}.subprocess.run", return_value=result)

    with pytest.raises(ChildProcessError, match="child failed"):
        _resolve_system_loaded_abs_path_in_subprocess("cudart")


def test_subprocess_probe_raises_on_timeout(mocker):
    """A ``subprocess.TimeoutExpired`` is reported as ``ChildProcessError``."""
    mocker.patch(
        f"{_MODULE}.subprocess.run",
        side_effect=subprocess.TimeoutExpired(cmd=["python"], timeout=10.0, stderr="probe hung\n"),
    )

    with pytest.raises(ChildProcessError, match="timed out after 10.0 seconds"):
        _resolve_system_loaded_abs_path_in_subprocess("cudart")


def test_subprocess_probe_raises_on_empty_stdout(mocker):
    """Whitespace-only stdout raises ``RuntimeError``."""
    result = subprocess.CompletedProcess(args=[], returncode=0, stdout=" \n \n", stderr="")
    mocker.patch(f"{_MODULE}.subprocess.run", return_value=result)

    with pytest.raises(RuntimeError, match="produced no stdout payload"):
        _resolve_system_loaded_abs_path_in_subprocess("cudart")


def test_subprocess_probe_raises_on_invalid_json_payload(mocker):
    """Non-JSON stdout raises ``RuntimeError``."""
    result = subprocess.CompletedProcess(args=[], returncode=0, stdout="not-json\n", stderr="")
    mocker.patch(f"{_MODULE}.subprocess.run", return_value=result)

    with pytest.raises(RuntimeError, match="invalid JSON payload"):
        _resolve_system_loaded_abs_path_in_subprocess("cudart")


def test_subprocess_probe_raises_on_unexpected_json_payload(mocker):
    """A JSON object payload (not a string or null) raises ``RuntimeError``."""
    result = subprocess.CompletedProcess(
        args=[],
        returncode=0,
        stdout='{"path": "/usr/local/cuda/lib64/libcudart.so.13"}\n',
        stderr="",
    )
    mocker.patch(f"{_MODULE}.subprocess.run", return_value=result)

    with pytest.raises(RuntimeError, match="unexpected payload"):
        _resolve_system_loaded_abs_path_in_subprocess("cudart")


def test_subprocess_probe_does_not_reenter_calling_script(tmp_path):
    """Run the probe from a guard-less script and check the script body ran once.

    The script increments a counter file each time its top level executes and
    then calls the probe; a final count of ``"1"`` shows the probe's child
    process did not execute the script again.
    """
    script_path = tmp_path / "call_probe.py"
    run_count_path = tmp_path / "run_count.txt"
    script_path.write_text(
        textwrap.dedent(
            f"""
            from pathlib import Path

            from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import (
                _resolve_system_loaded_abs_path_in_subprocess,
            )

            marker_path = Path({str(run_count_path)!r})
            run_count = int(marker_path.read_text()) if marker_path.exists() else 0
            marker_path.write_text(str(run_count + 1))

            try:
                _resolve_system_loaded_abs_path_in_subprocess("not_a_real_lib")
            except Exception:
                pass
            """
        ),
        encoding="utf-8",
    )
    env = os.environ.copy()
    existing_pythonpath = env.get("PYTHONPATH")
    env["PYTHONPATH"] = (
        str(_PACKAGE_ROOT) if not existing_pythonpath else os.pathsep.join((str(_PACKAGE_ROOT), existing_pythonpath))
    )

    result = subprocess.run(  # noqa: S603 - trusted argv: current interpreter + temp script created by this test
        [sys.executable, str(script_path)],
        capture_output=True,
        text=True,
        check=False,
        env=env,
        # Bound the run so a re-entry regression fails the test instead of
        # hanging it; the value leaves ample room above the 10-second probe
        # timeout asserted elsewhere in these tests.
        timeout=60,
    )

    assert result.returncode == 0, result.stderr
    assert run_count_path.read_text(encoding="utf-8") == "1"