Skip to content

Commit c7f3077

Browse files
cpcloud and cursoragent committed
fix(pathfinder): restore descriptor-driven CTK canary fallback
Reinstate CTK-root canary discovery in the refactored loader path and define canary eligibility/anchors on per-library descriptors so fallback policy lives with the rest of library metadata. Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 0c0eb71 commit c7f3077

File tree

8 files changed: 198 additions and 210 deletions

cuda_pathfinder/cuda/pathfinder/_dynamic_libs/canary_probe_subprocess.py

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -4,18 +4,17 @@
44

55
import json
66

7+
from cuda.pathfinder._dynamic_libs.lib_descriptor import LIB_DESCRIPTORS
78
from cuda.pathfinder._dynamic_libs.load_dl_common import DynamicLibNotFoundError, LoadedDL
8-
from cuda.pathfinder._utils.platform_aware import IS_WINDOWS
9-
10-
if IS_WINDOWS:
11-
from cuda.pathfinder._dynamic_libs.load_dl_windows import load_with_system_search
12-
else:
13-
from cuda.pathfinder._dynamic_libs.load_dl_linux import load_with_system_search
9+
from cuda.pathfinder._dynamic_libs.platform_loader import LOADER
1410

1511

1612
def _probe_canary_abs_path(libname: str) -> str | None:
13+
desc = LIB_DESCRIPTORS.get(libname)
14+
if desc is None:
15+
raise ValueError(f"Unsupported canary library name: {libname!r}")
1716
try:
18-
loaded: LoadedDL | None = load_with_system_search(libname)
17+
loaded: LoadedDL | None = LOADER.load_with_system_search(desc)
1918
except DynamicLibNotFoundError:
2019
return None
2120
if loaded is None:

cuda_pathfinder/cuda/pathfinder/_dynamic_libs/descriptor_catalog.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ class DescriptorSpec:
2222
dependencies: tuple[str, ...] = ()
2323
anchor_rel_dirs_linux: tuple[str, ...] = ("lib64", "lib")
2424
anchor_rel_dirs_windows: tuple[str, ...] = ("bin/x64", "bin")
25+
ctk_root_canary_anchor_libnames: tuple[str, ...] = ()
2526
requires_add_dll_directory: bool = False
2627
requires_rtld_deepbind: bool = False
2728

@@ -72,6 +73,7 @@ class DescriptorSpec:
7273
site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/cuda_nvcc/nvvm/bin"),
7374
anchor_rel_dirs_linux=("nvvm/lib64",),
7475
anchor_rel_dirs_windows=("nvvm/bin/*", "nvvm/bin"),
76+
ctk_root_canary_anchor_libnames=("cudart",),
7577
),
7678
DescriptorSpec(
7779
name="cublas",

cuda_pathfinder/cuda/pathfinder/_dynamic_libs/load_nvidia_dynamic_lib.py

Lines changed: 59 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,19 +4,24 @@
44
from __future__ import annotations
55

66
import functools
7+
import json
78
import struct
89
import sys
910
from typing import TYPE_CHECKING
1011

12+
from cuda.pathfinder._dynamic_libs.canary_probe_subprocess import probe_canary_abs_path_and_print_json
1113
from cuda.pathfinder._dynamic_libs.lib_descriptor import LIB_DESCRIPTORS
1214
from cuda.pathfinder._dynamic_libs.load_dl_common import DynamicLibNotFoundError, LoadedDL, load_dependencies
1315
from cuda.pathfinder._dynamic_libs.platform_loader import LOADER
1416
from cuda.pathfinder._dynamic_libs.search_steps import (
1517
EARLY_FIND_STEPS,
1618
LATE_FIND_STEPS,
1719
SearchContext,
20+
derive_ctk_root,
21+
find_via_ctk_root,
1822
run_find_steps,
1923
)
24+
from cuda.pathfinder._utils.spawned_process_runner import run_in_spawned_child_process
2025

2126
if TYPE_CHECKING:
2227
from cuda.pathfinder._dynamic_libs.lib_descriptor import LibDescriptor
@@ -51,6 +56,48 @@ def _load_driver_lib_no_cache(desc: LibDescriptor) -> LoadedDL:
5156
)
5257

5358

59+
@functools.cache
def _resolve_system_loaded_abs_path_in_subprocess(libname: str) -> str | None:
    """Run the canary probe for ``libname`` in a spawned child and decode its answer.

    ``probe_canary_abs_path_and_print_json`` is executed through
    ``run_in_spawned_child_process``; the last non-blank line of the child's
    stdout is parsed as JSON. Results are memoized per ``libname`` by
    ``functools.cache``.

    Returns:
        The decoded payload when it is a string, or ``None`` when the child
        reported JSON ``null``.

    Raises:
        RuntimeError: If the child printed no non-blank stdout line, if the
            last line is not valid JSON, or if the decoded payload is neither
            a string nor ``None``.
    """
    completed = run_in_spawned_child_process(
        probe_canary_abs_path_and_print_json,
        args=(libname,),
        timeout=10.0,
        rethrow=True,
    )

    # Only the final non-blank line counts as the payload; anything printed
    # before it is ignored.
    payload_line = next(
        (text for text in reversed(completed.stdout.splitlines()) if text.strip()),
        None,
    )
    if payload_line is None:
        raise RuntimeError(f"Canary probe child process produced no stdout payload for {libname!r}")

    try:
        decoded = json.loads(payload_line)
    except json.JSONDecodeError:
        raise RuntimeError(
            f"Canary probe child process emitted invalid JSON payload for {libname!r}: {payload_line!r}"
        ) from None

    if decoded is None or isinstance(decoded, str):
        return decoded
    raise RuntimeError(f"Canary probe child process emitted unexpected payload for {libname!r}: {decoded!r}")
84+
85+
86+
def _try_ctk_root_canary(ctx: SearchContext) -> str | None:
    """Locate the requested library through the CTK-root canary fallback.

    Walks ``ctx.desc.ctk_root_canary_anchor_libnames`` in order. For each
    anchor it resolves the anchor's absolute path in a subprocess, derives a
    CTK root from that path, and searches under the root. The first
    successful search wins.

    Returns:
        The ``abs_path`` of the first successful find, or ``None`` when no
        anchor leads to a hit.
    """
    for anchor_name in ctx.desc.ctk_root_canary_anchor_libnames:
        anchor_path = _resolve_system_loaded_abs_path_in_subprocess(anchor_name)
        # Each stage runs only if the previous one produced a value.
        root = None if anchor_path is None else derive_ctk_root(anchor_path)
        hit = None if root is None else find_via_ctk_root(ctx, root)
        if hit is not None:
            return hit.abs_path
    return None
99+
100+
54101
def _load_lib_no_cache(libname: str) -> LoadedDL:
55102
desc = LIB_DESCRIPTORS[libname]
56103

@@ -82,6 +129,11 @@ def _load_lib_no_cache(libname: str) -> LoadedDL:
82129
if find is not None:
83130
return LOADER.load_with_abs_path(desc, find.abs_path, find.found_via)
84131

132+
if desc.ctk_root_canary_anchor_libnames:
133+
canary_abs_path = _try_ctk_root_canary(ctx)
134+
if canary_abs_path is not None:
135+
return LOADER.load_with_abs_path(desc, canary_abs_path, "system-ctk-root")
136+
85137
ctx.raise_not_found()
86138

87139

@@ -150,6 +202,13 @@ def load_nvidia_dynamic_lib(libname: str) -> LoadedDL:
150202
151203
- If set, use ``CUDA_HOME`` or ``CUDA_PATH`` (in that order).
152204
205+
5. **CTK root canary probe (discoverable libs only)**
206+
207+
- For selected libraries whose shared object doesn't reside on the
208+
standard linker path (currently ``nvvm``), attempt to derive CTK
209+
root by system-loading a well-known CTK canary library in a
210+
subprocess and then searching relative to that root.
211+
153212
**Driver libraries** (``"cuda"``, ``"nvml"``):
154213
155214
These are part of the NVIDIA display driver (not the CUDA Toolkit) and

cuda_pathfinder/cuda/pathfinder/_dynamic_libs/search_steps.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,63 @@ def _find_using_lib_dir(ctx: SearchContext, lib_dir: str | None) -> str | None:
9696
)
9797

9898

99+
def _derive_ctk_root_linux(resolved_lib_path: str) -> str | None:
100+
"""Derive CTK root from Linux canary path.
101+
102+
Supports:
103+
- ``$CTK_ROOT/lib64/libfoo.so.*``
104+
- ``$CTK_ROOT/lib/libfoo.so.*``
105+
- ``$CTK_ROOT/targets/<triple>/lib64/libfoo.so.*``
106+
- ``$CTK_ROOT/targets/<triple>/lib/libfoo.so.*``
107+
"""
108+
lib_dir = os.path.dirname(resolved_lib_path)
109+
basename = os.path.basename(lib_dir)
110+
if basename in ("lib64", "lib"):
111+
parent = os.path.dirname(lib_dir)
112+
grandparent = os.path.dirname(parent)
113+
if os.path.basename(grandparent) == "targets":
114+
return os.path.dirname(grandparent)
115+
return parent
116+
return None
117+
118+
119+
def _derive_ctk_root_windows(resolved_lib_path: str) -> str | None:
120+
"""Derive CTK root from Windows canary path.
121+
122+
Supports:
123+
- ``$CTK_ROOT/bin/x64/foo.dll`` (CTK 13 style)
124+
- ``$CTK_ROOT/bin/foo.dll`` (CTK 12 style)
125+
"""
126+
import ntpath
127+
128+
lib_dir = ntpath.dirname(resolved_lib_path)
129+
basename = ntpath.basename(lib_dir).lower()
130+
if basename == "x64":
131+
parent = ntpath.dirname(lib_dir)
132+
if ntpath.basename(parent).lower() == "bin":
133+
return ntpath.dirname(parent)
134+
elif basename == "bin":
135+
return ntpath.dirname(lib_dir)
136+
return None
137+
138+
139+
def derive_ctk_root(resolved_lib_path: str) -> str | None:
    """Derive the CTK root from a resolved canary library path.

    The Linux layout rules are tried first and the Windows rules second; the
    first non-``None`` result is returned.

    Returns:
        The derived root directory, or ``None`` when neither rule set matches.
    """
    for derive in (_derive_ctk_root_linux, _derive_ctk_root_windows):
        root = derive(resolved_lib_path)
        if root is not None:
            return root
    return None
145+
146+
147+
def find_via_ctk_root(ctx: SearchContext, ctk_root: str) -> FindResult | None:
    """Search for the context's library beneath an already-derived CTK root.

    The library directory is resolved from ``ctx.desc``, ``ctx.platform`` and
    ``ctk_root`` via ``_find_lib_dir_using_anchor``, then handed to
    ``_find_using_lib_dir``.

    Returns:
        A ``FindResult`` tagged ``"system-ctk-root"`` on success, otherwise
        ``None``.
    """
    anchored_dir = _find_lib_dir_using_anchor(ctx.desc, ctx.platform, ctk_root)
    located = _find_using_lib_dir(ctx, anchored_dir)
    return None if located is None else FindResult(located, "system-ctk-root")
154+
155+
99156
# ---------------------------------------------------------------------------
100157
# Find steps
101158
# ---------------------------------------------------------------------------

cuda_pathfinder/cuda/pathfinder/_dynamic_libs/supported_nvidia_libs.py

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -51,13 +51,6 @@
5151
desc.name for desc in DESCRIPTOR_CATALOG if desc.requires_rtld_deepbind and desc.linux_sonames
5252
)
5353

54-
# CTK root canary probe config:
55-
# - anchor libs: expected on the standard system loader path and used to derive
56-
# CTK root in an isolated child process.
57-
# - discoverable libs: libs that are allowed to use the CTK-root canary fallback.
58-
_CTK_ROOT_CANARY_ANCHOR_LIBNAMES = ("cudart",)
59-
_CTK_ROOT_CANARY_DISCOVERABLE_LIBNAMES = ("nvvm",)
60-
6154
# Based on output of toolshed/make_site_packages_libdirs_linux.py
6255
SITE_PACKAGES_LIBDIRS_LINUX_CTK = {
6356
desc.name: desc.site_packages_linux for desc in _CTK_DESCRIPTORS if desc.site_packages_linux

cuda_pathfinder/tests/test_add_nv_library.py

Lines changed: 0 additions & 160 deletions
This file was deleted.

0 commit comments

Comments
 (0)