Skip to content

Commit 8b7585c

Browse files
committed
fix(nvidia): failed to apply mig with kdp
Signed-off-by: thxCode <thxcode0824@gmail.com>
1 parent 4d6bf3d commit 8b7585c

2 files changed

Lines changed: 35 additions & 20 deletions

File tree

gpustack_runtime/deployer/__types__.py

Lines changed: 34 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1373,25 +1373,29 @@ def _prepare(self):
13731373
resource_key,
13741374
)
13751375
if ren and ben_list:
1376-
valued_uuid = (
1377-
self.allowed_uuid_values
1378-
and (
1379-
ren
1380-
in envs.GPUSTACK_RUNTIME_DEPLOY_RUNTIME_VISIBLE_DEVICES_VALUE_UUID
1381-
)
1382-
and manu != ManufacturerEnum.ASCEND
1376+
valued_backend_uuid = (
1377+
ren
1378+
in envs.GPUSTACK_RUNTIME_DEPLOY_RUNTIME_VISIBLE_DEVICES_VALUE_UUID
1379+
) and manu != ManufacturerEnum.ASCEND
1380+
valued_runtime_uuid = (
1381+
self.allowed_runtime_uuid_values and valued_backend_uuid
13831382
)
1384-
dev_values: dict[str, str] = {}
1385-
dev_aligned_values: dict[str, str] = {}
1383+
dev_runtime_values: dict[str, str] = {}
1384+
dev_backend_values: dict[str, str] = {}
1385+
dev_backend_aligned_values: dict[str, str] = {}
13861386
dev_numa_affinities: dict[str, str] = {}
13871387
dev_cpus_affinities: dict[str, str] = {}
13881388
for dev_i, dev in enumerate(devs):
13891389
dev_index = str(dev.index)
1390-
if valued_uuid:
1391-
dev_values[dev_index] = dev.uuid
1390+
if valued_runtime_uuid:
1391+
dev_runtime_values[dev_index] = dev.uuid
1392+
else:
1393+
dev_runtime_values[dev_index] = dev_index
1394+
if valued_backend_uuid:
1395+
dev_backend_values[dev_index] = dev.uuid
13921396
else:
1393-
dev_values[dev_index] = dev_index
1394-
dev_aligned_values[dev_index] = str(dev_i)
1397+
dev_backend_values[dev_index] = dev_index
1398+
dev_backend_aligned_values[dev_index] = str(dev_i)
13951399
dev_numa_affinities[dev_index] = dev.appendix.get("numa", "")
13961400
dev_cpus_affinities[dev_index] = map_numa_node_to_cpu_affinity(
13971401
dev_numa_affinities[dev_index],
@@ -1402,16 +1406,16 @@ def _prepare(self):
14021406
runtime_env=ren,
14031407
backend_env=ben_list,
14041408
cdi=cdi,
1405-
runtime_values=dev_values,
1409+
runtime_values=dev_runtime_values,
14061410
backend_values={
14071411
ben: (
1408-
dev_aligned_values
1412+
dev_backend_aligned_values
14091413
if (
1410-
not valued_uuid
1414+
not valued_backend_uuid
14111415
and ben
14121416
in envs.GPUSTACK_RUNTIME_DEPLOY_BACKEND_VISIBLE_DEVICES_VALUE_ALIGNMENT
14131417
)
1414-
else dev_values
1418+
else dev_backend_values
14151419
)
14161420
for ben in ben_list
14171421
},
@@ -1654,9 +1658,20 @@ def name(self) -> str:
16541658
return self._name
16551659

16561660
@property
1657-
def allowed_uuid_values(self) -> bool:
1661+
def allowed_runtime_uuid_values(self) -> bool:
1662+
"""
1663+
Return whether the deployer allows using UUIDs as runtime visible devices values.
1664+
1665+
Returns:
1666+
True if allowed, False otherwise.
1667+
1668+
"""
1669+
return True
1670+
1671+
@property
1672+
def allowed_backend_uuid_values(self) -> bool:
16581673
"""
1659-
Return whether the deployer allows using UUIDs as visible devices values.
1674+
Return whether the deployer allows using UUIDs as backend visible devices values.
16601675
16611676
Returns:
16621677
True if allowed, False otherwise.

gpustack_runtime/deployer/kuberentes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1207,7 +1207,7 @@ def __init__(self):
12071207
self._node_name = envs.GPUSTACK_RUNTIME_KUBERNETES_NODE_NAME
12081208

12091209
@property
1210-
def allowed_uuid_values(self) -> bool:
1210+
def allowed_runtime_uuid_values(self) -> bool:
12111211
return get_resource_injection_policy() != "kdp"
12121212

12131213
def _prepare_mirrored_deployment(self):

0 commit comments

Comments
 (0)