From 10908e54d49676ad819c19ec1384571ab9c502ac Mon Sep 17 00:00:00 2001
From: Lihao Ran
Date: Wed, 10 Dec 2025 22:29:36 +0000
Subject: [PATCH] [multihost] Integrate expert parallelism to RayExecutor

Signed-off-by: Lihao Ran
---
 tpu_inference/executors/ray_distributed_executor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tpu_inference/executors/ray_distributed_executor.py b/tpu_inference/executors/ray_distributed_executor.py
index 05d96e0b2..7b961b036 100644
--- a/tpu_inference/executors/ray_distributed_executor.py
+++ b/tpu_inference/executors/ray_distributed_executor.py
@@ -369,7 +369,7 @@ def sort_by_driver_then_worker_ip(item: RayWorkerMetaData):
         for pp_rank in range(self.parallel_config.pipeline_parallel_size):
             self.pp_tp_workers.append([])
             num_tp_workers = int(
-                self.parallel_config.tensor_parallel_size //
+                self.vllm_config.sharding_config.total_devices //
                 num_tpu_per_worker)
             for tp_rank in range(num_tp_workers):
                 # PP=2, TP=4, num_tpu_per_worker=2
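
Note (not part of the patch): a minimal sketch of the worker-grouping
arithmetic the changed line computes. It assumes, based on the patch
title, that `total_devices` on `sharding_config` equals
tensor_parallel_size * expert_parallel_size; the EP degree and all
values below are illustrative, not taken from the source.

    # Sketch: how many Ray workers make up each pipeline stage.
    pipeline_parallel_size = 2  # PP=2, from the in-code comment
    tensor_parallel_size = 4    # TP=4, from the in-code comment
    expert_parallel_size = 2    # assumed EP degree
    num_tpu_per_worker = 2      # TPU chips hosted per Ray worker

    # Assumed relation; the real value would come from
    # vllm_config.sharding_config.total_devices.
    total_devices = tensor_parallel_size * expert_parallel_size

    pp_tp_workers = []
    for pp_rank in range(pipeline_parallel_size):
        # Before this patch each stage grouped
        # tensor_parallel_size // num_tpu_per_worker == 2 workers;
        # counting expert-parallel devices widens it to 4.
        num_tp_workers = int(total_devices // num_tpu_per_worker)
        pp_tp_workers.append(list(range(num_tp_workers)))

    print(pp_tp_workers)  # [[0, 1, 2, 3], [0, 1, 2, 3]]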