From 2aa595c89bb0801b9bfb2e8e18a7b0c218274941 Mon Sep 17 00:00:00 2001
From: "Gao, Xiang" <qasdfgtyuiop@gmail.com>
Date: Wed, 17 Dec 2025 16:50:02 -0800
Subject: [PATCH] Unconditionally return 1 for ParallelType::Stream in
 unshardedSizes

---
 csrc/multidevice/execution_utils.cpp | 24 +-----------------------
 1 file changed, 1 insertion(+), 23 deletions(-)

diff --git a/csrc/multidevice/execution_utils.cpp b/csrc/multidevice/execution_utils.cpp
index a7a7da703e2..83ddb761221 100644
--- a/csrc/multidevice/execution_utils.cpp
+++ b/csrc/multidevice/execution_utils.cpp
@@ -70,29 +70,7 @@ std::vector<int64_t> unshardedSizes(
 
     auto multiplier = [&]() -> int64_t {
       if (parallel_type == ParallelType::Stream) {
-        // TODO(#5525): hack for MultiDeviceExecutor.  MultiDeviceExecutor looks
-        // for ParallelType::Stream only in logical domains and assumes a
-        // stream-parallelized dimension is always fully allocated.  So we set
-        // the multiplier to 1 when `sharded_id` is a logical IterDomain. This
-        // will have to change when FusionExecutorCache requires a logical
-        // dimension to be stream-parallelized, both loop and allocation. Refer
-        // to
-        // https://github.com/NVIDIA/Fuser/blob/f8e84e52296cdecd318dd2ce904139616d7bd434/tests/cpp/test_overlap.cpp#L155
-        // for an example. An alternative to consider is to create a new
-        // ParallelType for stream parallelization and use it in
-        // FusionExecutorCache.
-        if (std::find(
-                tv->getLogicalDomain().begin(),
-                tv->getLogicalDomain().end(),
-                sharded_id) != tv->getLogicalDomain().end()) {
-          return 1;
-        }
-
-        NVF_ERROR(
-            sharded_id->extent()->isConstInt(),
-            "DIDs/Stream extent is expected to be constant: ",
-            sharded_id);
-        return sharded_id->extent()->evaluate().as<int64_t>();
+        return 1;
       }
 
       if (isParallelTypeDeviceDim(parallel_type)) {