From 4250bbf691fb66108084490d01d560cb58cf1248 Mon Sep 17 00:00:00 2001 From: Xiang Gao Date: Wed, 17 Dec 2025 12:34:39 -0800 Subject: [PATCH 01/11] save --- csrc/evaluator_common.h | 1 + 1 file changed, 1 insertion(+) diff --git a/csrc/evaluator_common.h b/csrc/evaluator_common.h index aabf029ed4d..45b9fecf5fd 100644 --- a/csrc/evaluator_common.h +++ b/csrc/evaluator_common.h @@ -218,6 +218,7 @@ class PrecomputedValues { defined_[index] = true; values_[index] = value; binding_log_.emplace_back(index, value); + validate(); } template void bindValue(int index, const T& value) { From 482a4d09bbf96c680076480bba2cb126fc8e2e10 Mon Sep 17 00:00:00 2001 From: Xiang Gao Date: Wed, 17 Dec 2025 12:44:30 -0800 Subject: [PATCH 02/11] save --- tests/cpp/test_stream.cpp | 94 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/tests/cpp/test_stream.cpp b/tests/cpp/test_stream.cpp index 49a4653c69c..590851955cf 100644 --- a/tests/cpp/test_stream.cpp +++ b/tests/cpp/test_stream.cpp @@ -193,4 +193,98 @@ TEST_F(StreamTest, ReplicatedAllocation) { } } +TEST_F(StreamTest, Matmul) { + constexpr int64_t c = 3; + + auto fusion = std::make_unique(); + { + FusionGuard fg(fusion.get()); + TensorView* in = makeSymbolicTensor(2); + TensorView* w = makeSymbolicTensor(2); + TensorView* out = matmul(in, w); + fusion->addInput(in); + fusion->addInput(w); + fusion->addOutput(out); + + out->outer_split(1, c); + out->axis(1)->parallelize(ParallelType::Stream); + } + + auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA); + at::Tensor in_tensor = at::randn({5, 7}, options); + at::Tensor w_tensor = at::randn({7, c * 2}, options); + + // With NVFUSER_DUMP=host_ir, you'll see the host IR container like the + // following: + // clang-format off + // %HostIrContainer { (T0_g_float[iS0{i0}, iS1{i2}], T1_g_float[istreamIdx7{3}, iS11{i2}, iS8{( ceilDiv(i4, 3) )}]) -> (T2_g_float[istreamIdx9{3}, iS4{i0}, iS10{( ceilDiv(i4, 3) )}, rS6{i2}]) : + // FOR i18 from 0 to 3: + // T2_g_float[istreamIdx9{3}, iS4{i0}, iS10{( ceilDiv(i4, 3) )}, rS6{i2}] + // = matmul(T0_g_float[iS0{i0}, iS1{i2}], + // T1_g_float[istreamIdx7{3}, iS11{i2}, iS8{( ceilDiv(i4, 3) )}]) + // } // %HostIrContainer + // clang-format on + FusionExecutorCache executor_cache(std::move(fusion)); + auto out_tensor = executor_cache.runFusionWithInputs({in_tensor, w_tensor})[0] + .as(); + + testValidate( + executor_cache.fusion(), + {out_tensor}, + {in_tensor, w_tensor}, + __LINE__, + __FILE__); +} + +TEST_F(StreamTest, TwoMatmuls) { + constexpr int64_t c = 3; + + auto fusion = std::make_unique(); + { + FusionGuard fg(fusion.get()); + TensorView* in = makeSymbolicTensor(2); + TensorView* w1 = makeSymbolicTensor(2); + TensorView* w2 = makeSymbolicTensor(2); + TensorView* out = matmul(in, w1); + out = matmul(out, w2); + fusion->addInput(in); + fusion->addInput(w1); + fusion->addInput(w2); + fusion->addOutput(out); + + in->outer_split(0, c); + in->axis(0)->parallelize(ParallelType::Stream); + } + + { + auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA); + at::Tensor in = at::randn({c * 2, 3}, options); + at::Tensor w1 = at::randn({3, 5}, options); + at::Tensor w2 = at::randn({5, 3}, options); + + // With NVFUSER_DUMP=host_ir, you'll see the host IR container like the + // following: + // clang-format off + // %HostIrContainer { (T0_g_float[istreamIdx12{3}, iS13{( ceilDiv(i0, 3) )}, iS1{i2}], T1_g_float[iS14{i2}, iS3{i4}], T2_g_float[iS15{i4}, iS5{i6}]) -> (T4_g_float[istreamIdx18{3}, iS19{( ceilDiv(i0, 3) )}, iS10{i6}, rS11{i4}]) : + // T4_g_float[istreamIdx18{3}, iS19{( ceilDiv(i0, 3) )}, iS10{i6}, rS11{i4}] = ALLOCATE(buffer=T4_g_float[istreamIdx18{3}, iS19{( ceilDiv(i0, 3) )}, iS10{i6}, rS11{i4}], mem_type=global, size=( i0 * i6 ), zero_init=false, resets_to_zero=false) + // FOR i99 from 0 to 3: + // T5_l_float[istreamIdx22{3}, iS23{( ceilDiv(i0, 3) )}, iS21{i2}] = ShardByStream(T0_g_float[istreamIdx12{3}, iS13{( ceilDiv(i0, 3) )}, iS1{i2}], stream_index = i99) + // T3_g_float[istreamIdx16{3}, iS17{( ceilDiv(i0, 3) )}, iS7{i4}, rS8{i2}] + // = matmul(T5_l_float[istreamIdx22{3}, iS23{( ceilDiv(i0, 3) )}, iS21{i2}], + // T1_g_float[iS14{i2}, iS3{i4}]) + // T6_l_float[istreamIdx26{3}, iS27{( ceilDiv(i0, 3) )}, iS25{i6}] = ShardByStream(T4_g_float[istreamIdx18{3}, iS19{( ceilDiv(i0, 3) )}, iS10{i6}, rS11{i4}], stream_index = i99) + // T6_l_float[istreamIdx26{3}, iS27{( ceilDiv(i0, 3) )}, iS25{i6}] + // = matmul(T3_g_float[istreamIdx16{3}, iS17{( ceilDiv(i0, 3) )}, iS7{i4}, rS8{i2}], + // T2_g_float[iS15{i4}, iS5{i6}]) + // } // %HostIrContainer + // clang-format on + FusionExecutorCache executor_cache(std::move(fusion)); + auto out = + executor_cache.runFusionWithInputs({in, w1, w2})[0].as(); + + testValidate( + executor_cache.fusion(), {out}, {in, w1, w2}, __LINE__, __FILE__); + } +} + } // namespace nvfuser From 29359d73875e8ec8d9a86f2baba4dddf5a58a0f1 Mon Sep 17 00:00:00 2001 From: Xiang Gao Date: Wed, 17 Dec 2025 13:46:46 -0800 Subject: [PATCH 03/11] Improve error message for PrecomputedValues --- csrc/evaluator_common.cpp | 42 ++++++++++++++++++++++++--------------- csrc/evaluator_common.h | 13 +++++++----- 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/csrc/evaluator_common.cpp b/csrc/evaluator_common.cpp index 5e983777b04..77573ca0796 100644 --- a/csrc/evaluator_common.cpp +++ b/csrc/evaluator_common.cpp @@ -186,7 +186,7 @@ void PrecomputedValues::bindParallelExtents( auto raw_val = launch_constraint.getRawVal(it.first); if (raw_val > 0) { for (auto extent : it.second) { - bindValue(extent->evaluatorIndex(), raw_val); + bindValue(extent->evaluatorIndex(), raw_val, extent); } } } @@ -198,7 +198,10 @@ void PrecomputedValues::bindConcreteParallelTypeValue( auto index_list_it = thread_dim_value_indices_.find(pt); if (index_list_it != thread_dim_value_indices_.end()) { for (auto index : *(index_list_it->second)) { - bindValue(index, value); + Val* ir_node = (index >= 0 && index < (int)symbols_.size()) + ? symbols_[index] + : nullptr; + bindValue(index, value, ir_node); } } } @@ -228,7 +231,7 @@ void PrecomputedValues::bindValues( bindTensorMetaData(tv, tensor); } } else { - bindValue(input->evaluatorIndex(), args[i]); + bindValue(input->evaluatorIndex(), args[i], input); } } } @@ -360,15 +363,19 @@ void PrecomputedValues::initializeNamedScalars() { void PrecomputedValues::validate() { FUSER_PERF_SCOPE("PrecomputedValuess::Validate"); using namespace PolymorphicValue_functions; - for (const auto& it : binding_log_) { - NVF_ERROR( - isSame(values_[it.first], it.second), - "Precomputed values failed to validate.", - "\nSomething unexpected changed between the compilation and " - "execution.\n", - values_[it.first], - " != ", - it.second); + for (const auto& [index, expected_value, ir_node] : binding_log_) { + if (!isSame(values_[index], expected_value)) { + std::stringstream error_msg; + error_msg << "Precomputed values failed to validate.\n" + << "Something unexpected changed between the compilation and " + "execution.\n"; + if (ir_node != nullptr) { + error_msg << "IR node: " << ir_node->toString() << "\n"; + } + error_msg << "Computed value: " << values_[index] << "\n" + << "Expected value: " << expected_value; + NVF_ERROR(false, error_msg.str()); + } } has_valid_values_ = true; } @@ -391,12 +398,15 @@ void PrecomputedValues::bindTensorMetaData( if (id->isBroadcast()) { // DIDs are ignored for broadcast. See MultideviceShardingTest.Broadcast // and .ExpandedBroadcast. - bindValue(id->extent()->evaluatorIndex(), 1L); + bindValue(id->extent()->evaluatorIndex(), 1L, id->extent()); if (id->hasExpandedExtent()) { - bindValue(id->expandedExtent()->evaluatorIndex(), dim_size); + bindValue( + id->expandedExtent()->evaluatorIndex(), + dim_size, + id->expandedExtent()); } } else { - bindValue(id->extent()->evaluatorIndex(), dim_size); + bindValue(id->extent()->evaluatorIndex(), dim_size, id->extent()); } } @@ -424,7 +434,7 @@ void PrecomputedValues::bindTensorMetaData( tv->toString(), " with input tensor ", tensor); - bindValue(metadata_val->evaluatorIndex(), metadata); + bindValue(metadata_val->evaluatorIndex(), metadata, metadata_val); } NaiveValueMachine::NaiveValueMachine(PrecomputedValues& precomputed_values) diff --git a/csrc/evaluator_common.h b/csrc/evaluator_common.h index aabf029ed4d..eefa5854f14 100644 --- a/csrc/evaluator_common.h +++ b/csrc/evaluator_common.h @@ -211,17 +211,20 @@ class PrecomputedValues { //! Bind concrete value to the given index //! if the index is valid. - void bindValue_(int index, const PolymorphicValue& value) { + void bindValue_( + int index, + const PolymorphicValue& value, + Val* ir_node = nullptr) { if (index < 0 || is_constant_[index]) { return; } defined_[index] = true; values_[index] = value; - binding_log_.emplace_back(index, value); + binding_log_.emplace_back(index, value, ir_node); } template - void bindValue(int index, const T& value) { - bindValue_(index, PolymorphicValue(value)); + void bindValue(int index, const T& value, Val* ir_node = nullptr) { + bindValue_(index, PolymorphicValue(value), ir_node); } //! Invalidate all computed values in the workspace. @@ -292,7 +295,7 @@ class PrecomputedValues { //! An internal log to keep track of all the bindings //! used in each evaluation cycle. To be used for //! consistency check. - std::vector> binding_log_; + std::vector> binding_log_; //! Integer runtime for realizing the values computations. std::unique_ptr value_machine_; From f770ae9543c88b39afd609677f90852257ee73eb Mon Sep 17 00:00:00 2001 From: Xiang Gao Date: Wed, 17 Dec 2025 14:10:03 -0800 Subject: [PATCH 04/11] save --- csrc/evaluator_common.cpp | 2 +- csrc/evaluator_common.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/csrc/evaluator_common.cpp b/csrc/evaluator_common.cpp index 77573ca0796..7b71892f756 100644 --- a/csrc/evaluator_common.cpp +++ b/csrc/evaluator_common.cpp @@ -198,7 +198,7 @@ void PrecomputedValues::bindConcreteParallelTypeValue( auto index_list_it = thread_dim_value_indices_.find(pt); if (index_list_it != thread_dim_value_indices_.end()) { for (auto index : *(index_list_it->second)) { - Val* ir_node = (index >= 0 && index < (int)symbols_.size()) + const Val* ir_node = (index >= 0 && index < (int)symbols_.size()) ? symbols_[index] : nullptr; bindValue(index, value, ir_node); diff --git a/csrc/evaluator_common.h b/csrc/evaluator_common.h index eefa5854f14..3fa677ac170 100644 --- a/csrc/evaluator_common.h +++ b/csrc/evaluator_common.h @@ -214,7 +214,7 @@ class PrecomputedValues { void bindValue_( int index, const PolymorphicValue& value, - Val* ir_node = nullptr) { + const Val* ir_node = nullptr) { if (index < 0 || is_constant_[index]) { return; } @@ -223,7 +223,7 @@ class PrecomputedValues { binding_log_.emplace_back(index, value, ir_node); } template - void bindValue(int index, const T& value, Val* ir_node = nullptr) { + void bindValue(int index, const T& value, const Val* ir_node = nullptr) { bindValue_(index, PolymorphicValue(value), ir_node); } @@ -295,7 +295,7 @@ class PrecomputedValues { //! An internal log to keep track of all the bindings //! used in each evaluation cycle. To be used for //! consistency check. - std::vector> binding_log_; + std::vector> binding_log_; //! Integer runtime for realizing the values computations. std::unique_ptr value_machine_; From 9a644727fcbaeb37c42c1f9bc2ee9aa20f4d17ff Mon Sep 17 00:00:00 2001 From: Xiang Gao Date: Wed, 17 Dec 2025 14:56:19 -0800 Subject: [PATCH 05/11] save --- csrc/runtime/fusion_executor_cache.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/csrc/runtime/fusion_executor_cache.cpp b/csrc/runtime/fusion_executor_cache.cpp index 4b412ca7d3c..485beec78c7 100644 --- a/csrc/runtime/fusion_executor_cache.cpp +++ b/csrc/runtime/fusion_executor_cache.cpp @@ -50,6 +50,10 @@ KernelArgumentHolder FusionExecutorCache::runFusionWithInputs( std::optional selected_device) { FUSER_PERF_SCOPE("FusionExecutorCache::runFusionWithInputs"); + // Print fusion IR every run + debug() << "Fusion IR in FusionExecutorCache::runFusionWithInputs:" << std::endl; + fusion_->print(); + if (isProfilerEnabled()) { FusionProfiler::start(!isProfilerEnabledWithCupti()); } From eea0228d287e651b6a5476a9493ad34557df1400 Mon Sep 17 00:00:00 2001 From: Xiang Gao Date: Wed, 17 Dec 2025 15:13:50 -0800 Subject: [PATCH 06/11] save --- csrc/evaluator_common.cpp | 36 ++++++++++++++++++++++++++ csrc/evaluator_common.h | 10 +++++++ csrc/runtime/fusion_executor_cache.cpp | 2 ++ csrc/runtime/fusion_kernel_runtime.cpp | 3 +++ 4 files changed, 51 insertions(+) diff --git a/csrc/evaluator_common.cpp b/csrc/evaluator_common.cpp index 7b71892f756..d4e9da13e04 100644 --- a/csrc/evaluator_common.cpp +++ b/csrc/evaluator_common.cpp @@ -208,6 +208,7 @@ void PrecomputedValues::bindConcreteParallelTypeValue( void PrecomputedValues::bindInputs(const KernelArgumentHolder& args) { FUSER_PERF_SCOPE("PrecomputedValues::bindInputs"); + debug() << "[DEBUG] PrecomputedValues::bindInputs called" << std::endl; if (hasValidValues()) { invalidate(); } @@ -222,6 +223,9 @@ void PrecomputedValues::bindValues( std::ssize(inputs), "kernel inputs size does not match args"); + debug() << "[DEBUG] PrecomputedValues::bindValues called with " + << inputs.size() << " inputs" << std::endl; + for (const auto i : arange((int64_t)inputs.size())) { const auto input = inputs[i]; NVF_ERROR(input != nullptr); @@ -374,6 +378,21 @@ void PrecomputedValues::validate() { } error_msg << "Computed value: " << values_[index] << "\n" << "Expected value: " << expected_value; + + // Debug: Show binding history for this index + debug() << "[DEBUG] ===== VALIDATION FAILED =====" << std::endl; + debug() << "[DEBUG] Binding history for index " << index << ":" << std::endl; + for (const auto& [idx, val, node] : binding_log_) { + if (idx == index) { + debug() << "[DEBUG] Bound to: " << val; + if (node != nullptr) { + debug() << " (node: " << node->toString() << ")"; + } + debug() << std::endl; + } + } + debug() << "[DEBUG] ================================" << std::endl; + NVF_ERROR(false, error_msg.str()); } } @@ -390,6 +409,21 @@ void PrecomputedValues::bindTensorMetaData( "Something went wrong configuring launch. Inputs do not match."); std::vector logical_sizes = unshardedSizes(tv, tensor.sizes()); + + debug() << "[DEBUG] bindTensorMetaData for TV: " << tv->toString() << std::endl; + debug() << "[DEBUG] Actual tensor.sizes(): ["; + for (size_t i = 0; i < tensor.sizes().size(); ++i) { + if (i > 0) debug() << ", "; + debug() << tensor.sizes()[i]; + } + debug() << "]" << std::endl; + debug() << "[DEBUG] Unsharded logical_sizes: ["; + for (size_t i = 0; i < logical_sizes.size(); ++i) { + if (i > 0) debug() << ", "; + debug() << logical_sizes[i]; + } + debug() << "]" << std::endl; + adjustEvaluatorSizes(tv, logical_sizes); for (const auto dim : arange(static_cast(logical_domain.size()))) { @@ -406,6 +440,8 @@ void PrecomputedValues::bindTensorMetaData( id->expandedExtent()); } } else { + debug() << "[DEBUG] Binding " << id->extent()->toString() + << " = " << dim_size << std::endl; bindValue(id->extent()->evaluatorIndex(), dim_size, id->extent()); } } diff --git a/csrc/evaluator_common.h b/csrc/evaluator_common.h index 6eb212883e4..0951e41196a 100644 --- a/csrc/evaluator_common.h +++ b/csrc/evaluator_common.h @@ -218,6 +218,16 @@ class PrecomputedValues { if (index < 0 || is_constant_[index]) { return; } + + // Debug: show if we're rebinding a value + if (defined_[index]) { + debug() << "[DEBUG] REBINDING index " << index; + if (ir_node != nullptr) { + debug() << " (node: " << ir_node->toString() << ")"; + } + debug() << " from " << values_[index] << " to " << value << std::endl; + } + defined_[index] = true; values_[index] = value; binding_log_.emplace_back(index, value, ir_node); diff --git a/csrc/runtime/fusion_executor_cache.cpp b/csrc/runtime/fusion_executor_cache.cpp index 485beec78c7..bfb470aa615 100644 --- a/csrc/runtime/fusion_executor_cache.cpp +++ b/csrc/runtime/fusion_executor_cache.cpp @@ -67,6 +67,7 @@ KernelArgumentHolder FusionExecutorCache::runFusionWithInputs( } if (!kernel_runtime->isCompiled()) { + debug() << "[DEBUG] ===== COMPILING KERNEL =====" << std::endl; kernel_runtime->compileFusionParallel(args); } @@ -84,6 +85,7 @@ KernelArgumentHolder FusionExecutorCache::runFusionWithInputs( " failed."); } + debug() << "[DEBUG] ===== EXECUTING KERNEL =====" << std::endl; auto outputs = kernel_runtime->runWithInputs(args); // Kernel time measurement is off by default diff --git a/csrc/runtime/fusion_kernel_runtime.cpp b/csrc/runtime/fusion_kernel_runtime.cpp index 6326c49cf39..ab760067f07 100644 --- a/csrc/runtime/fusion_kernel_runtime.cpp +++ b/csrc/runtime/fusion_kernel_runtime.cpp @@ -578,11 +578,14 @@ std::optional> FusionKernelRuntime:: { FUSER_PERF_SCOPE( "FusionKernelRuntime::getMaybeHeuristicsFor::PrecomputedValues"); + debug() << "[DEBUG] compileFusionParallel: Creating PrecomputedValues and binding inputs" << std::endl; evaluator_precomputed_values = std::make_unique(fusion_to_run); + debug() << "[DEBUG] compileFusionParallel: Calling bindInputs (group_runtime_inputs)" << std::endl; evaluator_precomputed_values->bindInputs(group_runtime_inputs); // TODO Remove binding the original fusion inputs when creating // heuristics for fusion segment. + debug() << "[DEBUG] compileFusionParallel: Calling bindValues (complete fusion inputs)" << std::endl; evaluator_precomputed_values->bindValues( group_to_run->getCompleteFusionInputs(), args); evaluator_precomputed_values->evaluate(); From 7dd1d60d336c80234c21460862dfb85e05115208 Mon Sep 17 00:00:00 2001 From: Xiang Gao Date: Wed, 17 Dec 2025 15:26:30 -0800 Subject: [PATCH 07/11] save --- csrc/evaluator_common.cpp | 6 +++--- csrc/evaluator_common.h | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/csrc/evaluator_common.cpp b/csrc/evaluator_common.cpp index d4e9da13e04..a1f80f37af3 100644 --- a/csrc/evaluator_common.cpp +++ b/csrc/evaluator_common.cpp @@ -376,15 +376,15 @@ void PrecomputedValues::validate() { if (ir_node != nullptr) { error_msg << "IR node: " << ir_node->toString() << "\n"; } - error_msg << "Computed value: " << values_[index] << "\n" - << "Expected value: " << expected_value; + error_msg << "Computed value: " << toString(values_[index]) << "\n" + << "Expected value: " << toString(expected_value); // Debug: Show binding history for this index debug() << "[DEBUG] ===== VALIDATION FAILED =====" << std::endl; debug() << "[DEBUG] Binding history for index " << index << ":" << std::endl; for (const auto& [idx, val, node] : binding_log_) { if (idx == index) { - debug() << "[DEBUG] Bound to: " << val; + debug() << "[DEBUG] Bound to: " << toString(val); if (node != nullptr) { debug() << " (node: " << node->toString() << ")"; } diff --git a/csrc/evaluator_common.h b/csrc/evaluator_common.h index 0951e41196a..ace4c2a64c2 100644 --- a/csrc/evaluator_common.h +++ b/csrc/evaluator_common.h @@ -225,7 +225,8 @@ class PrecomputedValues { if (ir_node != nullptr) { debug() << " (node: " << ir_node->toString() << ")"; } - debug() << " from " << values_[index] << " to " << value << std::endl; + debug() << " from " << PolymorphicValue_functions::toString(values_[index]) + << " to " << PolymorphicValue_functions::toString(value) << std::endl; } defined_[index] = true; From 23d1f180adcee73520b0ac38df3fb56797b3d508 Mon Sep 17 00:00:00 2001 From: Xiang Gao Date: Wed, 17 Dec 2025 15:56:17 -0800 Subject: [PATCH 08/11] try --- csrc/multidevice/execution_utils.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/csrc/multidevice/execution_utils.cpp b/csrc/multidevice/execution_utils.cpp index a7a7da703e2..1f26840643f 100644 --- a/csrc/multidevice/execution_utils.cpp +++ b/csrc/multidevice/execution_utils.cpp @@ -87,6 +87,12 @@ std::vector unshardedSizes( sharded_id) != tv->getLogicalDomain().end()) { return 1; } + if (std::find( + tv->getMayAllocateDomain().begin(), + tv->getMayAllocateDomain().end(), + sharded_id) != tv->getMayAllocateDomain().end()) { + return 1; + } NVF_ERROR( sharded_id->extent()->isConstInt(), From a69118243e7d51aa5c0133351e8bda9e7614f523 Mon Sep 17 00:00:00 2001 From: Xiang Gao Date: Wed, 17 Dec 2025 15:59:28 -0800 Subject: [PATCH 09/11] save --- csrc/multidevice/execution_utils.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/csrc/multidevice/execution_utils.cpp b/csrc/multidevice/execution_utils.cpp index 1f26840643f..e6605121186 100644 --- a/csrc/multidevice/execution_utils.cpp +++ b/csrc/multidevice/execution_utils.cpp @@ -88,9 +88,9 @@ std::vector unshardedSizes( return 1; } if (std::find( - tv->getMayAllocateDomain().begin(), - tv->getMayAllocateDomain().end(), - sharded_id) != tv->getMayAllocateDomain().end()) { + tv->getMayAllocationDomain().begin(), + tv->getMayAllocationDomain().end(), + sharded_id) != tv->getMayAllocationDomain().end()) { return 1; } From 1092227beed1596bca8648f4f0f45aa123210497 Mon Sep 17 00:00:00 2001 From: Xiang Gao Date: Wed, 17 Dec 2025 16:00:59 -0800 Subject: [PATCH 10/11] save --- csrc/multidevice/execution_utils.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/csrc/multidevice/execution_utils.cpp b/csrc/multidevice/execution_utils.cpp index e6605121186..cc7e9251e2e 100644 --- a/csrc/multidevice/execution_utils.cpp +++ b/csrc/multidevice/execution_utils.cpp @@ -88,9 +88,9 @@ std::vector unshardedSizes( return 1; } if (std::find( - tv->getMayAllocationDomain().begin(), - tv->getMayAllocationDomain().end(), - sharded_id) != tv->getMayAllocationDomain().end()) { + tv->getMaybeAllocationDomain().begin(), + tv->getMaybeAllocationDomain().end(), + sharded_id) != tv->getMaybeAllocationDomain().end()) { return 1; } From 658a40bb28e99f77e3455397bab293a1f7f69f4b Mon Sep 17 00:00:00 2001 From: "Gao, Xiang" Date: Tue, 6 Jan 2026 14:12:35 -0800 Subject: [PATCH 11/11] Update execution_utils.cpp --- csrc/multidevice/execution_utils.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/csrc/multidevice/execution_utils.cpp b/csrc/multidevice/execution_utils.cpp index cc7e9251e2e..a7a7da703e2 100644 --- a/csrc/multidevice/execution_utils.cpp +++ b/csrc/multidevice/execution_utils.cpp @@ -87,12 +87,6 @@ std::vector unshardedSizes( sharded_id) != tv->getLogicalDomain().end()) { return 1; } - if (std::find( - tv->getMaybeAllocationDomain().begin(), - tv->getMaybeAllocationDomain().end(), - sharded_id) != tv->getMaybeAllocationDomain().end()) { - return 1; - } NVF_ERROR( sharded_id->extent()->isConstInt(),