[miopen] Log applicability failures#3964
[miopen] Log applicability failures#3964SreecharanGundaboluAMD wants to merge 40 commits intodevelopfrom
Conversation
…ndaboluAMD/log_applicability_failures
| return false; | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF( | ||
| !(x_elem_sz == y_elem_sz && dx_elem_sz == dy_elem_sz && x_elem_sz == dx_elem_sz), | ||
| "Element sizes do not match"); |
There was a problem hiding this comment.
Can this be added to inapplicable_msg?
There was a problem hiding this comment.
or we could use ElementSizeMismatch?
| inline constexpr const char* Layout = "Unsupported tensor layout."; | ||
| inline constexpr const char* UnsupportedDevice = "Unsupported GPU."; | ||
| inline constexpr const char* Direction = "Unsupported direction."; | ||
| inline constexpr const char* UseAsmKernels = "Assembly kernels are disabled."; |
There was a problem hiding this comment.
nit: Name seems counter intuitive
| inline constexpr const char* UseAsmKernels = "Assembly kernels are disabled."; | |
| inline constexpr const char* AsmKernelsDisabled = "Assembly kernels are disabled."; |
|
|
||
| MIOPEN_SOLVER_INAPPLICABLE_IF(!ctx.rmv.IsV2orV3(), inapplicable_msg::MetaData); | ||
|
|
||
| MIOPEN_SOLVER_INAPPLICABLE_IF(!(problem.IsFp32() || problem.IsFp16()), |
|
|
||
| if(problem.IsTensorsCasted() || problem.IsFp8() || problem.IsBfp8()) | ||
| return false; |
There was a problem hiding this comment.
Missed Datatype check for fp8 and bfp8
|
|
||
| if(problem.IsTensorsCasted()) | ||
| return false; | ||
| const std::string solver = SolverDbId(); |
|
|
||
| MIOPEN_SOLVER_INAPPLICABLE_IF(!problem.IsLayoutDefault(), inapplicable_msg::Layout); | ||
|
|
||
| MIOPEN_SOLVER_INAPPLICABLE_IF(problem.IsTensorsCasted() || problem.IsFp8() || problem.IsBfp8(), |
There was a problem hiding this comment.
should be separated to IsTensorsCasted and DataType msg for uniformity
| if(current_block_size * current_grid_size * current_splits_4G > 0xffffffffULL) | ||
| return false; | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF( | ||
| (current_block_size * current_grid_size * current_splits_4G > 0xffffffffULL), |
There was a problem hiding this comment.
can add a new worksize limit exceeded msg
| (!problem.IsFp32() && !problem.IsFp16() && | ||
| !(problem.IsBfp16() && (device_name == "gfx90a" || device_name == "gfx942" || | ||
| StartsWith(device_name, "gfx95")))), | ||
| "Datatype and HW combination not supported"); |
There was a problem hiding this comment.
| "Datatype and HW combination not supported"); | |
| "Datatype and device combination not supported"); |
| if(current_block_size * current_grid_size * current_splits_4G > 0xffffffffULL) | ||
| return false; | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF( | ||
| (current_block_size * current_grid_size * current_splits_4G > 0xffffffffULL), |
There was a problem hiding this comment.
can be replace with worksize msg
| (!problem.IsFp32() && !problem.IsFp16() && | ||
| !(problem.IsBfp16() && (device_name == "gfx90a" || device_name == "gfx942" || | ||
| StartsWith(device_name, "gfx95")))), | ||
| "Datatype, HW combination not supported"); |
There was a problem hiding this comment.
| "Datatype, HW combination not supported"); | |
| "Datatype and device combination not supported"); |
| if(current_block_size * current_grid_size * current_splits_4G > 0xffffffffULL) | ||
| return false; | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF( | ||
| !(current_block_size * current_grid_size * current_splits_4G > 0xffffffffULL), |
There was a problem hiding this comment.
can be replaced with worksize msg
| MIOPEN_SOLVER_INAPPLICABLE_IF(!ctx.use_hip_kernels, inapplicable_msg::HIPDisabled); | ||
|
|
||
| MIOPEN_SOLVER_INAPPLICABLE_IF(!legacy_ck::is_ck_supported_hardware(ctx.GetStream()), | ||
| "Legacy CK does not support HW"); |
There was a problem hiding this comment.
| "Legacy CK does not support HW"); | |
| "Legacy CK is not support on this Hardware"); |
HW is misleading for Height and Width parameters
| legacy_ck::get_ck_convolution_problem_descriptor(problem)); | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF(!ctx.use_hip_kernels, inapplicable_msg::HIPDisabled); | ||
|
|
||
| MIOPEN_SOLVER_INAPPLICABLE_IF(!legacy_ck::is_ck_supported_hardware(ctx.GetStream()), |
There was a problem hiding this comment.
Can also use CKWhitelist, "GPU is not in CK whitelist."
| MIOPEN_SOLVER_INAPPLICABLE_IF(!problem.AllTensorsLengthsFitIntoInt(), | ||
| inapplicable_msg::AllTensorsDimsFitIntoInt); | ||
|
|
||
| MIOPEN_SOLVER_INAPPLICABLE_IF(!problem.IsLayoutDefault() && !problem.IsLayoutNHWC(), |
There was a problem hiding this comment.
MIOPEN_SOLVER_INAPPLICABLE_IF(!problem.IsLayoutDefault() && !problem.IsLayoutNHWC(),
added twice
| return false; | ||
| if(test_cast(problem.GetWeights())) | ||
| return false; | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF(test_cast(problem.GetIn()), "Input tensor missing cast type"); |
There was a problem hiding this comment.
| MIOPEN_SOLVER_INAPPLICABLE_IF(test_cast(problem.GetIn()), "Input tensor missing cast type"); | |
| MIOPEN_SOLVER_INAPPLICABLE_IF(test_cast(problem.GetOut()), "Output tensor missing cast type"); |
|
|
||
| if(!ConvDirectNaiveConvIsApplicableByKernelType(ctx, problem)) | ||
| return false; | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF(!problem.IsLayoutDefault() && !problem.IsLayoutNHWC(), |
There was a problem hiding this comment.
| MIOPEN_SOLVER_INAPPLICABLE_IF(!problem.IsLayoutDefault() && !problem.IsLayoutNHWC(), | |
| MIOPEN_SOLVER_INAPPLICABLE_IF(!ConvDirectNaiveConvIsApplicableByKernelType(ctx, problem), | |
| inapplicable_msg::Generic); |
| MIOPEN_SOLVER_INAPPLICABLE_IF(!problem.IsLayoutDefault() && !problem.IsLayoutNHWC(), | ||
| inapplicable_msg::Layout); |
There was a problem hiding this comment.
| MIOPEN_SOLVER_INAPPLICABLE_IF(!problem.IsLayoutDefault() && !problem.IsLayoutNHWC(), | |
| inapplicable_msg::Layout); | |
| MIOPEN_SOLVER_INAPPLICABLE_IF(!ConvDirectNaiveConvIsApplicableByKernelType(ctx, problem), | |
| inapplicable_msg::Generic); |
| return false; | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF(test_cast(problem.GetIn()), "Input tensor missing cast type"); | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF(test_cast(problem.GetOut()), | ||
| "Weight tensor missing cast type"); |
There was a problem hiding this comment.
| "Weight tensor missing cast type"); | |
| "Output tensor missing cast type"); |
| std::tie(gemm_g, gemm_m, gemm_n, gemm_k_total) = CalculateGemmSize(problem); | ||
|
|
||
| return static_ck::IsValidGridGemmXdlops(gemm_m, gemm_n, gemm_k_total); | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF(!static_ck::IsValidGridGemmXdlops(gemm_m, gemm_n, gemm_k_total), |
There was a problem hiding this comment.
is this a case for InvalidGridGemm
|
|
||
| return static_ck::IsValidGridGemmXdlops(gemm_m, gemm_n, gemm_k_total); | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF(!static_ck::IsValidGridGemmXdlops(gemm_m, gemm_n, gemm_k_total), | ||
| inapplicable_msg::NoKernelForConfig); |
There was a problem hiding this comment.
| inapplicable_msg::NoKernelForConfig); | |
| inapplicable_msg::InvalidGridGemm); |
|
|
||
| return static_ck::IsValidGridGemmXdlops(gemm_m, gemm_n, gemm_k_total); | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF(!static_ck::IsValidGridGemmXdlops(gemm_m, gemm_n, gemm_k_total), | ||
| inapplicable_msg::NoKernelForConfig); |
There was a problem hiding this comment.
| inapplicable_msg::NoKernelForConfig); | |
| inapplicable_msg::InvalidGridGemm); |
| if(IS_DISABLED(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F3X3)) | ||
| return false; | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF(IS_DISABLED(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F3X3), | ||
| inapplicable_msg::EnvDisabled); |
There was a problem hiding this comment.
It would be useful to also capture the ENV VAR in EnvDisabled msg to help isolate why its disabled
| // #if 0 | ||
| // // There is a stronger restriction than this one, which make this one | ||
| // unnecessary. | ||
| // // The kernel read stripes (in height direction, one stripe at a time) of | ||
| // input into LDS, | ||
| // // the height of stripe is (MLO_N_ALIGNED_OUT_SCAN_BLK - 1) * | ||
| // MLO_FILTER_STRIDE1 + | ||
| // // MLO_FILTER_SIZE1, (MLO_FILTER_SIZE1 - MLO_FILTER_STRIDE1) of it is | ||
| // reusable from | ||
| // // previous read, (MLO_N_ALIGNED_OUT_SCAN_BLK * MLO_FILTER_STRIDE1) of it is | ||
| // fresh read | ||
| // // from device memory. So (MLO_FILTER_SIZE1 - MLO_FILTER_STRIDE1) need no | ||
| // less than 0. | ||
| // // TODO: chao: revisit this if failure is encountered. | ||
| // problem.GetWeightsHeight() >= problem.GetKernelStrideH() && | ||
| // #endif |
There was a problem hiding this comment.
| // #if 0 | |
| // // There is a stronger restriction than this one, which make this one | |
| // unnecessary. | |
| // // The kernel read stripes (in height direction, one stripe at a time) of | |
| // input into LDS, | |
| // // the height of stripe is (MLO_N_ALIGNED_OUT_SCAN_BLK - 1) * | |
| // MLO_FILTER_STRIDE1 + | |
| // // MLO_FILTER_SIZE1, (MLO_FILTER_SIZE1 - MLO_FILTER_STRIDE1) of it is | |
| // reusable from | |
| // // previous read, (MLO_N_ALIGNED_OUT_SCAN_BLK * MLO_FILTER_STRIDE1) of it is | |
| // fresh read | |
| // // from device memory. So (MLO_FILTER_SIZE1 - MLO_FILTER_STRIDE1) need no | |
| // less than 0. | |
| // // TODO: chao: revisit this if failure is encountered. | |
| // problem.GetWeightsHeight() >= problem.GetKernelStrideH() && | |
| // #endif | |
| #if 0 | |
| // There is a stronger restriction than this one, which make this one unnecessary. | |
| // The kernel read stripes (in height direction, one stripe at a time) of input into LDS, | |
| // the height of stripe is (MLO_N_ALIGNED_OUT_SCAN_BLK - 1) * MLO_FILTER_STRIDE1 + | |
| // MLO_FILTER_SIZE1, (MLO_FILTER_SIZE1 - MLO_FILTER_STRIDE1) of it is reusable from | |
| // previous read, (MLO_N_ALIGNED_OUT_SCAN_BLK * MLO_FILTER_STRIDE1) of it is fresh read | |
| // from device memory. So (MLO_FILTER_SIZE1 - MLO_FILTER_STRIDE1) need no less than 0. | |
| // TODO: chao: revisit this if failure is encountered. | |
| problem.GetWeightsHeight() >= problem.GetKernelStrideH() && | |
| #endif |
| if (!supported) | ||
| return false; | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF(!supported, | ||
| inapplicable_msg::Workaround); |
There was a problem hiding this comment.
I am not sure if this is due to an explicit workaround?
| bool ConvWinoFuryRxSCommon<Winodata, Winofilter>::IsApplicable(const ExecutionContext& ctx, | ||
| const ProblemDescription& problem) | ||
| { | ||
| std::string context = "ConvWinoFuryRxSCommon"; // for logging |
There was a problem hiding this comment.
do we mean to add it to all solvers? or this is leftover code?
| return false; | ||
| if(desc.op_map[0]->kind() != miopenFusionOpConvForward) | ||
| return false; | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF((desc.op_map.size() > 3), "Too many ops in fusion plan."); |
| !(gemm::IsAnyBufferBf16(xDesc, yDesc, wDesc) && !gemm::IsBf16Supported) && | ||
| !(gemm::IsAnyBufferFp16(xDesc, yDesc, wDesc) && !gemm::IsFp16Supported); | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF((problem.IsFp8() && !rblas_fp8_supported), | ||
| "GEMM not applicable for F8 on this GPU architecture"); |
There was a problem hiding this comment.
| "GEMM not applicable for F8 on this GPU architecture"); | |
| "GEMM not applicable for FP8 on this GPU architecture"); |
| } | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF( | ||
| !rblas_fp8_supported, | ||
| "GEMM not supported with casted tensors on this GPU architecture"); |
| !(gemm::IsAnyBufferBf16(dxDesc, dyDesc, wDesc) && !gemm::IsBf16Supported) && | ||
| !(gemm::IsAnyBufferFp16(dxDesc, dyDesc, wDesc) && !gemm::IsFp16Supported); | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF((problem.IsFp8() && !rblas_fp8_supported), | ||
| "GEMM not applicable for F8 on this GPU architecture"); |
There was a problem hiding this comment.
| "GEMM not applicable for F8 on this GPU architecture"); | |
| "GEMM not applicable for FP8 on this GPU architecture"); |
| } | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF( | ||
| !rblas_fp8_supported, | ||
| "GEMM not supported with casted tensors on this GPU architecture"); |
| !(gemm::IsAnyBufferBf16(xDesc, dyDesc, dwDesc) && !gemm::IsBf16Supported) && | ||
| !(gemm::IsAnyBufferFp16(xDesc, dyDesc, dwDesc) && !gemm::IsFp16Supported); | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF((problem.IsFp8() && !rblas_fp8_supported), | ||
| "GEMM not applicable for F8 on this GPU architecture"); |
There was a problem hiding this comment.
| "GEMM not applicable for F8 on this GPU architecture"); | |
| "GEMM not applicable for FP8 on this GPU architecture"); |
| return false; | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF(!problem.IsSameType(), inapplicable_msg::DataTypeMismatch); | ||
|
|
||
| MIOPEN_SOLVER_INAPPLICABLE_IF(!IsLargeIndex(problem), "Not a large index problem"); |
There was a problem hiding this comment.
| MIOPEN_SOLVER_INAPPLICABLE_IF(!IsLargeIndex(problem), "Not a large index problem"); | |
| MIOPEN_SOLVER_INAPPLICABLE_IF(!IsLargeIndex(problem), inapplicable_msg::IndexRange); |
| return false; | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF(!problem.IsAllContiguous(), inapplicable_msg::NotContiguous); | ||
|
|
||
| MIOPEN_SOLVER_INAPPLICABLE_IF((problem.GetDim() != 0), "Only dim=0 is supported."); |
There was a problem hiding this comment.
inapplicable_msg::InvalidDim?
| return false; | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF(!problem.IsAllContiguous(), inapplicable_msg::NotContiguous); | ||
|
|
||
| MIOPEN_SOLVER_INAPPLICABLE_IF((problem.GetDim() != 0), "Only dim=0 is supported."); |
There was a problem hiding this comment.
inapplicable_msg::InvalidDim?
| if(problem.GetInputDesc().GetNumDims() > 5) | ||
| return false; | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF(!IsImprovementOverROCm(problem), | ||
| "Not an improvement over ROCm implementation"); |
There was a problem hiding this comment.
| "Not an improvement over ROCm implementation"); | |
| inapplicable_msg::IsImprovementOverROCm); |
| MIOPEN_SOLVER_INAPPLICABLE_IF(!IsImprovementOverROCm(problem), | ||
| "Not an improvement over ROCm implementation"); | ||
|
|
||
| MIOPEN_SOLVER_INAPPLICABLE_IF((problem.GetInputDesc().GetNumDims() > 5), |
| if(activ_op.activMode != miopenActivationRELU) | ||
| return false; | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF((activ_op.activMode != miopenActivationRELU), | ||
| "Activation is not RELU"); |
There was a problem hiding this comment.
| "Activation is not RELU"); | |
| inapplicable_msg::InvalidActivation); |
| MIOPEN_SOLVER_INAPPLICABLE_IF((activationType != miopenActivationRELU && | ||
| activationType != miopenActivationCLIPPEDRELU && | ||
| activationType != miopenActivationCLAMP), | ||
| "Unsupported activation type"); |
There was a problem hiding this comment.
| "Unsupported activation type"); | |
| inapplicable_msg::InvalidActivation); |
| MIOPEN_SOLVER_INAPPLICABLE_IF((activationType != miopenActivationRELU && | ||
| activationType != miopenActivationCLIPPEDRELU && | ||
| activationType != miopenActivationCLAMP), | ||
| "Unsupported activation type"); |
There was a problem hiding this comment.
| "Unsupported activation type"); | |
| inapplicable_msg::InvalidActivation); |
| MIOPEN_SOLVER_INAPPLICABLE_IF(!(prim == miopenFusionOpBatchNormInference || | ||
| prim == miopenFusionOpBiasForward || | ||
| prim == miopenFusionOpActivForward), | ||
| "Unsupported 2nd fusion op"); |
There was a problem hiding this comment.
| "Unsupported 2nd fusion op"); | |
| inapplicable_msg::InvalidActivation); |
| if(!(prim == miopenFusionOpActivForward || prim == miopenFusionOpBatchNormInference)) | ||
| return false; | ||
| MIOPEN_SOLVER_INAPPLICABLE_IF( | ||
| !(prim == miopenFusionOpActivForward || prim == miopenFusionOpBatchNormInference), |
|
Great work 🙌
|
|
This pull request has been inactive for 25 days and will be marked as stale. If you would like to keep this PR open, please:
This PR will be automatically closed in 5 days if no further activity occurs. |
|
This pull request has been automatically closed due to inactivity (30 days with no updates). If you'd like to continue working on this, feel free to reopen the PR or create a new one. |
This pull request introduces a set of macros and standardized messages to improve the clarity, consistency, and maintainability of applicability checks in solver classes across the MIOpen codebase. The main theme is refactoring the
IsApplicablemethods of various solvers to use the new macros for logging and returning early when a solver is not applicable, along with standardized messages for common reasons. This change helps developers quickly understand why a solver was rejected and ensures uniform logging across different solver implementations.The most important changes are:
Core Infrastructure:
solver_utils.hppthat defines theMIOPEN_SOLVER_INAPPLICABLE_IFandMIOPEN_SOLVER_INAPPLICABLE_IF_CONTEXTmacros, as well as a set of standardized inapplicability message strings in theinapplicable_msgnamespace. This file includes documentation and usage guidelines for the macros.Refactoring Applicability Checks (Activ Solvers):
IsApplicablemethods inactiv/bwd_0.cpp,activ/bwd_1.cpp,activ/fwd_0.cpp, andactiv/fwd_1.cppto use the new macros for early returns and standardized messages, replacing manualifchecks and custom strings. This improves logging and code consistency for activation solvers. [1] [2] [3] [4] [5] [6] [7] [8]Refactoring Applicability Checks (Adam Solvers):
IsApplicablemethods inadam/adam.cppandadam/transformers_adam_w.cppto use the new macro and standardized message for contiguous tensor checks. [1] [2]Refactoring Applicability Checks (BatchNorm Solver):
IsApplicableinbatchnorm/backward_per_activation.cppto use the macros and standardized messages for 2D checks, direction/mode checks, and data type validation, replacing multiple manualifstatements.Codebase Integration:
#include <miopen/solver/solver_utils.hpp>to all affected solver source files to enable use of the new macros and messages. [1] [2] [3] [4] [5] [6] [7]These changes collectively standardize the way solver applicability is checked and logged, making the code easier to maintain and debug.
Submission Checklist