diff --git a/lib/attributes/backend/launcher.cpp b/lib/attributes/backend/launcher.cpp index 2ae8acbb..99763611 100644 --- a/lib/attributes/backend/launcher.cpp +++ b/lib/attributes/backend/launcher.cpp @@ -201,7 +201,8 @@ void collectLoops(OklLoopInfo& loopInfo, std::list& out) { } #endif -std::pair splitTileAttr(OklLoopInfo& loopInfo, const oklt::Rewriter& r) { +std::pair splitTileAttr(OklLoopInfo& loopInfo, + const oklt::Rewriter& r) { auto sz = util::parseStrTo(loopInfo.tileSize); // Prepare first loop @@ -210,15 +211,11 @@ std::pair splitTileAttr(OklLoopInfo& loopInfo, const if (sz.value_or(1024) > 0) { if (firstMeta.inc.val.empty()) { firstMeta.inc.val = loopInfo.tileSize; - switch (firstMeta.inc.op.uo) { - case UnOp::PreInc: - case UnOp::PostInc: - firstMeta.inc.op.bo = BinOp::AddAssign; - break; - case UnOp::PreDec: - case UnOp::PostDec: - firstMeta.inc.op.bo = BinOp::RemoveAssign; - break; + if (firstMeta.inc.op.uo == UnOp::PreInc || firstMeta.inc.op.uo == UnOp::PostInc) { + firstMeta.inc.op.bo = BinOp::AddAssign; + } + if (firstMeta.inc.op.uo == UnOp::PreDec || firstMeta.inc.op.uo == UnOp::PostDec) { + firstMeta.inc.op.bo = BinOp::RemoveAssign; } } else { firstMeta.inc.val = "(" + loopInfo.tileSize + " * " + firstMeta.inc.val + ")"; @@ -228,13 +225,11 @@ std::pair splitTileAttr(OklLoopInfo& loopInfo, const // Prepare second loop auto secondMeta = LoopMetaData(loopInfo, r); secondMeta.range.start = firstMeta.var.name; - switch (secondMeta.condition.op) { - case BinOp::Le: - secondMeta.condition.op = BinOp::Lt; - break; - case BinOp::Ge: - secondMeta.condition.op = BinOp::Gt; - break; + if (secondMeta.condition.op == BinOp::Le) { + secondMeta.condition.op = BinOp::Lt; + } + if (secondMeta.condition.op == BinOp::Ge) { + secondMeta.condition.op = BinOp::Gt; } if (sz.value_or(1024) > 0) { secondMeta.range.end = "(" + firstMeta.var.name + " + " + loopInfo.tileSize + ")"; @@ -374,8 +369,8 @@ HandleResult handleLauncherKernelAttribute(SessionStage& s, auto& rewriter = s.getRewriter(); if (!sema.getParsingKernelInfo()) { - return tl::make_unexpected(Error{OkltPipelineErrorCode::INTERNAL_ERROR_KERNEL_INFO_NULL, - "handleKernelAttribute"}); + return tl::make_unexpected( + Error{OkltPipelineErrorCode::INTERNAL_ERROR_KERNEL_INFO_NULL, "handleKernelAttribute"}); } auto kernelInfo = *sema.getParsingKernelInfo(); diff --git a/lib/core/utils/for_stmt_parser.cpp b/lib/core/utils/for_stmt_parser.cpp index 0580b566..1256f437 100644 --- a/lib/core/utils/for_stmt_parser.cpp +++ b/lib/core/utils/for_stmt_parser.cpp @@ -66,7 +66,8 @@ tl::expected parseForStmt(SessionStage& stage, const clang::Attr* a) { auto& ctx = stage.getCompiler().getASTContext(); OklLoopInfo ret{.attr = a, .stmt = s}; - const Expr *start, *end = nullptr; + const Expr* start = nullptr; + const Expr* end = nullptr; if (isa(s.getInit())) { auto d = dyn_cast(s.getInit()); @@ -90,8 +91,11 @@ tl::expected parseForStmt(SessionStage& stage, start = rsh->getSubExpr(); } ret.range.start = start; + } - auto child_count = std::distance(start->children().begin(), start->children().end()); + if (!start) { + return tl::make_unexpected( + Error{std::error_code(), "loop parse: not supported init statement"}); } // Condition diff --git a/tests/functional/data/transpiler/backends/cuda/intrinsic/intrinsic_ref.cpp b/tests/functional/data/transpiler/backends/cuda/intrinsic/intrinsic_ref.cpp index b51e22f3..62958ce4 100644 --- a/tests/functional/data/transpiler/backends/cuda/intrinsic/intrinsic_ref.cpp +++ b/tests/functional/data/transpiler/backends/cuda/intrinsic/intrinsic_ref.cpp @@ -1,62 +1,63 @@ -#include #include +#include namespace { // Math functions // Single precision -[[maybe_unused]] inline __device__ float okl_exp10f(float x) { return exp10f(x); } +[[maybe_unused]] inline __device__ float okl_exp10f(float x) { + return exp10f(x); +} // Warp Shuffle Functions template -inline __device__ T okl_shfl_sync(unsigned mask, T var, int srcLane, - int width = warpSize) { - return __shfl_sync(mask, var, srcLane, width); +inline __device__ T okl_shfl_sync(unsigned mask, T var, int srcLane, int width = warpSize) { + return __shfl_sync(mask, var, srcLane, width); } template -inline __device__ T okl_shfl_up_sync(unsigned mask, T var, unsigned int delta, - int width = warpSize) { - return __shfl_up_sync(mask, var, delta, width); +inline __device__ T +okl_shfl_up_sync(unsigned mask, T var, unsigned int delta, int width = warpSize) { + return __shfl_up_sync(mask, var, delta, width); } template -inline __device__ T okl_shfl_down_sync(unsigned mask, T var, unsigned int delta, - int width = warpSize) { - return __shfl_down_sync(mask, var, delta, width); +inline __device__ T +okl_shfl_down_sync(unsigned mask, T var, unsigned int delta, int width = warpSize) { + return __shfl_down_sync(mask, var, delta, width); } template -inline __device__ T okl_shfl_xor_sync(unsigned mask, T var, int laneMask, - int width = warpSize) { - return __shfl_xor_sync(mask, laneMask, width); +inline __device__ T okl_shfl_xor_sync(unsigned mask, T var, int laneMask, int width = warpSize) { + return __shfl_xor_sync(mask, var, laneMask, width); } // Pipeline Primitives Interface -[[maybe_unused]] _CUDA_PIPELINE_STATIC_QUALIFIER void -okl_memcpy_async(void *__restrict__ dst_shared, - const void *__restrict__ src_global, - size_t size_and_align, size_t zfill = 0) { - __pipeline_memcpy_async(dst_shared, src_global, size_and_align); +[[maybe_unused]] _CUDA_PIPELINE_STATIC_QUALIFIER void okl_memcpy_async( + void* __restrict__ dst_shared, + const void* __restrict__ src_global, + size_t size_and_align, + size_t zfill = 0) { + __pipeline_memcpy_async(dst_shared, src_global, size_and_align); } [[maybe_unused]] _CUDA_PIPELINE_STATIC_QUALIFIER void okl_pipeline_commit() { - __pipeline_commit(); + __pipeline_commit(); } -[[maybe_unused]] _CUDA_PIPELINE_STATIC_QUALIFIER void -okl_pipeline_wait_prior(size_t N) { __pipeline_wait_prior(N); } -} // namespace +[[maybe_unused]] _CUDA_PIPELINE_STATIC_QUALIFIER void okl_pipeline_wait_prior(size_t N) { + __pipeline_wait_prior(N); +} +} // namespace -extern "C" __global__ -__launch_bounds__(1) void _occa_intrinsic_builtin_0(const float *fVec, - float *fSum) { - { - int i = (0) + blockIdx.x; +extern "C" __global__ __launch_bounds__(1) void _occa_intrinsic_builtin_0(const float* fVec, + float* fSum) { { - int j = (0) + threadIdx.x; - float value = okl_exp10f(fVec[i]); - atomicAdd(&(*fSum), value); + int i = (0) + blockIdx.x; + { + int j = (0) + threadIdx.x; + float value = okl_exp10f(fVec[i]); + atomicAdd(&(*fSum), value); + } } - } }