diff --git a/include/xsimd/arch/common/xsimd_common_arithmetic.hpp b/include/xsimd/arch/common/xsimd_common_arithmetic.hpp index f7c3ad524..c1b1bb9d4 100644 --- a/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +++ b/include/xsimd/arch/common/xsimd_common_arithmetic.hpp @@ -17,6 +17,7 @@ #include #include "../../types/xsimd_batch_constant.hpp" +#include "../xsimd_common_fwd.hpp" #include "./xsimd_common_details.hpp" namespace xsimd @@ -27,7 +28,7 @@ namespace xsimd using namespace types; - // bitwise_lshift + // bitwise_lshift multiple (dynamic) template ::value>::type*/> XSIMD_INLINE batch bitwise_lshift(batch const& self, batch const& other, requires_arch) noexcept { @@ -35,6 +36,15 @@ namespace xsimd { return x << y; }, self, other); } + + // bitwise_lshift multiple (constant) + template > + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, batch_constant const& rhs, requires_arch req) noexcept + { + return bitwise_lshift(lhs, rhs.as_batch(), req); + } + + // bitwise_lshift single (constant) template ::value>::type*/> XSIMD_INLINE batch bitwise_lshift(batch const& self, requires_arch) noexcept { diff --git a/include/xsimd/arch/utils/shifts.hpp b/include/xsimd/arch/utils/shifts.hpp new file mode 100644 index 000000000..d83712a16 --- /dev/null +++ b/include/xsimd/arch/utils/shifts.hpp @@ -0,0 +1,71 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * Copyright (c) Marco Barbone * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_UTILS_SHIFTS_HPP +#define XSIMD_UTILS_SHIFTS_HPP + +#include "xsimd/config/xsimd_inline.hpp" +#include "xsimd/types/xsimd_batch.hpp" + +namespace xsimd +{ + namespace kernel + { + namespace utils + { + template + struct select_stride + { + static constexpr I values_array[] = { Vs... }; + + template + static constexpr K get(K i, K) + { + return static_cast(values_array[length * i + offset]); + } + }; + + template + constexpr I lsb_mask(I bit_index) + { + return static_cast((I { 1 } << bit_index) - I { 1 }); + } + + template + XSIMD_INLINE batch bitwise_lshift_as_twice_larger( + batch const& self, batch_constant) noexcept + { + static_assert(sizeof(T2) == 2 * sizeof(T), "One size must be twice the other"); + + const auto self2 = bitwise_cast(self); + + // Lower byte: shift as twice the size and mask bits flowing to higher byte. + constexpr auto shifts_lo = make_batch_constant, A>(); + constexpr auto mask_lo = lsb_mask(8 * sizeof(T)); + const auto shifted_lo = bitwise_lshift(self2, shifts_lo); + const batch batch_mask_lo { mask_lo }; + const auto masked_lo = bitwise_and(shifted_lo, batch_mask_lo); + + // Higher byte: mask bits that would flow from lower byte and shift as twice the size. + constexpr auto shifts_hi = make_batch_constant, A>(); + constexpr auto mask_hi = mask_lo << (8 * sizeof(T)); + const batch batch_mask_hi { mask_hi }; + const auto masked_hi = bitwise_and(self2, batch_mask_hi); + const auto shifted_hi = bitwise_lshift(masked_hi, shifts_hi); + + return bitwise_cast(bitwise_or(masked_lo, shifted_hi)); + } + } + } +} + +#endif diff --git a/include/xsimd/arch/xsimd_avx.hpp b/include/xsimd/arch/xsimd_avx.hpp index 66ef31bf4..e210f316f 100644 --- a/include/xsimd/arch/xsimd_avx.hpp +++ b/include/xsimd/arch/xsimd_avx.hpp @@ -250,7 +250,7 @@ namespace xsimd self, other); } - // bitwise_lshift + // bitwise_lshift single template ::value>::type> XSIMD_INLINE batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept { @@ -259,6 +259,13 @@ namespace xsimd self, other); } + // bitwise_lshift multiple + template = 0> + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, batch_constant const& rhs, requires_arch req) noexcept + { + return bitwise_lshift(lhs, rhs.as_batch(), req); + } + // bitwise_not template ::value>::type> XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept diff --git a/include/xsimd/arch/xsimd_avx2.hpp b/include/xsimd/arch/xsimd_avx2.hpp index 2ee2a5241..714a02399 100644 --- a/include/xsimd/arch/xsimd_avx2.hpp +++ b/include/xsimd/arch/xsimd_avx2.hpp @@ -13,12 +13,12 @@ #define XSIMD_AVX2_HPP #include +#include #include #include "../types/xsimd_avx2_register.hpp" #include "../types/xsimd_batch_constant.hpp" - -#include +#include "./utils/shifts.hpp" namespace xsimd { @@ -265,7 +265,19 @@ namespace xsimd return _mm256_xor_si256(self, _mm256_set1_epi32(-1)); } - // bitwise_lshift + // bitwise_or + template ::value>::type> + XSIMD_INLINE batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept + { + return _mm256_or_si256(self, other); + } + template ::value>::type> + XSIMD_INLINE batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + { + return _mm256_or_si256(self, other); + } + + // bitwise_lshift single (dynamic) template ::value>::type> XSIMD_INLINE batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept { @@ -287,6 +299,7 @@ namespace xsimd } } + // bitwise_lshift single (constant) template ::value>::type> XSIMD_INLINE batch bitwise_lshift(batch const& self, requires_arch) noexcept { @@ -315,6 +328,7 @@ namespace xsimd } } + // bitwise_lshift multiple (dynamic) template ::value>::type> XSIMD_INLINE batch bitwise_lshift(batch const& self, batch const& other, requires_arch) noexcept { @@ -332,16 +346,41 @@ namespace xsimd } } - // bitwise_or - template ::value>::type> - XSIMD_INLINE batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept + // bitwise_lshift multiple (constant) + template = 0> + XSIMD_INLINE batch bitwise_lshift( + batch const& self, batch_constant shifts, requires_arch req) noexcept { - return _mm256_or_si256(self, other); + return bitwise_lshift(self, shifts.as_batch(), req); } - template ::value>::type> - XSIMD_INLINE batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + + template = 0> + XSIMD_INLINE batch bitwise_lshift( + batch const& self, batch_constant shifts, requires_arch req) noexcept { - return _mm256_or_si256(self, other); + return bitwise_lshift(self, shifts.as_batch(), req); + } + + template = 0> + XSIMD_INLINE batch bitwise_lshift( + batch const& self, batch_constant shifts, requires_arch) noexcept + { + using uint_t = typename std::make_unsigned::type; + return bitwise_cast( + utils::bitwise_lshift_as_twice_larger( + bitwise_cast(self), + bitwise_cast(shifts))); + } + + template = 0> + XSIMD_INLINE batch bitwise_lshift( + batch const& self, batch_constant shifts, requires_arch) noexcept + { + using uint_t = typename std::make_unsigned::type; + return bitwise_cast( + utils::bitwise_lshift_as_twice_larger( + bitwise_cast(self), + bitwise_cast(shifts))); } // bitwise_rshift diff --git a/include/xsimd/arch/xsimd_common_fwd.hpp b/include/xsimd/arch/xsimd_common_fwd.hpp index 21f99b004..82c8ce112 100644 --- a/include/xsimd/arch/xsimd_common_fwd.hpp +++ b/include/xsimd/arch/xsimd_common_fwd.hpp @@ -47,6 +47,8 @@ namespace xsimd XSIMD_INLINE batch abs(batch const& self, requires_arch) noexcept; template ::value>::type> XSIMD_INLINE batch bitwise_lshift(batch const& self, batch const& other, requires_arch) noexcept; + template = 0> + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, batch_constant const& rhs, requires_arch req) noexcept; template ::value>::type> XSIMD_INLINE batch bitwise_lshift(batch const& self, requires_arch) noexcept; template ::value>::type> diff --git a/include/xsimd/arch/xsimd_sse2.hpp b/include/xsimd/arch/xsimd_sse2.hpp index 63893cdbb..13a19a739 100644 --- a/include/xsimd/arch/xsimd_sse2.hpp +++ b/include/xsimd/arch/xsimd_sse2.hpp @@ -18,6 +18,7 @@ #include "../types/xsimd_batch_constant.hpp" #include "../types/xsimd_sse2_register.hpp" +#include "./utils/shifts.hpp" namespace xsimd { @@ -267,65 +268,6 @@ namespace xsimd return _mm_andnot_pd(other, self); } - // bitwise_lshift - template ::value>::type> - XSIMD_INLINE batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept - { - XSIMD_IF_CONSTEXPR(sizeof(T) == 1) - { - return _mm_and_si128(_mm_set1_epi8(0xFF << other), _mm_slli_epi32(self, other)); - } - else XSIMD_IF_CONSTEXPR(sizeof(T) == 2) - { - return _mm_slli_epi16(self, other); - } - else XSIMD_IF_CONSTEXPR(sizeof(T) == 4) - { - return _mm_slli_epi32(self, other); - } - else XSIMD_IF_CONSTEXPR(sizeof(T) == 8) - { - return _mm_slli_epi64(self, other); - } - else - { - assert(false && "unsupported arch/op combination"); - return {}; - } - } - template ::value>::type> - XSIMD_INLINE batch bitwise_lshift(batch const& self, requires_arch) noexcept - { - constexpr auto bits = std::numeric_limits::digits + std::numeric_limits::is_signed; - static_assert(shift < bits, "Count must be less than the number of bits in T"); - XSIMD_IF_CONSTEXPR(shift == 0) - { - return self; - } - else XSIMD_IF_CONSTEXPR(sizeof(T) == 1) - { - // 8-bit left shift via 16-bit shift + mask - __m128i shifted = _mm_slli_epi16(self, static_cast(shift)); - // TODO(C++17): without `if constexpr` we must ensure the compile-time shift does not overflow - constexpr uint8_t mask8 = static_cast(sizeof(T) == 1 ? (~0u << shift) : 0); - const __m128i mask = _mm_set1_epi8(mask8); - return _mm_and_si128(shifted, mask); - } - else XSIMD_IF_CONSTEXPR(sizeof(T) == 2) - { - return _mm_slli_epi16(self, static_cast(shift)); - } - else XSIMD_IF_CONSTEXPR(sizeof(T) == 4) - { - return _mm_slli_epi32(self, static_cast(shift)); - } - else XSIMD_IF_CONSTEXPR(sizeof(T) == 8) - { - return _mm_slli_epi64(self, static_cast(shift)); - } - return bitwise_lshift(self, common {}); - } - // bitwise_not template XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept @@ -582,6 +524,83 @@ namespace xsimd return _mm_castpd_si128(self); } + // bitwise_lshift single (dynamic) + template ::value>::type> + XSIMD_INLINE batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept + { + XSIMD_IF_CONSTEXPR(sizeof(T) == 1) + { + return _mm_and_si128(_mm_set1_epi8(0xFF << other), _mm_slli_epi32(self, other)); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 2) + { + return _mm_slli_epi16(self, other); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 4) + { + return _mm_slli_epi32(self, other); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 8) + { + return _mm_slli_epi64(self, other); + } + else + { + assert(false && "unsupported arch/op combination"); + return {}; + } + } + + // bitwise_lshift single (constant) + template ::value>::type> + XSIMD_INLINE batch bitwise_lshift(batch const& self, requires_arch) noexcept + { + constexpr auto bits = std::numeric_limits::digits + std::numeric_limits::is_signed; + static_assert(shift < bits, "Count must be less than the number of bits in T"); + XSIMD_IF_CONSTEXPR(shift == 0) + { + return self; + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 1) + { + // 8-bit left shift via 16-bit shift + mask + __m128i shifted = _mm_slli_epi16(self, static_cast(shift)); + // TODO(C++17): without `if constexpr` we must ensure the compile-time shift does not overflow + constexpr uint8_t mask8 = static_cast(sizeof(T) == 1 ? (~0u << shift) : 0); + const __m128i mask = _mm_set1_epi8(mask8); + return _mm_and_si128(shifted, mask); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 2) + { + return _mm_slli_epi16(self, static_cast(shift)); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 4) + { + return _mm_slli_epi32(self, static_cast(shift)); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 8) + { + return _mm_slli_epi64(self, static_cast(shift)); + } + return bitwise_lshift(self, common {}); + } + + // bitwise_lshift multiple (constant) + template + XSIMD_INLINE batch bitwise_lshift( + batch const& self, batch_constant, requires_arch) noexcept + { + constexpr auto mults = batch_constant(1u << Vs)...>(); + return _mm_mullo_epi16(self, mults.as_batch()); + } + + template + XSIMD_INLINE batch bitwise_lshift( + batch const& self, batch_constant shifts, requires_arch) noexcept + { + return utils::bitwise_lshift_as_twice_larger(self, shifts); + } + // broadcast template batch XSIMD_INLINE broadcast(float val, requires_arch) noexcept diff --git a/include/xsimd/arch/xsimd_sse4_1.hpp b/include/xsimd/arch/xsimd_sse4_1.hpp index 1a64fc878..2038b3253 100644 --- a/include/xsimd/arch/xsimd_sse4_1.hpp +++ b/include/xsimd/arch/xsimd_sse4_1.hpp @@ -41,6 +41,15 @@ namespace xsimd return _mm_ceil_pd(self); } + // bitwise_lshift multiple (constant) + template + XSIMD_INLINE batch bitwise_lshift( + batch const& self, batch_constant, requires_arch) noexcept + { + constexpr auto mults = batch_constant(1u << Vs)...>(); + return _mm_mullo_epi32(self, mults.as_batch()); + } + // fast_cast namespace detail { diff --git a/include/xsimd/types/xsimd_api.hpp b/include/xsimd/types/xsimd_api.hpp index 960350331..aba104d79 100644 --- a/include/xsimd/types/xsimd_api.hpp +++ b/include/xsimd/types/xsimd_api.hpp @@ -367,17 +367,23 @@ namespace xsimd detail::static_check_supported_config(); return kernel::bitwise_lshift(x, shift, A {}); } + template + XSIMD_INLINE batch bitwise_lshift(batch const& x) noexcept + { + detail::static_check_supported_config(); + return kernel::bitwise_lshift(x, A {}); + } template XSIMD_INLINE batch bitwise_lshift(batch const& x, batch const& shift) noexcept { detail::static_check_supported_config(); return kernel::bitwise_lshift(x, shift, A {}); } - template - XSIMD_INLINE batch bitwise_lshift(batch const& x) noexcept + template + XSIMD_INLINE batch bitwise_lshift(batch const& x, batch_constant shift) noexcept { detail::static_check_supported_config(); - return kernel::bitwise_lshift(x, A {}); + return kernel::bitwise_lshift(x, shift, A {}); } /** diff --git a/test/test_xsimd_api.cpp b/test/test_xsimd_api.cpp index 2e62c292c..266d5dce4 100644 --- a/test/test_xsimd_api.cpp +++ b/test/test_xsimd_api.cpp @@ -351,7 +351,15 @@ struct xsimd_api_integral_types_functions { using value_type = typename scalar_type::type; - void test_bitwise_lshift() + struct arrange + { + static constexpr value_type get(size_t index, size_t /*size*/) + { + return static_cast(index); + } + }; + + void test_bitwise_lshift_single() { constexpr int shift = 3; value_type val0(12); @@ -364,6 +372,33 @@ struct xsimd_api_integral_types_functions CHECK_EQ(extract(cr), r); } + template + void test_bitwise_lshift_multiple(typename std::enable_if::value, int>::type = 0) + { +#ifndef XSIMD_NO_SUPPORTED_ARCHITECTURE + constexpr auto Max = static_cast(std::numeric_limits::digits); + constexpr auto max_batch = xsimd::make_batch_constant(); + constexpr auto shifts = xsimd::make_batch_constant() % max_batch; + + auto shifted = xsimd::bitwise_lshift(T(1), shifts.as_batch()); + for (std::size_t i = 0; i < shifts.size; ++i) + { + CHECK_EQ(shifted.get(i), 1 << shifts.get(i)); + } + + auto shifted_cst = xsimd::bitwise_lshift(T(1), shifts); + for (std::size_t i = 0; i < shifts.size; ++i) + { + CHECK_EQ(shifted_cst.get(i), 1 << shifts.get(i)); + } +#endif + } + + template + void test_bitwise_lshift_multiple(typename std::enable_if::value, int>::type = 0) + { + } + void test_bitwise_rshift() { constexpr int shift = 3; @@ -426,9 +461,14 @@ TEST_CASE_TEMPLATE("[xsimd api | integral types functions]", B, INTEGRAL_TYPES) { xsimd_api_integral_types_functions Test; - SUBCASE("bitwise_lshift") + SUBCASE("test_bitwise_lshift_single") + { + Test.test_bitwise_lshift_single(); + } + + SUBCASE("bitwise_lshift_multiple") { - Test.test_bitwise_lshift(); + Test.test_bitwise_lshift_multiple(); } SUBCASE("bitwise_rshift")