From 62fd98952ddab0e3bc88c23623614d92da1c23d6 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Thu, 20 Nov 2025 18:06:47 +0100 Subject: [PATCH 01/17] Add bitwise_lshift multiple test --- test/test_xsimd_api.cpp | 41 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/test/test_xsimd_api.cpp b/test/test_xsimd_api.cpp index 2e62c292c..8aabd94ed 100644 --- a/test/test_xsimd_api.cpp +++ b/test/test_xsimd_api.cpp @@ -351,7 +351,15 @@ struct xsimd_api_integral_types_functions { using value_type = typename scalar_type::type; - void test_bitwise_lshift() + struct arrange + { + static constexpr value_type get(size_t index, size_t /*size*/) + { + return static_cast(index); + } + }; + + void test_bitwise_lshift_single() { constexpr int shift = 3; value_type val0(12); @@ -364,6 +372,25 @@ struct xsimd_api_integral_types_functions CHECK_EQ(extract(cr), r); } + void test_bitwise_lshift_multiple() + { + constexpr auto Max = static_cast(std::numeric_limits::digits); + constexpr auto max_batch = xsimd::make_batch_constant(); + constexpr auto shifts = xsimd::make_batch_constant() % max_batch; + + auto shifted = xsimd::bitwise_lshift(T(1), shifts.as_batch()); + for (std::size_t i = 0; i < shifts.size; ++i) + { + CHECK_EQ(shifted.get(i), 1 << shifts.get(i)); + } + + auto shifted_cst = xsimd::bitwise_lshift(T(1), shifts); + for (std::size_t i = 0; i < shifts.size; ++i) + { + CHECK_EQ(shifted_cst.get(i), 1 << shifts.get(i)); + } + } + void test_bitwise_rshift() { constexpr int shift = 3; @@ -426,9 +453,17 @@ TEST_CASE_TEMPLATE("[xsimd api | integral types functions]", B, INTEGRAL_TYPES) { xsimd_api_integral_types_functions Test; - SUBCASE("bitwise_lshift") + SUBCASE("test_bitwise_lshift_single") { - Test.test_bitwise_lshift(); + Test.test_bitwise_lshift_single(); + } + + SUBCASE("bitwise_lshift_multiple") + { + XSIMD_IF_CONSTEXPR(xsimd::is_batch::value) + { + Test.test_bitwise_lshift_multiple(); + } } SUBCASE("bitwise_rshift") From 26f4b4e51dd7bdc0acf940404bd4c851ccd4bfae Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 19 Nov 2025 18:27:24 +0100 Subject: [PATCH 02/17] Add AVX2 constant var left shift --- include/xsimd/arch/utils/shifts.hpp | 63 +++++++++++++++++++++++++++++ include/xsimd/arch/xsimd_avx.hpp | 9 ++++- include/xsimd/arch/xsimd_avx2.hpp | 38 ++++++++++++----- include/xsimd/types/xsimd_api.hpp | 6 +++ 4 files changed, 105 insertions(+), 11 deletions(-) create mode 100644 include/xsimd/arch/utils/shifts.hpp diff --git a/include/xsimd/arch/utils/shifts.hpp b/include/xsimd/arch/utils/shifts.hpp new file mode 100644 index 000000000..b1f223bcf --- /dev/null +++ b/include/xsimd/arch/utils/shifts.hpp @@ -0,0 +1,63 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * Copyright (c) Marco Barbone * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_UTILS_SHIFTS_HPP +#define XSIMD_UTILS_SHIFTS_HPP + +#include "xsimd/config/xsimd_inline.hpp" +#include "xsimd/types/xsimd_batch.hpp" + +namespace xsimd +{ + namespace kernel + { + namespace utils + { + template + struct select_stride + { + static constexpr I values_array[] = { Vs... }; + + template + static constexpr K get(K i, K n) + { + return static_cast(values_array[length * i + offset]); + } + }; + + template + XSIMD_INLINE batch bitwise_lshift_as_twice_larger( + batch const& self, batch_constant shifts, R req) noexcept + { + static_assert(sizeof(T2) == 2 * sizeof(T), "One size must be twice the other"); + + const auto self2 = bitwise_cast(self); + + // Lower byte: shift as twice the size and mask bits flowing to higher byte. + constexpr auto shifts_lo = make_batch_constant, A>(); + const auto shifted_lo = bitwise_lshift(self2, shifts_lo, req); + const batch mask_lo { T2 { 0x00FF } }; + const auto masked_lo = bitwise_and(shifted_lo, mask_lo, req); + + // Higher byte: mask bits that would flow from lower byte and shift as twice the size. + constexpr auto shifts_hi = make_batch_constant, A>(); + const batch mask_hi { T2 { 0xFF00 } }; + const auto masked_hi = bitwise_and(self2, mask_hi, req); + const auto shifted_hi = bitwise_lshift(masked_hi, shifts_hi, req); + + return bitwise_cast(bitwise_or(masked_lo, shifted_hi, req)); + } + } + } +} + +#endif diff --git a/include/xsimd/arch/xsimd_avx.hpp b/include/xsimd/arch/xsimd_avx.hpp index 66ef31bf4..e210f316f 100644 --- a/include/xsimd/arch/xsimd_avx.hpp +++ b/include/xsimd/arch/xsimd_avx.hpp @@ -250,7 +250,7 @@ namespace xsimd self, other); } - // bitwise_lshift + // bitwise_lshift single template ::value>::type> XSIMD_INLINE batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept { @@ -259,6 +259,13 @@ namespace xsimd self, other); } + // bitwise_lshift multiple + template = 0> + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, batch_constant const& rhs, requires_arch req) noexcept + { + return bitwise_lshift(lhs, rhs.as_batch(), req); + } + // bitwise_not template ::value>::type> XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept diff --git a/include/xsimd/arch/xsimd_avx2.hpp b/include/xsimd/arch/xsimd_avx2.hpp index 2ee2a5241..f057d6794 100644 --- a/include/xsimd/arch/xsimd_avx2.hpp +++ b/include/xsimd/arch/xsimd_avx2.hpp @@ -13,12 +13,12 @@ #define XSIMD_AVX2_HPP #include +#include #include #include "../types/xsimd_avx2_register.hpp" #include "../types/xsimd_batch_constant.hpp" - -#include +#include "./utils/shifts.hpp" namespace xsimd { @@ -265,7 +265,19 @@ namespace xsimd return _mm256_xor_si256(self, _mm256_set1_epi32(-1)); } - // bitwise_lshift + // bitwise_or + template ::value>::type> + XSIMD_INLINE batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept + { + return _mm256_or_si256(self, other); + } + template ::value>::type> + XSIMD_INLINE batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + { + return _mm256_or_si256(self, other); + } + + // bitwise_lshift single (dynamic) template ::value>::type> XSIMD_INLINE batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept { @@ -287,6 +299,7 @@ namespace xsimd } } + // bitwise_lshift single (constant) template ::value>::type> XSIMD_INLINE batch bitwise_lshift(batch const& self, requires_arch) noexcept { @@ -315,6 +328,7 @@ namespace xsimd } } + // bitwise_lshift multiple (dynamic) template ::value>::type> XSIMD_INLINE batch bitwise_lshift(batch const& self, batch const& other, requires_arch) noexcept { @@ -332,16 +346,20 @@ namespace xsimd } } - // bitwise_or - template ::value>::type> - XSIMD_INLINE batch bitwise_or(batch const& self, batch const& other, requires_arch) noexcept + // bitwise_lshift multiple (constant) + template + XSIMD_INLINE batch bitwise_lshift( + batch const& self, batch_constant shifts, requires_arch) noexcept { - return _mm256_or_si256(self, other); + constexpr auto mults = batch_constant(1u << Vs)...>(); + return _mm256_mullo_epi16(self, mults.as_batch()); } - template ::value>::type> - XSIMD_INLINE batch_bool bitwise_or(batch_bool const& self, batch_bool const& other, requires_arch) noexcept + + template + XSIMD_INLINE batch bitwise_lshift( + batch const& self, batch_constant shifts, requires_arch req) noexcept { - return _mm256_or_si256(self, other); + return utils::bitwise_lshift_as_twice_larger(self, shifts, req); } // bitwise_rshift diff --git a/include/xsimd/types/xsimd_api.hpp b/include/xsimd/types/xsimd_api.hpp index 960350331..2604df30d 100644 --- a/include/xsimd/types/xsimd_api.hpp +++ b/include/xsimd/types/xsimd_api.hpp @@ -379,6 +379,12 @@ namespace xsimd detail::static_check_supported_config(); return kernel::bitwise_lshift(x, A {}); } + template + XSIMD_INLINE batch bitwise_lshift(batch const& x, batch_constant shift) noexcept + { + detail::static_check_supported_config(); + return kernel::bitwise_lshift(x, shift, A {}); + } /** * @ingroup batch_bitwise From 4e5fe409dacec0bc25cb77ea3499b5fed1e46743 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 21 Nov 2025 16:48:48 +0100 Subject: [PATCH 03/17] Add multiple constant bitwise_lshift sse --- include/xsimd/arch/xsimd_sse2.hpp | 136 ++++++++++++++++------------ include/xsimd/arch/xsimd_sse4_1.hpp | 9 ++ 2 files changed, 86 insertions(+), 59 deletions(-) diff --git a/include/xsimd/arch/xsimd_sse2.hpp b/include/xsimd/arch/xsimd_sse2.hpp index 63893cdbb..8a4e5a0c0 100644 --- a/include/xsimd/arch/xsimd_sse2.hpp +++ b/include/xsimd/arch/xsimd_sse2.hpp @@ -267,65 +267,6 @@ namespace xsimd return _mm_andnot_pd(other, self); } - // bitwise_lshift - template ::value>::type> - XSIMD_INLINE batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept - { - XSIMD_IF_CONSTEXPR(sizeof(T) == 1) - { - return _mm_and_si128(_mm_set1_epi8(0xFF << other), _mm_slli_epi32(self, other)); - } - else XSIMD_IF_CONSTEXPR(sizeof(T) == 2) - { - return _mm_slli_epi16(self, other); - } - else XSIMD_IF_CONSTEXPR(sizeof(T) == 4) - { - return _mm_slli_epi32(self, other); - } - else XSIMD_IF_CONSTEXPR(sizeof(T) == 8) - { - return _mm_slli_epi64(self, other); - } - else - { - assert(false && "unsupported arch/op combination"); - return {}; - } - } - template ::value>::type> - XSIMD_INLINE batch bitwise_lshift(batch const& self, requires_arch) noexcept - { - constexpr auto bits = std::numeric_limits::digits + std::numeric_limits::is_signed; - static_assert(shift < bits, "Count must be less than the number of bits in T"); - XSIMD_IF_CONSTEXPR(shift == 0) - { - return self; - } - else XSIMD_IF_CONSTEXPR(sizeof(T) == 1) - { - // 8-bit left shift via 16-bit shift + mask - __m128i shifted = _mm_slli_epi16(self, static_cast(shift)); - // TODO(C++17): without `if constexpr` we must ensure the compile-time shift does not overflow - constexpr uint8_t mask8 = static_cast(sizeof(T) == 1 ? (~0u << shift) : 0); - const __m128i mask = _mm_set1_epi8(mask8); - return _mm_and_si128(shifted, mask); - } - else XSIMD_IF_CONSTEXPR(sizeof(T) == 2) - { - return _mm_slli_epi16(self, static_cast(shift)); - } - else XSIMD_IF_CONSTEXPR(sizeof(T) == 4) - { - return _mm_slli_epi32(self, static_cast(shift)); - } - else XSIMD_IF_CONSTEXPR(sizeof(T) == 8) - { - return _mm_slli_epi64(self, static_cast(shift)); - } - return bitwise_lshift(self, common {}); - } - // bitwise_not template XSIMD_INLINE batch bitwise_not(batch const& self, requires_arch) noexcept @@ -582,6 +523,83 @@ namespace xsimd return _mm_castpd_si128(self); } + // bitwise_lshift single (dynamic) + template ::value>::type> + XSIMD_INLINE batch bitwise_lshift(batch const& self, int32_t other, requires_arch) noexcept + { + XSIMD_IF_CONSTEXPR(sizeof(T) == 1) + { + return _mm_and_si128(_mm_set1_epi8(0xFF << other), _mm_slli_epi32(self, other)); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 2) + { + return _mm_slli_epi16(self, other); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 4) + { + return _mm_slli_epi32(self, other); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 8) + { + return _mm_slli_epi64(self, other); + } + else + { + assert(false && "unsupported arch/op combination"); + return {}; + } + } + + // bitwise_lshift single (constant) + template ::value>::type> + XSIMD_INLINE batch bitwise_lshift(batch const& self, requires_arch) noexcept + { + constexpr auto bits = std::numeric_limits::digits + std::numeric_limits::is_signed; + static_assert(shift < bits, "Count must be less than the number of bits in T"); + XSIMD_IF_CONSTEXPR(shift == 0) + { + return self; + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 1) + { + // 8-bit left shift via 16-bit shift + mask + __m128i shifted = _mm_slli_epi16(self, static_cast(shift)); + // TODO(C++17): without `if constexpr` we must ensure the compile-time shift does not overflow + constexpr uint8_t mask8 = static_cast(sizeof(T) == 1 ? (~0u << shift) : 0); + const __m128i mask = _mm_set1_epi8(mask8); + return _mm_and_si128(shifted, mask); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 2) + { + return _mm_slli_epi16(self, static_cast(shift)); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 4) + { + return _mm_slli_epi32(self, static_cast(shift)); + } + else XSIMD_IF_CONSTEXPR(sizeof(T) == 8) + { + return _mm_slli_epi64(self, static_cast(shift)); + } + return bitwise_lshift(self, common {}); + } + + // bitwise_lshift multiple (constant) + template + XSIMD_INLINE batch bitwise_lshift( + batch const& self, batch_constant shifts, requires_arch) noexcept + { + constexpr auto mults = batch_constant(1u << Vs)...>(); + return _mm_mullo_epi16(self, mults.as_batch()); + } + + template + XSIMD_INLINE batch bitwise_lshift( + batch const& self, batch_constant shifts, requires_arch req) noexcept + { + return utils::bitwise_lshift_as_twice_larger(self, shifts, req); + } + // broadcast template batch XSIMD_INLINE broadcast(float val, requires_arch) noexcept diff --git a/include/xsimd/arch/xsimd_sse4_1.hpp b/include/xsimd/arch/xsimd_sse4_1.hpp index 1a64fc878..96b9186d0 100644 --- a/include/xsimd/arch/xsimd_sse4_1.hpp +++ b/include/xsimd/arch/xsimd_sse4_1.hpp @@ -41,6 +41,15 @@ namespace xsimd return _mm_ceil_pd(self); } + // bitwise_lshift multiple (constant) + template + XSIMD_INLINE batch bitwise_lshift( + batch const& self, batch_constant shifts, requires_arch) noexcept + { + constexpr auto mults = batch_constant(1u << Vs)...>(); + return _mm_mullo_epi32(self, mults.as_batch()); + } + // fast_cast namespace detail { From 2e90f5859610562d2fa1a944800796f887da7708 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 21 Nov 2025 16:52:54 +0100 Subject: [PATCH 04/17] Add multiple constant bitwise_lshift common --- include/xsimd/arch/common/xsimd_common_arithmetic.hpp | 11 ++++++++++- include/xsimd/arch/xsimd_common_fwd.hpp | 2 ++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/include/xsimd/arch/common/xsimd_common_arithmetic.hpp b/include/xsimd/arch/common/xsimd_common_arithmetic.hpp index f7c3ad524..dfda1ddfc 100644 --- a/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +++ b/include/xsimd/arch/common/xsimd_common_arithmetic.hpp @@ -27,7 +27,7 @@ namespace xsimd using namespace types; - // bitwise_lshift + // bitwise_lshift multiple (dynamic) template ::value>::type*/> XSIMD_INLINE batch bitwise_lshift(batch const& self, batch const& other, requires_arch) noexcept { @@ -35,6 +35,15 @@ namespace xsimd { return x << y; }, self, other); } + + // bitwise_lshift multiple (constant) + template = 0> + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, batch_constant const& rhs, requires_arch req) noexcept + { + return bitwise_lshift(lhs, rhs.as_batch(), req); + } + + // bitwise_lshift single (constant) template ::value>::type*/> XSIMD_INLINE batch bitwise_lshift(batch const& self, requires_arch) noexcept { diff --git a/include/xsimd/arch/xsimd_common_fwd.hpp b/include/xsimd/arch/xsimd_common_fwd.hpp index 21f99b004..82c8ce112 100644 --- a/include/xsimd/arch/xsimd_common_fwd.hpp +++ b/include/xsimd/arch/xsimd_common_fwd.hpp @@ -47,6 +47,8 @@ namespace xsimd XSIMD_INLINE batch abs(batch const& self, requires_arch) noexcept; template ::value>::type> XSIMD_INLINE batch bitwise_lshift(batch const& self, batch const& other, requires_arch) noexcept; + template = 0> + XSIMD_INLINE batch bitwise_lshift(batch const& lhs, batch_constant const& rhs, requires_arch req) noexcept; template ::value>::type> XSIMD_INLINE batch bitwise_lshift(batch const& self, requires_arch) noexcept; template ::value>::type> From 17e4b6a2d2c99d2db94a5d1a44d71e7e73e8b7d8 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 21 Nov 2025 16:58:13 +0100 Subject: [PATCH 05/17] Fix sse2 --- include/xsimd/arch/xsimd_sse2.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/xsimd/arch/xsimd_sse2.hpp b/include/xsimd/arch/xsimd_sse2.hpp index 8a4e5a0c0..ec555d738 100644 --- a/include/xsimd/arch/xsimd_sse2.hpp +++ b/include/xsimd/arch/xsimd_sse2.hpp @@ -18,6 +18,7 @@ #include "../types/xsimd_batch_constant.hpp" #include "../types/xsimd_sse2_register.hpp" +#include "./utils/shifts.hpp" namespace xsimd { From cdf42803db88e1fc749afe7558a9f2ea06ff8452 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 21 Nov 2025 17:00:59 +0100 Subject: [PATCH 06/17] Fix common fwd --- include/xsimd/arch/common/xsimd_common_arithmetic.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/xsimd/arch/common/xsimd_common_arithmetic.hpp b/include/xsimd/arch/common/xsimd_common_arithmetic.hpp index dfda1ddfc..c1b1bb9d4 100644 --- a/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +++ b/include/xsimd/arch/common/xsimd_common_arithmetic.hpp @@ -17,6 +17,7 @@ #include #include "../../types/xsimd_batch_constant.hpp" +#include "../xsimd_common_fwd.hpp" #include "./xsimd_common_details.hpp" namespace xsimd @@ -37,7 +38,7 @@ namespace xsimd } // bitwise_lshift multiple (constant) - template = 0> + template > XSIMD_INLINE batch bitwise_lshift(batch const& lhs, batch_constant const& rhs, requires_arch req) noexcept { return bitwise_lshift(lhs, rhs.as_batch(), req); From 431d8014f1b18f3e038860f439736ac23c27f521 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 21 Nov 2025 17:11:39 +0100 Subject: [PATCH 07/17] Fix sse2 --- include/xsimd/arch/xsimd_sse2.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/xsimd/arch/xsimd_sse2.hpp b/include/xsimd/arch/xsimd_sse2.hpp index ec555d738..11c49cc0b 100644 --- a/include/xsimd/arch/xsimd_sse2.hpp +++ b/include/xsimd/arch/xsimd_sse2.hpp @@ -588,7 +588,7 @@ namespace xsimd // bitwise_lshift multiple (constant) template XSIMD_INLINE batch bitwise_lshift( - batch const& self, batch_constant shifts, requires_arch) noexcept + batch const& self, batch_constant, requires_arch) noexcept { constexpr auto mults = batch_constant(1u << Vs)...>(); return _mm_mullo_epi16(self, mults.as_batch()); From 0b42f509f22c62260689fd628a92fc49aefbe13c Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 21 Nov 2025 17:12:43 +0100 Subject: [PATCH 08/17] Fix utils --- include/xsimd/arch/utils/shifts.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/xsimd/arch/utils/shifts.hpp b/include/xsimd/arch/utils/shifts.hpp index b1f223bcf..8ddb5edb8 100644 --- a/include/xsimd/arch/utils/shifts.hpp +++ b/include/xsimd/arch/utils/shifts.hpp @@ -28,7 +28,7 @@ namespace xsimd static constexpr I values_array[] = { Vs... }; template - static constexpr K get(K i, K n) + static constexpr K get(K i, K) { return static_cast(values_array[length * i + offset]); } @@ -36,7 +36,7 @@ namespace xsimd template XSIMD_INLINE batch bitwise_lshift_as_twice_larger( - batch const& self, batch_constant shifts, R req) noexcept + batch const& self, batch_constant, R req) noexcept { static_assert(sizeof(T2) == 2 * sizeof(T), "One size must be twice the other"); From 1cd12f9f097b6ce230d0adb22e82a473f6835df6 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 26 Nov 2025 16:36:22 +0100 Subject: [PATCH 09/17] Fix shift utility --- include/xsimd/arch/utils/shifts.hpp | 26 +++++++++++++++++--------- include/xsimd/arch/xsimd_avx2.hpp | 2 +- include/xsimd/arch/xsimd_sse2.hpp | 2 +- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/include/xsimd/arch/utils/shifts.hpp b/include/xsimd/arch/utils/shifts.hpp index 8ddb5edb8..d83712a16 100644 --- a/include/xsimd/arch/utils/shifts.hpp +++ b/include/xsimd/arch/utils/shifts.hpp @@ -34,9 +34,15 @@ namespace xsimd } }; - template + template + constexpr I lsb_mask(I bit_index) + { + return static_cast((I { 1 } << bit_index) - I { 1 }); + } + + template XSIMD_INLINE batch bitwise_lshift_as_twice_larger( - batch const& self, batch_constant, R req) noexcept + batch const& self, batch_constant) noexcept { static_assert(sizeof(T2) == 2 * sizeof(T), "One size must be twice the other"); @@ -44,17 +50,19 @@ namespace xsimd // Lower byte: shift as twice the size and mask bits flowing to higher byte. constexpr auto shifts_lo = make_batch_constant, A>(); - const auto shifted_lo = bitwise_lshift(self2, shifts_lo, req); - const batch mask_lo { T2 { 0x00FF } }; - const auto masked_lo = bitwise_and(shifted_lo, mask_lo, req); + constexpr auto mask_lo = lsb_mask(8 * sizeof(T)); + const auto shifted_lo = bitwise_lshift(self2, shifts_lo); + const batch batch_mask_lo { mask_lo }; + const auto masked_lo = bitwise_and(shifted_lo, batch_mask_lo); // Higher byte: mask bits that would flow from lower byte and shift as twice the size. constexpr auto shifts_hi = make_batch_constant, A>(); - const batch mask_hi { T2 { 0xFF00 } }; - const auto masked_hi = bitwise_and(self2, mask_hi, req); - const auto shifted_hi = bitwise_lshift(masked_hi, shifts_hi, req); + constexpr auto mask_hi = mask_lo << (8 * sizeof(T)); + const batch batch_mask_hi { mask_hi }; + const auto masked_hi = bitwise_and(self2, batch_mask_hi); + const auto shifted_hi = bitwise_lshift(masked_hi, shifts_hi); - return bitwise_cast(bitwise_or(masked_lo, shifted_hi, req)); + return bitwise_cast(bitwise_or(masked_lo, shifted_hi)); } } } diff --git a/include/xsimd/arch/xsimd_avx2.hpp b/include/xsimd/arch/xsimd_avx2.hpp index f057d6794..a054a002a 100644 --- a/include/xsimd/arch/xsimd_avx2.hpp +++ b/include/xsimd/arch/xsimd_avx2.hpp @@ -359,7 +359,7 @@ namespace xsimd XSIMD_INLINE batch bitwise_lshift( batch const& self, batch_constant shifts, requires_arch req) noexcept { - return utils::bitwise_lshift_as_twice_larger(self, shifts, req); + return utils::bitwise_lshift_as_twice_larger(self, shifts); } // bitwise_rshift diff --git a/include/xsimd/arch/xsimd_sse2.hpp b/include/xsimd/arch/xsimd_sse2.hpp index 11c49cc0b..8161f56e0 100644 --- a/include/xsimd/arch/xsimd_sse2.hpp +++ b/include/xsimd/arch/xsimd_sse2.hpp @@ -598,7 +598,7 @@ namespace xsimd XSIMD_INLINE batch bitwise_lshift( batch const& self, batch_constant shifts, requires_arch req) noexcept { - return utils::bitwise_lshift_as_twice_larger(self, shifts, req); + return utils::bitwise_lshift_as_twice_larger(self, shifts); } // broadcast From c89a5ea86aec7bb230b0bec396a58aa68359dd67 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Wed, 26 Nov 2025 17:28:24 +0100 Subject: [PATCH 10/17] Fix Avx2 --- include/xsimd/arch/xsimd_avx2.hpp | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/include/xsimd/arch/xsimd_avx2.hpp b/include/xsimd/arch/xsimd_avx2.hpp index a054a002a..500b8da28 100644 --- a/include/xsimd/arch/xsimd_avx2.hpp +++ b/include/xsimd/arch/xsimd_avx2.hpp @@ -347,19 +347,23 @@ namespace xsimd } // bitwise_lshift multiple (constant) - template - XSIMD_INLINE batch bitwise_lshift( - batch const& self, batch_constant shifts, requires_arch) noexcept + template = 0> + XSIMD_INLINE batch bitwise_lshift( + batch const& self, batch_constant shifts, requires_arch req) noexcept { - constexpr auto mults = batch_constant(1u << Vs)...>(); - return _mm256_mullo_epi16(self, mults.as_batch()); - } - - template - XSIMD_INLINE batch bitwise_lshift( - batch const& self, batch_constant shifts, requires_arch req) noexcept - { - return utils::bitwise_lshift_as_twice_larger(self, shifts); + XSIMD_IF_CONSTEXPR(std::is_same::value) + { + return utils::bitwise_lshift_as_twice_larger(self, shifts); + } + XSIMD_IF_CONSTEXPR(std::is_same::value) + { + constexpr auto mults = batch_constant(1u << Vs)...>(); + return _mm256_mullo_epi16(self, mults.as_batch()); + } + else + { + return bitwise_lshift(self, shifts.as_batch(), req); + } } // bitwise_rshift From 1ce42e954f6ffabba5b7ffd6022d88e67e0fa6cf Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Thu, 27 Nov 2025 11:00:04 +0100 Subject: [PATCH 11/17] Remove unused var --- include/xsimd/arch/xsimd_sse2.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/xsimd/arch/xsimd_sse2.hpp b/include/xsimd/arch/xsimd_sse2.hpp index 8161f56e0..13a19a739 100644 --- a/include/xsimd/arch/xsimd_sse2.hpp +++ b/include/xsimd/arch/xsimd_sse2.hpp @@ -596,7 +596,7 @@ namespace xsimd template XSIMD_INLINE batch bitwise_lshift( - batch const& self, batch_constant shifts, requires_arch req) noexcept + batch const& self, batch_constant shifts, requires_arch) noexcept { return utils::bitwise_lshift_as_twice_larger(self, shifts); } From 270a662569d79f6673e0b4d14f11edf716a19ecc Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 28 Nov 2025 15:33:06 +0100 Subject: [PATCH 12/17] Fix typos --- include/xsimd/arch/xsimd_sse4_1.hpp | 2 +- include/xsimd/types/xsimd_api.hpp | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/xsimd/arch/xsimd_sse4_1.hpp b/include/xsimd/arch/xsimd_sse4_1.hpp index 96b9186d0..2038b3253 100644 --- a/include/xsimd/arch/xsimd_sse4_1.hpp +++ b/include/xsimd/arch/xsimd_sse4_1.hpp @@ -44,7 +44,7 @@ namespace xsimd // bitwise_lshift multiple (constant) template XSIMD_INLINE batch bitwise_lshift( - batch const& self, batch_constant shifts, requires_arch) noexcept + batch const& self, batch_constant, requires_arch) noexcept { constexpr auto mults = batch_constant(1u << Vs)...>(); return _mm_mullo_epi32(self, mults.as_batch()); diff --git a/include/xsimd/types/xsimd_api.hpp b/include/xsimd/types/xsimd_api.hpp index 2604df30d..aba104d79 100644 --- a/include/xsimd/types/xsimd_api.hpp +++ b/include/xsimd/types/xsimd_api.hpp @@ -367,20 +367,20 @@ namespace xsimd detail::static_check_supported_config(); return kernel::bitwise_lshift(x, shift, A {}); } - template - XSIMD_INLINE batch bitwise_lshift(batch const& x, batch const& shift) noexcept - { - detail::static_check_supported_config(); - return kernel::bitwise_lshift(x, shift, A {}); - } template XSIMD_INLINE batch bitwise_lshift(batch const& x) noexcept { detail::static_check_supported_config(); return kernel::bitwise_lshift(x, A {}); } - template - XSIMD_INLINE batch bitwise_lshift(batch const& x, batch_constant shift) noexcept + template + XSIMD_INLINE batch bitwise_lshift(batch const& x, batch const& shift) noexcept + { + detail::static_check_supported_config(); + return kernel::bitwise_lshift(x, shift, A {}); + } + template + XSIMD_INLINE batch bitwise_lshift(batch const& x, batch_constant shift) noexcept { detail::static_check_supported_config(); return kernel::bitwise_lshift(x, shift, A {}); From 523bbba139837c1409a529a2e48249d6793b157f Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 28 Nov 2025 16:18:00 +0100 Subject: [PATCH 13/17] Don't run test on missing architectures --- test/test_xsimd_api.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/test_xsimd_api.cpp b/test/test_xsimd_api.cpp index 8aabd94ed..a3a1dfb62 100644 --- a/test/test_xsimd_api.cpp +++ b/test/test_xsimd_api.cpp @@ -372,6 +372,7 @@ struct xsimd_api_integral_types_functions CHECK_EQ(extract(cr), r); } +#ifndef XSIMD_NO_SUPPORTED_ARCHITECTURE void test_bitwise_lshift_multiple() { constexpr auto Max = static_cast(std::numeric_limits::digits); @@ -390,6 +391,7 @@ struct xsimd_api_integral_types_functions CHECK_EQ(shifted_cst.get(i), 1 << shifts.get(i)); } } +#endif void test_bitwise_rshift() { @@ -458,6 +460,7 @@ TEST_CASE_TEMPLATE("[xsimd api | integral types functions]", B, INTEGRAL_TYPES) Test.test_bitwise_lshift_single(); } +#ifndef XSIMD_NO_SUPPORTED_ARCHITECTURE SUBCASE("bitwise_lshift_multiple") { XSIMD_IF_CONSTEXPR(xsimd::is_batch::value) @@ -465,6 +468,7 @@ TEST_CASE_TEMPLATE("[xsimd api | integral types functions]", B, INTEGRAL_TYPES) Test.test_bitwise_lshift_multiple(); } } +#endif SUBCASE("bitwise_rshift") { From fb76fd7e721aceb599e8806ec04c7cf0028f51b8 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 28 Nov 2025 16:48:35 +0100 Subject: [PATCH 14/17] Fix test on scalars --- test/test_xsimd_api.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/test/test_xsimd_api.cpp b/test/test_xsimd_api.cpp index a3a1dfb62..86febd526 100644 --- a/test/test_xsimd_api.cpp +++ b/test/test_xsimd_api.cpp @@ -372,8 +372,8 @@ struct xsimd_api_integral_types_functions CHECK_EQ(extract(cr), r); } -#ifndef XSIMD_NO_SUPPORTED_ARCHITECTURE - void test_bitwise_lshift_multiple() + template + void test_bitwise_lshift_multiple(typename std::enable_if::value, int>::type = 0) { constexpr auto Max = static_cast(std::numeric_limits::digits); constexpr auto max_batch = xsimd::make_batch_constant(); @@ -391,7 +391,11 @@ struct xsimd_api_integral_types_functions CHECK_EQ(shifted_cst.get(i), 1 << shifts.get(i)); } } -#endif + + template + void test_bitwise_lshift_multiple(typename std::enable_if::value, int>::type = 0) + { + } void test_bitwise_rshift() { @@ -460,15 +464,10 @@ TEST_CASE_TEMPLATE("[xsimd api | integral types functions]", B, INTEGRAL_TYPES) Test.test_bitwise_lshift_single(); } -#ifndef XSIMD_NO_SUPPORTED_ARCHITECTURE SUBCASE("bitwise_lshift_multiple") { - XSIMD_IF_CONSTEXPR(xsimd::is_batch::value) - { - Test.test_bitwise_lshift_multiple(); - } + Test.test_bitwise_lshift_multiple(); } -#endif SUBCASE("bitwise_rshift") { From 7cd69207520a9eb338cecc21cd40a48a7d8032b6 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 28 Nov 2025 17:21:56 +0100 Subject: [PATCH 15/17] Fx avx2 build --- include/xsimd/arch/xsimd_avx2.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/xsimd/arch/xsimd_avx2.hpp b/include/xsimd/arch/xsimd_avx2.hpp index 500b8da28..3c3edf4b1 100644 --- a/include/xsimd/arch/xsimd_avx2.hpp +++ b/include/xsimd/arch/xsimd_avx2.hpp @@ -353,7 +353,7 @@ namespace xsimd { XSIMD_IF_CONSTEXPR(std::is_same::value) { - return utils::bitwise_lshift_as_twice_larger(self, shifts); + return utils::bitwise_lshift_as_twice_larger(self, shifts); } XSIMD_IF_CONSTEXPR(std::is_same::value) { From 5f32d7212aa8cb1e21f04717d2a16cb41b071ae0 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 28 Nov 2025 17:29:29 +0100 Subject: [PATCH 16/17] Deactivate xsimd bathc code whrn no arch --- test/test_xsimd_api.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_xsimd_api.cpp b/test/test_xsimd_api.cpp index 86febd526..266d5dce4 100644 --- a/test/test_xsimd_api.cpp +++ b/test/test_xsimd_api.cpp @@ -375,6 +375,7 @@ struct xsimd_api_integral_types_functions template void test_bitwise_lshift_multiple(typename std::enable_if::value, int>::type = 0) { +#ifndef XSIMD_NO_SUPPORTED_ARCHITECTURE constexpr auto Max = static_cast(std::numeric_limits::digits); constexpr auto max_batch = xsimd::make_batch_constant(); constexpr auto shifts = xsimd::make_batch_constant() % max_batch; @@ -390,6 +391,7 @@ struct xsimd_api_integral_types_functions { CHECK_EQ(shifted_cst.get(i), 1 << shifts.get(i)); } +#endif } template From caaab2a43b029b67ae6f34d2d42171d063d2cb8c Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 28 Nov 2025 18:59:38 +0100 Subject: [PATCH 17/17] Fix Avx2 if constexpr --- include/xsimd/arch/xsimd_avx2.hpp | 45 +++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/include/xsimd/arch/xsimd_avx2.hpp b/include/xsimd/arch/xsimd_avx2.hpp index 3c3edf4b1..714a02399 100644 --- a/include/xsimd/arch/xsimd_avx2.hpp +++ b/include/xsimd/arch/xsimd_avx2.hpp @@ -347,23 +347,40 @@ namespace xsimd } // bitwise_lshift multiple (constant) - template = 0> + template = 0> XSIMD_INLINE batch bitwise_lshift( batch const& self, batch_constant shifts, requires_arch req) noexcept { - XSIMD_IF_CONSTEXPR(std::is_same::value) - { - return utils::bitwise_lshift_as_twice_larger(self, shifts); - } - XSIMD_IF_CONSTEXPR(std::is_same::value) - { - constexpr auto mults = batch_constant(1u << Vs)...>(); - return _mm256_mullo_epi16(self, mults.as_batch()); - } - else - { - return bitwise_lshift(self, shifts.as_batch(), req); - } + return bitwise_lshift(self, shifts.as_batch(), req); + } + + template = 0> + XSIMD_INLINE batch bitwise_lshift( + batch const& self, batch_constant shifts, requires_arch req) noexcept + { + return bitwise_lshift(self, shifts.as_batch(), req); + } + + template = 0> + XSIMD_INLINE batch bitwise_lshift( + batch const& self, batch_constant shifts, requires_arch) noexcept + { + using uint_t = typename std::make_unsigned::type; + return bitwise_cast( + utils::bitwise_lshift_as_twice_larger( + bitwise_cast(self), + bitwise_cast(shifts))); + } + + template = 0> + XSIMD_INLINE batch bitwise_lshift( + batch const& self, batch_constant shifts, requires_arch) noexcept + { + using uint_t = typename std::make_unsigned::type; + return bitwise_cast( + utils::bitwise_lshift_as_twice_larger( + bitwise_cast(self), + bitwise_cast(shifts))); } // bitwise_rshift