@@ -30,7 +30,7 @@ Distributed under the Boost Software License, Version 1.0.
3030#include < cstdint>
3131#include < type_traits>
3232
33- #if 0 // defined(__AVX__) || defined(__SSE4_1__) || defined(__SSSE3__)
33+ #if 0 // defined(__AVX__) || defined(__SSE4_1__) || defined(__SSSE3__)
3434#include <tmmintrin.h> // for _mm_shuffle_epi8
3535#endif
3636
@@ -86,7 +86,7 @@ namespace algorithm
8686#pragma warning(disable : 4310) // cast truncates constant value
8787#pragma warning(disable : 4333) // right shift by too large amount
8888#endif
89- /* ! \brief Interleaves the bits of \emph a and \emph b.
89+ /* ! \brief Interleaves the bits of \em a and \em b.
9090
9191 On my Intel i7-8565u laptop, which can boost to 4.6 GHz:
9292
@@ -97,7 +97,7 @@ namespace algorithm
9797 QUICKCPPLIB_TREQUIRES (QUICKCPPLIB_TPRED(std::is_unsigned<T>::value))
9898 inline R bit_interleave (T a, T b) noexcept
9999 {
100- #if 0 // defined(__AVX__) || defined(__SSE4_1__) || defined(__SSSE3__)
100+ #if 0 // defined(__AVX__) || defined(__SSE4_1__) || defined(__SSSE3__)
101101 /* https://lemire.me/blog/2018/01/09/how-fast-can-you-bit-interleave-32-bit-integers-simd-edition/
102102 says that AVX is considerably faster than the SSSE3 bit interleave if you need to interleave two 128
103103 bit values into a 256 bit value, but we don't support that here yet.
@@ -141,9 +141,10 @@ namespace algorithm
141141 inline bit_deinterleave_result<R> bit_deinterleave (T x) noexcept
142142 {
143143 constexpr T /* mask32 = T(0x00000000ffffffff), */ mask16 = T (0x0000ffff0000ffff ),
144- mask8 = T (0x00ff00ff00ff00ff ) /* 0000 0000 1111 1111 */ ,
145- mask4 = T (0x0f0f0f0f0f0f0f0f ) /* 0000 1111 */ , mask2 = T (0x3333333333333333 ) /* 0011 0011 */ ,
146- mask1 = T (0x5555555555555555 ) /* 0101 0101 */ ;
144+ mask8 = T (0x00ff00ff00ff00ff ) /* 0000 0000 1111 1111 */ ,
145+ mask4 = T (0x0f0f0f0f0f0f0f0f ) /* 0000 1111 */ ,
146+ mask2 = T (0x3333333333333333 ) /* 0011 0011 */ ,
147+ mask1 = T (0x5555555555555555 ) /* 0101 0101 */ ;
147148 T ret1 = x & mask1, ret2 = (x >> 1 ) & mask1;
148149 ret1 = (ret1 ^ (ret1 >> 1 )) & mask2;
149150 ret2 = (ret2 ^ (ret2 >> 1 )) & mask2;
0 commit comments