diff --git a/src/ops/i16.rs b/src/ops/i16.rs index 4addc9f..4add20b 100644 --- a/src/ops/i16.rs +++ b/src/ops/i16.rs @@ -199,19 +199,13 @@ impl_op! { impl_op! { fn lt { for Avx2(a: __m256i, b: __m256i) -> __m256i { - let gt = _mm256_cmpgt_epi16(a, b); - let eq = _mm256_cmpeq_epi16(a, b); - _mm256_andnot_si256(_mm256_or_si256(gt, eq), _mm256_set1_epi16(u32::MAX as i16)) + _mm256_cmpgt_epi16(b, a) } for Sse41(a: __m128i, b: __m128i) -> __m128i { - let gt = _mm_cmpgt_epi16(a, b); - let eq = _mm_cmpeq_epi16(a, b); - _mm_andnot_si128(_mm_or_si128(gt, eq), _mm_set1_epi16(u32::MAX as i16)) + _mm_cmpgt_epi16(b, a) } for Sse2(a: __m128i, b: __m128i) -> __m128i { - let gt = _mm_cmpgt_epi16(a, b); - let eq = _mm_cmpeq_epi16(a, b); - _mm_andnot_si128(_mm_or_si128(gt, eq), _mm_set1_epi16(u32::MAX as i16)) + _mm_cmpgt_epi16(b, a) } for Scalar(a: i16, b: i16) -> i16 { if a < b { diff --git a/src/ops/i32.rs b/src/ops/i32.rs index 7a161c9..11c93b9 100644 --- a/src/ops/i32.rs +++ b/src/ops/i32.rs @@ -209,19 +209,13 @@ impl_op! { impl_op! { fn lt { for Avx2(a: __m256i, b: __m256i) -> __m256i { - let gt = _mm256_cmpgt_epi32(a, b); - let eq = _mm256_cmpeq_epi32(a, b); - _mm256_andnot_si256(_mm256_or_si256(gt, eq), _mm256_set1_epi32(u32::MAX as i32)) + _mm256_cmpgt_epi32(b, a) } for Sse41(a: __m128i, b: __m128i) -> __m128i { - let gt = _mm_cmpgt_epi32(a, b); - let eq = _mm_cmpeq_epi32(a, b); - _mm_andnot_si128(_mm_or_si128(gt, eq), _mm_set1_epi32(u32::MAX as i32)) + _mm_cmpgt_epi32(b, a) } for Sse2(a: __m128i, b: __m128i) -> __m128i { - let gt = _mm_cmpgt_epi32(a, b); - let eq = _mm_cmpeq_epi32(a, b); - _mm_andnot_si128(_mm_or_si128(gt, eq), _mm_set1_epi32(u32::MAX as i32)) + _mm_cmpgt_epi32(b, a) } for Scalar(a: i32, b: i32) -> i32 { if a < b { diff --git a/src/ops/i64.rs b/src/ops/i64.rs index 1c413f2..7bfd2dc 100644 --- a/src/ops/i64.rs +++ b/src/ops/i64.rs @@ -239,19 +239,13 @@ impl_op! { impl_op! { fn lt { for Avx2(a: __m256i, b: __m256i) -> __m256i { - let gt = _mm256_cmpgt_epi64(a, b); - let eq = _mm256_cmpeq_epi64(a, b); - _mm256_andnot_si256(_mm256_or_si256(gt, eq), _mm256_set1_epi64x(u64::MAX as i64)) + _mm256_cmpgt_epi64(b, a) } for Sse41(a: __m128i, b: __m128i) -> __m128i { - let gt = _mm_cmpgt_epi64(a, b); - let eq = _mm_cmpeq_epi64(a, b); - _mm_andnot_si128(_mm_or_si128(gt, eq), _mm_set1_epi64x(u64::MAX as i64)) + _mm_cmpgt_epi64(b, a) } for Sse2(a: __m128i, b: __m128i) -> __m128i { - let gt = _mm_cmpgt_epi64(a, b); - let eq = _mm_cmpeq_epi64(a, b); - _mm_andnot_si128(_mm_or_si128(gt, eq), _mm_set1_epi64x(u64::MAX as i64)) + _mm_cmpgt_epi64(b, a) } for Scalar(a: i64, b: i64) -> i64 { if a < b { diff --git a/src/ops/i8.rs b/src/ops/i8.rs index 24f167d..3c9f209 100644 --- a/src/ops/i8.rs +++ b/src/ops/i8.rs @@ -221,19 +221,13 @@ impl_op! { impl_op! { fn lt { for Avx2(a: __m256i, b: __m256i) -> __m256i { - let gt = _mm256_cmpgt_epi8(a, b); - let eq = _mm256_cmpeq_epi8(a, b); - _mm256_andnot_si256(_mm256_or_si256(gt, eq), _mm256_set1_epi8(u32::MAX as i8)) + _mm256_cmpgt_epi8(b, a) } for Sse41(a: __m128i, b: __m128i) -> __m128i { - let gt = _mm_cmpgt_epi8(a, b); - let eq = _mm_cmpeq_epi8(a, b); - _mm_andnot_si128(_mm_or_si128(gt, eq), _mm_set1_epi8(u32::MAX as i8)) + _mm_cmpgt_epi8(b, a) } for Sse2(a: __m128i, b: __m128i) -> __m128i { - let gt = _mm_cmpgt_epi8(a, b); - let eq = _mm_cmpeq_epi8(a, b); - _mm_andnot_si128(_mm_or_si128(gt, eq), _mm_set1_epi8(u32::MAX as i8)) + _mm_cmpgt_epi8(b, a) } for Scalar(a: i8, b: i8) -> i8 { if a < b {