Skip to content

Commit 55dae77

Browse files
authored
Merge pull request #1966 from sayantn/saturation
Use generic SIMD intrinsics for cvtepi intrinsics
2 parents 6209f35 + 3b0641c commit 55dae77

File tree

6 files changed

+103
-282
lines changed

6 files changed

+103
-282
lines changed

crates/core_arch/src/simd.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,18 @@
22
33
#![allow(non_camel_case_types)]
44

5+
#[inline(always)]
6+
pub(crate) unsafe fn simd_imax<T: Copy>(a: T, b: T) -> T {
7+
let mask: T = crate::intrinsics::simd::simd_gt(a, b);
8+
crate::intrinsics::simd::simd_select(mask, a, b)
9+
}
10+
11+
#[inline(always)]
12+
pub(crate) unsafe fn simd_imin<T: Copy>(a: T, b: T) -> T {
13+
let mask: T = crate::intrinsics::simd::simd_lt(a, b);
14+
crate::intrinsics::simd::simd_select(mask, a, b)
15+
}
16+
517
macro_rules! simd_ty {
618
($id:ident [$elem_type:ty ; $len:literal]: $($param_name:ident),*) => {
719
#[repr(simd)]

crates/core_arch/src/x86/avx2.rs

Lines changed: 12 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1897,11 +1897,7 @@ pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m25
18971897
#[cfg_attr(test, assert_instr(vpmaxsw))]
18981898
#[stable(feature = "simd_x86", since = "1.27.0")]
18991899
pub fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
1900-
unsafe {
1901-
let a = a.as_i16x16();
1902-
let b = b.as_i16x16();
1903-
transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
1904-
}
1900+
unsafe { simd_imax(a.as_i16x16(), b.as_i16x16()).as_m256i() }
19051901
}
19061902

19071903
/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
@@ -1913,11 +1909,7 @@ pub fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
19131909
#[cfg_attr(test, assert_instr(vpmaxsd))]
19141910
#[stable(feature = "simd_x86", since = "1.27.0")]
19151911
pub fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
1916-
unsafe {
1917-
let a = a.as_i32x8();
1918-
let b = b.as_i32x8();
1919-
transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
1920-
}
1912+
unsafe { simd_imax(a.as_i32x8(), b.as_i32x8()).as_m256i() }
19211913
}
19221914

19231915
/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
@@ -1929,11 +1921,7 @@ pub fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
19291921
#[cfg_attr(test, assert_instr(vpmaxsb))]
19301922
#[stable(feature = "simd_x86", since = "1.27.0")]
19311923
pub fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
1932-
unsafe {
1933-
let a = a.as_i8x32();
1934-
let b = b.as_i8x32();
1935-
transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
1936-
}
1924+
unsafe { simd_imax(a.as_i8x32(), b.as_i8x32()).as_m256i() }
19371925
}
19381926

19391927
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
@@ -1945,11 +1933,7 @@ pub fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
19451933
#[cfg_attr(test, assert_instr(vpmaxuw))]
19461934
#[stable(feature = "simd_x86", since = "1.27.0")]
19471935
pub fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
1948-
unsafe {
1949-
let a = a.as_u16x16();
1950-
let b = b.as_u16x16();
1951-
transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
1952-
}
1936+
unsafe { simd_imax(a.as_u16x16(), b.as_u16x16()).as_m256i() }
19531937
}
19541938

19551939
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
@@ -1961,11 +1945,7 @@ pub fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
19611945
#[cfg_attr(test, assert_instr(vpmaxud))]
19621946
#[stable(feature = "simd_x86", since = "1.27.0")]
19631947
pub fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
1964-
unsafe {
1965-
let a = a.as_u32x8();
1966-
let b = b.as_u32x8();
1967-
transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
1968-
}
1948+
unsafe { simd_imax(a.as_u32x8(), b.as_u32x8()).as_m256i() }
19691949
}
19701950

19711951
/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
@@ -1977,11 +1957,7 @@ pub fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
19771957
#[cfg_attr(test, assert_instr(vpmaxub))]
19781958
#[stable(feature = "simd_x86", since = "1.27.0")]
19791959
pub fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
1980-
unsafe {
1981-
let a = a.as_u8x32();
1982-
let b = b.as_u8x32();
1983-
transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
1984-
}
1960+
unsafe { simd_imax(a.as_u8x32(), b.as_u8x32()).as_m256i() }
19851961
}
19861962

19871963
/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
@@ -1993,11 +1969,7 @@ pub fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
19931969
#[cfg_attr(test, assert_instr(vpminsw))]
19941970
#[stable(feature = "simd_x86", since = "1.27.0")]
19951971
pub fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
1996-
unsafe {
1997-
let a = a.as_i16x16();
1998-
let b = b.as_i16x16();
1999-
transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
2000-
}
1972+
unsafe { simd_imin(a.as_i16x16(), b.as_i16x16()).as_m256i() }
20011973
}
20021974

20031975
/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
@@ -2009,11 +1981,7 @@ pub fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
20091981
#[cfg_attr(test, assert_instr(vpminsd))]
20101982
#[stable(feature = "simd_x86", since = "1.27.0")]
20111983
pub fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
2012-
unsafe {
2013-
let a = a.as_i32x8();
2014-
let b = b.as_i32x8();
2015-
transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
2016-
}
1984+
unsafe { simd_imin(a.as_i32x8(), b.as_i32x8()).as_m256i() }
20171985
}
20181986

20191987
/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
@@ -2025,11 +1993,7 @@ pub fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
20251993
#[cfg_attr(test, assert_instr(vpminsb))]
20261994
#[stable(feature = "simd_x86", since = "1.27.0")]
20271995
pub fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
2028-
unsafe {
2029-
let a = a.as_i8x32();
2030-
let b = b.as_i8x32();
2031-
transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
2032-
}
1996+
unsafe { simd_imin(a.as_i8x32(), b.as_i8x32()).as_m256i() }
20331997
}
20341998

20351999
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
@@ -2041,11 +2005,7 @@ pub fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
20412005
#[cfg_attr(test, assert_instr(vpminuw))]
20422006
#[stable(feature = "simd_x86", since = "1.27.0")]
20432007
pub fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
2044-
unsafe {
2045-
let a = a.as_u16x16();
2046-
let b = b.as_u16x16();
2047-
transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
2048-
}
2008+
unsafe { simd_imin(a.as_u16x16(), b.as_u16x16()).as_m256i() }
20492009
}
20502010

20512011
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
@@ -2057,11 +2017,7 @@ pub fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
20572017
#[cfg_attr(test, assert_instr(vpminud))]
20582018
#[stable(feature = "simd_x86", since = "1.27.0")]
20592019
pub fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
2060-
unsafe {
2061-
let a = a.as_u32x8();
2062-
let b = b.as_u32x8();
2063-
transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
2064-
}
2020+
unsafe { simd_imin(a.as_u32x8(), b.as_u32x8()).as_m256i() }
20652021
}
20662022

20672023
/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
@@ -2073,11 +2029,7 @@ pub fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
20732029
#[cfg_attr(test, assert_instr(vpminub))]
20742030
#[stable(feature = "simd_x86", since = "1.27.0")]
20752031
pub fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
2076-
unsafe {
2077-
let a = a.as_u8x32();
2078-
let b = b.as_u8x32();
2079-
transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
2080-
}
2032+
unsafe { simd_imin(a.as_u8x32(), b.as_u8x32()).as_m256i() }
20812033
}
20822034

20832035
/// Creates mask from the most significant bit of each 8-bit element in `a`,

0 commit comments

Comments
 (0)