@@ -10735,11 +10735,11 @@ pub fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
1073510735#[cfg_attr(test, assert_instr(vpmovswb))]
1073610736pub fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
1073710737 unsafe {
10738- transmute(vpmovswb(
10739- a.as_i16x32(),
10740- i8x32::ZERO,
10741- 0b11111111_11111111_11111111_11111111,
10738+ simd_cast::<_, i8x32>(simd_imax(
10739+ simd_imin(a.as_i16x32(), i16x32::splat(i8::MAX as _)),
10740+ i16x32::splat(i8::MIN as _),
1074210741 ))
10742+ .as_m256i()
1074310743 }
1074410744}
1074510745
@@ -10751,7 +10751,9 @@ pub fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
1075110751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1075210752#[cfg_attr(test, assert_instr(vpmovswb))]
1075310753pub fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
10754- unsafe { transmute(vpmovswb(a.as_i16x32(), src.as_i8x32(), k)) }
10754+ unsafe {
10755+ simd_select_bitmask(k, _mm512_cvtsepi16_epi8(a).as_i8x32(), src.as_i8x32()).as_m256i()
10756+ }
1075510757}
1075610758
1075710759/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -10762,7 +10764,7 @@ pub fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m
1076210764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1076310765#[cfg_attr(test, assert_instr(vpmovswb))]
1076410766pub fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
10765- unsafe { transmute(vpmovswb(a.as_i16x32(), i8x32::ZERO, k)) }
10767+ unsafe { simd_select_bitmask(k, _mm512_cvtsepi16_epi8(a).as_i8x32(), i8x32::ZERO).as_m256i() }
1076610768}
1076710769
1076810770/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
@@ -10773,7 +10775,13 @@ pub fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
1077310775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1077410776#[cfg_attr(test, assert_instr(vpmovswb))]
1077510777pub fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
10776- unsafe { transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, 0b11111111_11111111)) }
10778+ unsafe {
10779+ simd_cast::<_, i8x16>(simd_imax(
10780+ simd_imin(a.as_i16x16(), i16x16::splat(i8::MAX as _)),
10781+ i16x16::splat(i8::MIN as _),
10782+ ))
10783+ .as_m128i()
10784+ }
1077710785}
1077810786
1077910787/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -10784,7 +10792,9 @@ pub fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
1078410792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1078510793#[cfg_attr(test, assert_instr(vpmovswb))]
1078610794pub fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
10787- unsafe { transmute(vpmovswb256(a.as_i16x16(), src.as_i8x16(), k)) }
10795+ unsafe {
10796+ simd_select_bitmask(k, _mm256_cvtsepi16_epi8(a).as_i8x16(), src.as_i8x16()).as_m128i()
10797+ }
1078810798}
1078910799
1079010800/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -10795,7 +10805,7 @@ pub fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m
1079510805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1079610806#[cfg_attr(test, assert_instr(vpmovswb))]
1079710807pub fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
10798- unsafe { transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, k)) }
10808+ unsafe { simd_select_bitmask(k, _mm256_cvtsepi16_epi8(a).as_i8x16(), i8x16::ZERO).as_m128i() }
1079910809}
1080010810
1080110811/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
@@ -10840,11 +10850,7 @@ pub fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
1084010850#[cfg_attr(test, assert_instr(vpmovuswb))]
1084110851pub fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
1084210852 unsafe {
10843- transmute(vpmovuswb(
10844- a.as_u16x32(),
10845- u8x32::ZERO,
10846- 0b11111111_11111111_11111111_11111111,
10847- ))
10853+ simd_cast::<_, u8x32>(simd_imin(a.as_u16x32(), u16x32::splat(u8::MAX as _))).as_m256i()
1084810854 }
1084910855}
1085010856
@@ -10856,7 +10862,9 @@ pub fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
1085610862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1085710863#[cfg_attr(test, assert_instr(vpmovuswb))]
1085810864pub fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
10859- unsafe { transmute(vpmovuswb(a.as_u16x32(), src.as_u8x32(), k)) }
10865+ unsafe {
10866+ simd_select_bitmask(k, _mm512_cvtusepi16_epi8(a).as_u8x32(), src.as_u8x32()).as_m256i()
10867+ }
1086010868}
1086110869
1086210870/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -10867,7 +10875,7 @@ pub fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __
1086710875#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1086810876#[cfg_attr(test, assert_instr(vpmovuswb))]
1086910877pub fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
10870- unsafe { transmute(vpmovuswb(a.as_u16x32(), u8x32::ZERO, k)) }
10878+ unsafe { simd_select_bitmask(k, _mm512_cvtusepi16_epi8(a).as_u8x32(), u8x32::ZERO).as_m256i() }
1087110879}
1087210880
1087310881/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
@@ -10879,11 +10887,7 @@ pub fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
1087910887#[cfg_attr(test, assert_instr(vpmovuswb))]
1088010888pub fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
1088110889 unsafe {
10882- transmute(vpmovuswb256(
10883- a.as_u16x16(),
10884- u8x16::ZERO,
10885- 0b11111111_11111111,
10886- ))
10890+ simd_cast::<_, u8x16>(simd_imin(a.as_u16x16(), u16x16::splat(u8::MAX as _))).as_m128i()
1088710891 }
1088810892}
1088910893
@@ -10895,7 +10899,9 @@ pub fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
1089510899#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1089610900#[cfg_attr(test, assert_instr(vpmovuswb))]
1089710901pub fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
10898- unsafe { transmute(vpmovuswb256(a.as_u16x16(), src.as_u8x16(), k)) }
10902+ unsafe {
10903+ simd_select_bitmask(k, _mm256_cvtusepi16_epi8(a).as_u8x16(), src.as_u8x16()).as_m128i()
10904+ }
1089910905}
1090010906
1090110907/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -10906,7 +10912,7 @@ pub fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __
1090610912#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1090710913#[cfg_attr(test, assert_instr(vpmovuswb))]
1090810914pub fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
10909- unsafe { transmute(vpmovuswb256(a.as_u16x16(), u8x16::ZERO, k)) }
10915+ unsafe { simd_select_bitmask(k, _mm256_cvtusepi16_epi8(a).as_u8x16(), u8x16::ZERO).as_m128i() }
1091010916}
1091110917
1091210918/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
@@ -11592,7 +11598,9 @@ pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a:
1159211598#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1159311599#[cfg_attr(test, assert_instr(vpmovwb))]
1159411600pub unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
11595- vpmovwbmem(mem_addr, a.as_i16x32(), k);
11601+ let result = _mm512_cvtepi16_epi8(a).as_i8x32();
11602+ let mask = simd_select_bitmask(k, i8x32::splat(!0), i8x32::ZERO);
11603+ simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
1159611604}
1159711605
1159811606/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
@@ -11603,7 +11611,9 @@ pub unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32,
1160311611#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1160411612#[cfg_attr(test, assert_instr(vpmovwb))]
1160511613pub unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
11606- vpmovwbmem256(mem_addr, a.as_i16x16(), k);
11614+ let result = _mm256_cvtepi16_epi8(a).as_i8x16();
11615+ let mask = simd_select_bitmask(k, i8x16::splat(!0), i8x16::ZERO);
11616+ simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
1160711617}
1160811618
1160911619/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
@@ -11614,7 +11624,13 @@ pub unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16,
1161411624#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1161511625#[cfg_attr(test, assert_instr(vpmovwb))]
1161611626pub unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
11617- vpmovwbmem128(mem_addr, a.as_i16x8(), k);
11627+ let result: i8x8 = simd_shuffle!(
11628+ _mm_cvtepi16_epi8(a).as_i8x16(),
11629+ i8x16::ZERO,
11630+ [0, 1, 2, 3, 4, 5, 6, 7]
11631+ );
11632+ let mask = simd_select_bitmask(k, i8x8::splat(!0), i8x8::ZERO);
11633+ simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
1161811634}
1161911635
1162011636/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
@@ -11703,17 +11719,9 @@ unsafe extern "C" {
1170311719 #[link_name = "llvm.x86.avx512.dbpsadbw.128"]
1170411720 fn vdbpsadbw128(a: u8x16, b: u8x16, imm8: i32) -> u16x8;
1170511721
11706- #[link_name = "llvm.x86.avx512.mask.pmovs.wb.512"]
11707- fn vpmovswb(a: i16x32, src: i8x32, mask: u32) -> i8x32;
11708- #[link_name = "llvm.x86.avx512.mask.pmovs.wb.256"]
11709- fn vpmovswb256(a: i16x16, src: i8x16, mask: u16) -> i8x16;
1171011722 #[link_name = "llvm.x86.avx512.mask.pmovs.wb.128"]
1171111723 fn vpmovswb128(a: i16x8, src: i8x16, mask: u8) -> i8x16;
1171211724
11713- #[link_name = "llvm.x86.avx512.mask.pmovus.wb.512"]
11714- fn vpmovuswb(a: u16x32, src: u8x32, mask: u32) -> u8x32;
11715- #[link_name = "llvm.x86.avx512.mask.pmovus.wb.256"]
11716- fn vpmovuswb256(a: u16x16, src: u8x16, mask: u16) -> u8x16;
1171711725 #[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"]
1171811726 fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16;
1171911727
@@ -11724,13 +11732,6 @@ unsafe extern "C" {
1172411732 #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.128"]
1172511733 fn vpmovswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
1172611734
11727- #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.512"]
11728- fn vpmovwbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
11729- #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.256"]
11730- fn vpmovwbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
11731- #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.128"]
11732- fn vpmovwbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
11733-
1173411735 #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"]
1173511736 fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
1173611737 #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.256"]
0 commit comments