diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs index ca4ca9a2de..ef5bf6b556 100644 --- a/crates/core_arch/src/x86/avx2.rs +++ b/crates/core_arch/src/x86/avx2.rs @@ -2778,8 +2778,17 @@ pub fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i { - unsafe { transmute(psllw(a.as_i16x16(), count.as_i16x8())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 16 { + _mm256_setzero_si256() + } else { + // SAFETY: We checked above that the shift is less than 16 bits. + simd_shl(a.as_u16x16(), u16x16::splat(shift as u16)).as_m256i() + } + } } /// Shifts packed 32-bit integers in `a` left by `count` while @@ -2790,8 +2799,17 @@ pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpslld))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i { - unsafe { transmute(pslld(a.as_i32x8(), count.as_i32x4())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 32 { + _mm256_setzero_si256() + } else { + // SAFETY: We checked above that the shift is less than 32 bits. + simd_shl(a.as_u32x8(), u32x8::splat(shift as u32)).as_m256i() + } + } } /// Shifts packed 64-bit integers in `a` left by `count` while @@ -2802,8 +2820,17 @@ pub fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i { - unsafe { transmute(psllq(a.as_i64x4(), count.as_i64x2())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 64 { + _mm256_setzero_si256() + } else { + // SAFETY: We checked above that the shift is less than 64 bits. + simd_shl(a.as_u64x4(), u64x4::splat(shift as u64)).as_m256i() + } + } } /// Shifts packed 16-bit integers in `a` left by `IMM8` while @@ -3030,8 +3057,13 @@ pub const fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsraw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i { - unsafe { transmute(psraw(a.as_i16x16(), count.as_i16x8())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i { + let shift = count.as_u64x2().as_array()[0].min(15); + unsafe { + // SAFETY: We checked above that the shift is less than 16 bits. 
+ simd_shr(a.as_i16x16(), i16x16::splat(shift as i16)).as_m256i() + } } /// Shifts packed 32-bit integers in `a` right by `count` while @@ -3042,8 +3074,13 @@ pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrad))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i { - unsafe { transmute(psrad(a.as_i32x8(), count.as_i32x4())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i { + let shift = count.as_u64x2().as_array()[0].min(31); + unsafe { + // SAFETY: We checked above that the shift is less than 32 bits. + simd_shr(a.as_i32x8(), i32x8::splat(shift as i32)).as_m256i() + } } /// Shifts packed 16-bit integers in `a` right by `IMM8` while @@ -3197,8 +3234,17 @@ pub const fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i { - unsafe { transmute(psrlw(a.as_i16x16(), count.as_i16x8())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 16 { + _mm256_setzero_si256() + } else { + // SAFETY: We checked above that the shift is less than 16 bits. + simd_shr(a.as_u16x16(), u16x16::splat(shift as u16)).as_m256i() + } + } } /// Shifts packed 32-bit integers in `a` right by `count` while shifting in @@ -3209,8 +3255,17 @@ pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrld))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i { - unsafe { transmute(psrld(a.as_i32x8(), count.as_i32x4())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 32 { + _mm256_setzero_si256() + } else { + // SAFETY: We checked above that the shift is less than 32 bits. + simd_shr(a.as_u32x8(), u32x8::splat(shift as u32)).as_m256i() + } + } } /// Shifts packed 64-bit integers in `a` right by `count` while shifting in @@ -3221,8 +3276,17 @@ pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i { - unsafe { transmute(psrlq(a.as_i64x4(), count.as_i64x2())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 64 { + _mm256_setzero_si256() + } else { + // SAFETY: We checked above that the shift is less than 64 bits. 
+ simd_shr(a.as_u64x4(), u64x4::splat(shift as u64)).as_m256i() + } + } } /// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in @@ -3919,22 +3983,6 @@ unsafe extern "C" { fn psignw(a: i16x16, b: i16x16) -> i16x16; #[link_name = "llvm.x86.avx2.psign.d"] fn psignd(a: i32x8, b: i32x8) -> i32x8; - #[link_name = "llvm.x86.avx2.psll.w"] - fn psllw(a: i16x16, count: i16x8) -> i16x16; - #[link_name = "llvm.x86.avx2.psll.d"] - fn pslld(a: i32x8, count: i32x4) -> i32x8; - #[link_name = "llvm.x86.avx2.psll.q"] - fn psllq(a: i64x4, count: i64x2) -> i64x4; - #[link_name = "llvm.x86.avx2.psra.w"] - fn psraw(a: i16x16, count: i16x8) -> i16x16; - #[link_name = "llvm.x86.avx2.psra.d"] - fn psrad(a: i32x8, count: i32x4) -> i32x8; - #[link_name = "llvm.x86.avx2.psrl.w"] - fn psrlw(a: i16x16, count: i16x8) -> i16x16; - #[link_name = "llvm.x86.avx2.psrl.d"] - fn psrld(a: i32x8, count: i32x4) -> i32x8; - #[link_name = "llvm.x86.avx2.psrl.q"] - fn psrlq(a: i64x4, count: i64x2) -> i64x4; #[link_name = "llvm.x86.avx2.pshuf.b"] fn pshufb(a: u8x32, b: u8x32) -> u8x32; #[link_name = "llvm.x86.avx2.permd"] @@ -5184,7 +5232,7 @@ mod tests { } #[simd_test(enable = "avx2")] - fn test_mm256_sll_epi16() { + const fn test_mm256_sll_epi16() { let a = _mm256_set1_epi16(0xFF); let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4); let r = _mm256_sll_epi16(a, b); @@ -5192,7 +5240,7 @@ mod tests { } #[simd_test(enable = "avx2")] - fn test_mm256_sll_epi32() { + const fn test_mm256_sll_epi32() { let a = _mm256_set1_epi32(0xFFFF); let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4); let r = _mm256_sll_epi32(a, b); @@ -5200,7 +5248,7 @@ mod tests { } #[simd_test(enable = "avx2")] - fn test_mm256_sll_epi64() { + const fn test_mm256_sll_epi64() { let a = _mm256_set1_epi64x(0xFFFFFFFF); let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4); let r = _mm256_sll_epi64(a, b); @@ -5275,7 +5323,7 @@ mod tests { } #[simd_test(enable = "avx2")] - fn test_mm256_sra_epi16() { + const fn test_mm256_sra_epi16() { let a = _mm256_set1_epi16(-1); let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); let r = _mm256_sra_epi16(a, b); @@ -5283,7 +5331,7 @@ mod tests { } #[simd_test(enable = "avx2")] - fn test_mm256_sra_epi32() { + const fn test_mm256_sra_epi32() { let a = _mm256_set1_epi32(-1); let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1); let r = _mm256_sra_epi32(a, b); @@ -5345,7 +5393,7 @@ mod tests { } #[simd_test(enable = "avx2")] - fn test_mm256_srl_epi16() { + const fn test_mm256_srl_epi16() { let a = _mm256_set1_epi16(0xFF); let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4); let r = _mm256_srl_epi16(a, b); @@ -5353,7 +5401,7 @@ mod tests { } #[simd_test(enable = "avx2")] - fn test_mm256_srl_epi32() { + const fn test_mm256_srl_epi32() { let a = _mm256_set1_epi32(0xFFFF); let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4); let r = _mm256_srl_epi32(a, b); @@ -5361,7 +5409,7 @@ mod tests { } #[simd_test(enable = "avx2")] - fn test_mm256_srl_epi64() { + const fn test_mm256_srl_epi64() { let a = _mm256_set1_epi64x(0xFFFFFFFF); let b = _mm_setr_epi64x(4, 0); let r = _mm256_srl_epi64(a, b); diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 78801e8902..308b4fa460 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7217,8 +7217,17 @@ pub const fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, 
assert_instr(vpsllw))] -pub fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i { - unsafe { transmute(vpsllw(a.as_i16x32(), count.as_i16x8())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 16 { + _mm512_setzero_si512() + } else { + // SAFETY: We checked above that the shift is less than 16 bits. + simd_shl(a.as_u16x32(), u16x32::splat(shift as u16)).as_m512i() + } + } } /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7228,7 +7237,13 @@ pub fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllw))] -pub fn _mm512_mask_sll_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_sll_epi16( + src: __m512i, + k: __mmask32, + a: __m512i, + count: __m128i, +) -> __m512i { unsafe { let shf = _mm512_sll_epi16(a, count).as_i16x32(); transmute(simd_select_bitmask(k, shf, src.as_i16x32())) @@ -7242,7 +7257,8 @@ pub fn _mm512_mask_sll_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m1 #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllw))] -pub fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { unsafe { let shf = _mm512_sll_epi16(a, count).as_i16x32(); transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) @@ -7256,7 +7272,13 @@ pub fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m51 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllw))] -pub fn _mm256_mask_sll_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_sll_epi16( + src: __m256i, + k: __mmask16, + a: __m256i, + count: __m128i, +) -> __m256i { unsafe { let shf = _mm256_sll_epi16(a, count).as_i16x16(); transmute(simd_select_bitmask(k, shf, src.as_i16x16())) @@ -7270,7 +7292,8 @@ pub fn _mm256_mask_sll_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m1 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllw))] -pub fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { unsafe { let shf = _mm256_sll_epi16(a, count).as_i16x16(); transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) @@ -7284,7 +7307,8 @@ pub fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m25 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllw))] -pub fn _mm_mask_sll_epi16(src: __m128i, k: 
__mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sll_epi16(a, count).as_i16x8(); transmute(simd_select_bitmask(k, shf, src.as_i16x8())) @@ -7298,7 +7322,8 @@ pub fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllw))] -pub fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sll_epi16(a, count).as_i16x8(); transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) @@ -7621,8 +7646,17 @@ pub const fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __ #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i { - unsafe { transmute(vpsrlw(a.as_i16x32(), count.as_i16x8())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 16 { + _mm512_setzero_si512() + } else { + // SAFETY: We checked above that the shift is less than 16 bits. + simd_shr(a.as_u16x32(), u16x32::splat(shift as u16)).as_m512i() + } + } } /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
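
The pattern above repeats for every lane width in this diff: the shift count is taken from the low 64 bits of the `count` vector, and a logical shift zeroes every lane once the count reaches the lane width. A minimal scalar model of those semantics (the helper below is illustrative only, not part of the patch):

```rust
/// Illustrative scalar model of the vpsllw-style semantics: out-of-range
/// counts zero all lanes, unlike Rust's plain `<<`, which panics in debug
/// builds for such counts.
fn sll_epi16_model(a: [u16; 32], count: u64) -> [u16; 32] {
    if count >= 16 {
        [0u16; 32] // the hardware zeroes every lane for count >= 16
    } else {
        a.map(|lane| lane << count) // in range: a plain per-lane shift
    }
}

fn main() {
    assert_eq!(sll_epi16_model([0x00FF; 32], 4), [0x0FF0; 32]);
    assert_eq!(sll_epi16_model([0xFFFF; 32], 16), [0; 32]); // count == width
}
```
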
@@ -7632,7 +7666,13 @@ pub fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub fn _mm512_mask_srl_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_srl_epi16( + src: __m512i, + k: __mmask32, + a: __m512i, + count: __m128i, +) -> __m512i { unsafe { let shf = _mm512_srl_epi16(a, count).as_i16x32(); transmute(simd_select_bitmask(k, shf, src.as_i16x32())) @@ -7646,7 +7686,8 @@ pub fn _mm512_mask_srl_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m1 #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { unsafe { let shf = _mm512_srl_epi16(a, count).as_i16x32(); transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) @@ -7660,7 +7701,13 @@ pub fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m51 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub fn _mm256_mask_srl_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_srl_epi16( + src: __m256i, + k: __mmask16, + a: __m256i, + count: __m128i, +) -> __m256i { unsafe { let shf = _mm256_srl_epi16(a, count).as_i16x16(); transmute(simd_select_bitmask(k, shf, src.as_i16x16())) @@ -7674,7 +7721,8 @@ pub fn _mm256_mask_srl_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m1 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { unsafe { let shf = _mm256_srl_epi16(a, count).as_i16x16(); transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) @@ -7688,7 +7736,8 @@ pub fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m25 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_srl_epi16(a, count).as_i16x8(); transmute(simd_select_bitmask(k, shf, src.as_i16x8())) @@ -7702,7 +7751,8 @@ pub fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn 
_mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_srl_epi16(a, count).as_i16x8(); transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) @@ -8010,8 +8060,13 @@ pub const fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __ #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraw))] -pub fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i { - unsafe { transmute(vpsraw(a.as_i16x32(), count.as_i16x8())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i { + let shift = count.as_u64x2().as_array()[0].min(15); + unsafe { + // SAFETY: We checked above that the shift is less than 16 bits. + simd_shr(a.as_i16x32(), i16x32::splat(shift as i16)).as_m512i() + } } /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8021,7 +8076,13 @@ pub fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraw))] -pub fn _mm512_mask_sra_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_sra_epi16( + src: __m512i, + k: __mmask32, + a: __m512i, + count: __m128i, +) -> __m512i { unsafe { let shf = _mm512_sra_epi16(a, count).as_i16x32(); transmute(simd_select_bitmask(k, shf, src.as_i16x32())) @@ -8035,7 +8096,8 @@ pub fn _mm512_mask_sra_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m1 #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraw))] -pub fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { unsafe { let shf = _mm512_sra_epi16(a, count).as_i16x32(); transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) @@ -8049,7 +8111,13 @@ pub fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m51 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraw))] -pub fn _mm256_mask_sra_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_sra_epi16( + src: __m256i, + k: __mmask16, + a: __m256i, + count: __m128i, +) -> __m256i { unsafe { let shf = _mm256_sra_epi16(a, count).as_i16x16(); transmute(simd_select_bitmask(k, shf, src.as_i16x16())) @@ -8063,7 +8131,8 @@ pub fn _mm256_mask_sra_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m1 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraw))] -pub fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { unsafe { let shf = _mm256_sra_epi16(a, count).as_i16x16(); 
transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) @@ -8077,7 +8146,8 @@ pub fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m25 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraw))] -pub fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sra_epi16(a, count).as_i16x8(); transmute(simd_select_bitmask(k, shf, src.as_i16x8())) @@ -8091,7 +8161,8 @@ pub fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraw))] -pub fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sra_epi16(a, count).as_i16x8(); transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) @@ -12706,15 +12777,6 @@ unsafe extern "C" { #[link_name = "llvm.x86.avx512.pmaddubs.w.512"] fn vpmaddubsw(a: u8x64, b: i8x64) -> i16x32; - #[link_name = "llvm.x86.avx512.psll.w.512"] - fn vpsllw(a: i16x32, count: i16x8) -> i16x32; - - #[link_name = "llvm.x86.avx512.psrl.w.512"] - fn vpsrlw(a: i16x32, count: i16x8) -> i16x32; - - #[link_name = "llvm.x86.avx512.psra.w.512"] - fn vpsraw(a: i16x32, count: i16x8) -> i16x32; - #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"] fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32; #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"] @@ -18334,7 +18396,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_sll_epi16() { + const fn test_mm512_sll_epi16() { let a = _mm512_set1_epi16(1 << 15); let count = _mm_set1_epi16(2); let r = _mm512_sll_epi16(a, count); @@ -18343,7 +18405,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_mask_sll_epi16() { + const fn test_mm512_mask_sll_epi16() { let a = _mm512_set1_epi16(1 << 15); let count = _mm_set1_epi16(2); let r = _mm512_mask_sll_epi16(a, 0, a, count); @@ -18354,7 +18416,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_maskz_sll_epi16() { + const fn test_mm512_maskz_sll_epi16() { let a = _mm512_set1_epi16(1 << 15); let count = _mm_set1_epi16(2); let r = _mm512_maskz_sll_epi16(0, a, count); @@ -18365,7 +18427,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm256_mask_sll_epi16() { + const fn test_mm256_mask_sll_epi16() { let a = _mm256_set1_epi16(1 << 15); let count = _mm_set1_epi16(2); let r = _mm256_mask_sll_epi16(a, 0, a, count); @@ -18376,7 +18438,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm256_maskz_sll_epi16() { + const fn test_mm256_maskz_sll_epi16() { let a = _mm256_set1_epi16(1 << 15); let count = _mm_set1_epi16(2); let r = _mm256_maskz_sll_epi16(0, a, count); @@ -18387,7 +18449,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm_mask_sll_epi16() { + const fn test_mm_mask_sll_epi16() { let a = _mm_set1_epi16(1 << 15); let count = _mm_set1_epi16(2); let r = _mm_mask_sll_epi16(a, 0, a, count); @@ -18398,7 +18460,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm_maskz_sll_epi16() { + 
const fn test_mm_maskz_sll_epi16() { let a = _mm_set1_epi16(1 << 15); let count = _mm_set1_epi16(2); let r = _mm_maskz_sll_epi16(0, a, count); @@ -18570,7 +18632,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_srl_epi16() { + const fn test_mm512_srl_epi16() { let a = _mm512_set1_epi16(1 << 1); let count = _mm_set1_epi16(2); let r = _mm512_srl_epi16(a, count); @@ -18579,7 +18641,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_mask_srl_epi16() { + const fn test_mm512_mask_srl_epi16() { let a = _mm512_set1_epi16(1 << 1); let count = _mm_set1_epi16(2); let r = _mm512_mask_srl_epi16(a, 0, a, count); @@ -18590,7 +18652,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_maskz_srl_epi16() { + const fn test_mm512_maskz_srl_epi16() { let a = _mm512_set1_epi16(1 << 1); let count = _mm_set1_epi16(2); let r = _mm512_maskz_srl_epi16(0, a, count); @@ -18601,7 +18663,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm256_mask_srl_epi16() { + const fn test_mm256_mask_srl_epi16() { let a = _mm256_set1_epi16(1 << 1); let count = _mm_set1_epi16(2); let r = _mm256_mask_srl_epi16(a, 0, a, count); @@ -18612,7 +18674,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm256_maskz_srl_epi16() { + const fn test_mm256_maskz_srl_epi16() { let a = _mm256_set1_epi16(1 << 1); let count = _mm_set1_epi16(2); let r = _mm256_maskz_srl_epi16(0, a, count); @@ -18623,7 +18685,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm_mask_srl_epi16() { + const fn test_mm_mask_srl_epi16() { let a = _mm_set1_epi16(1 << 1); let count = _mm_set1_epi16(2); let r = _mm_mask_srl_epi16(a, 0, a, count); @@ -18634,7 +18696,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm_maskz_srl_epi16() { + const fn test_mm_maskz_srl_epi16() { let a = _mm_set1_epi16(1 << 1); let count = _mm_set1_epi16(2); let r = _mm_maskz_srl_epi16(0, a, count); @@ -18806,7 +18868,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_sra_epi16() { + const fn test_mm512_sra_epi16() { let a = _mm512_set1_epi16(8); let count = _mm_set1_epi16(1); let r = _mm512_sra_epi16(a, count); @@ -18815,7 +18877,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_mask_sra_epi16() { + const fn test_mm512_mask_sra_epi16() { let a = _mm512_set1_epi16(8); let count = _mm_set1_epi16(1); let r = _mm512_mask_sra_epi16(a, 0, a, count); @@ -18826,7 +18888,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_maskz_sra_epi16() { + const fn test_mm512_maskz_sra_epi16() { let a = _mm512_set1_epi16(8); let count = _mm_set1_epi16(1); let r = _mm512_maskz_sra_epi16(0, a, count); @@ -18837,7 +18899,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm256_mask_sra_epi16() { + const fn test_mm256_mask_sra_epi16() { let a = _mm256_set1_epi16(8); let count = _mm_set1_epi16(1); let r = _mm256_mask_sra_epi16(a, 0, a, count); @@ -18848,7 +18910,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm256_maskz_sra_epi16() { + const fn test_mm256_maskz_sra_epi16() { let a = _mm256_set1_epi16(8); let count = _mm_set1_epi16(1); let r = _mm256_maskz_sra_epi16(0, a, count); @@ -18859,7 +18921,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm_mask_sra_epi16() { + const fn test_mm_mask_sra_epi16() { let a = _mm_set1_epi16(8); let count = _mm_set1_epi16(1); let r = _mm_mask_sra_epi16(a, 0, a, count); @@ -18870,7 +18932,7 @@ mod tests { } #[simd_test(enable = 
"avx512bw,avx512vl")] - fn test_mm_maskz_sra_epi16() { + const fn test_mm_maskz_sra_epi16() { let a = _mm_set1_epi16(8); let count = _mm_set1_epi16(1); let r = _mm_maskz_sra_epi16(0, a, count); diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 3730496e1e..a0843686b4 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -20652,8 +20652,17 @@ pub const fn _mm_maskz_srli_epi64(k: __mmask8, a: __m128i) -> _ #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpslld))] -pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i { - unsafe { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 32 { + _mm512_setzero_si512() + } else { + // SAFETY: We checked above that the shift is less than 32 bits. + simd_shl(a.as_u32x16(), u32x16::splat(shift as u32)).as_m512i() + } + } } /// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20663,7 +20672,13 @@ pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpslld))] -pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_sll_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m128i, +) -> __m512i { unsafe { let shf = _mm512_sll_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) @@ -20677,7 +20692,8 @@ pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m1 #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpslld))] -pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { unsafe { let shf = _mm512_sll_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) @@ -20691,7 +20707,13 @@ pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m51 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpslld))] -pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_sll_epi32( + src: __m256i, + k: __mmask8, + a: __m256i, + count: __m128i, +) -> __m256i { unsafe { let shf = _mm256_sll_epi32(a, count).as_i32x8(); transmute(simd_select_bitmask(k, shf, src.as_i32x8())) @@ -20705,7 +20727,8 @@ pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpslld))] -pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, 
count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { unsafe { let shf = _mm256_sll_epi32(a, count).as_i32x8(); transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) @@ -20719,7 +20742,8 @@ pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpslld))] -pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sll_epi32(a, count).as_i32x4(); transmute(simd_select_bitmask(k, shf, src.as_i32x4())) @@ -20733,7 +20757,8 @@ pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpslld))] -pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sll_epi32(a, count).as_i32x4(); transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) @@ -20747,8 +20772,17 @@ pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrld))] -pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i { - unsafe { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 32 { + _mm512_setzero_si512() + } else { + // SAFETY: We checked above that the shift is less than 32 bits. + simd_shr(a.as_u32x16(), u32x16::splat(shift as u32)).as_m512i() + } + } } /// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
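
All the `mask`/`maskz` wrappers above reduce to one per-lane select over the unmasked result: bit `i` of `k` picks the shifted lane, and the fallback is `src` for the `mask_` variants and zero for the `maskz_` variants. A sketch of that rule with a hypothetical scalar helper:

```rust
/// Hypothetical scalar version of `simd_select_bitmask` for 16 u32 lanes
/// (illustrative only): bit i of `k` selects between the two inputs.
fn select_bitmask(k: u16, if_set: [u32; 16], if_clear: [u32; 16]) -> [u32; 16] {
    let mut out = [0u32; 16];
    for (i, slot) in out.iter_mut().enumerate() {
        *slot = if (k >> i) & 1 == 1 { if_set[i] } else { if_clear[i] };
    }
    out
}

fn main() {
    let shifted = [7u32; 16]; // stands in for a shift intrinsic's result
    let src = [1u32; 16];
    // mask variant: unselected lanes keep src; maskz would pass [0; 16]
    let r = select_bitmask(0b0101, shifted, src);
    assert_eq!(&r[..4], &[7, 1, 7, 1]); // only lanes 0 and 2 take the shift
}
```
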
@@ -20758,7 +20792,13 @@ pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrld))] -pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_srl_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m128i, +) -> __m512i { unsafe { let shf = _mm512_srl_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) @@ -20772,7 +20812,8 @@ pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m1 #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrld))] -pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { unsafe { let shf = _mm512_srl_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) @@ -20786,7 +20827,13 @@ pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m51 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrld))] -pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_srl_epi32( + src: __m256i, + k: __mmask8, + a: __m256i, + count: __m128i, +) -> __m256i { unsafe { let shf = _mm256_srl_epi32(a, count).as_i32x8(); transmute(simd_select_bitmask(k, shf, src.as_i32x8())) @@ -20800,7 +20847,8 @@ pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrld))] -pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { unsafe { let shf = _mm256_srl_epi32(a, count).as_i32x8(); transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) @@ -20814,7 +20862,8 @@ pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrld))] -pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_srl_epi32(a, count).as_i32x4(); transmute(simd_select_bitmask(k, shf, src.as_i32x4())) @@ -20828,7 +20877,8 @@ pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrld))] -pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn 
_mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_srl_epi32(a, count).as_i32x4(); transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) @@ -20842,8 +20892,17 @@ pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllq))] -pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i { - unsafe { transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 64 { + _mm512_setzero_si512() + } else { + // SAFETY: We checked above that the shift is less than 64 bits. + simd_shl(a.as_u64x8(), u64x8::splat(shift as u64)).as_m512i() + } + } } /// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20853,7 +20912,13 @@ pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllq))] -pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_sll_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + count: __m128i, +) -> __m512i { unsafe { let shf = _mm512_sll_epi64(a, count).as_i64x8(); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) @@ -20867,7 +20932,8 @@ pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m12 #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllq))] -pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { unsafe { let shf = _mm512_sll_epi64(a, count).as_i64x8(); transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) @@ -20881,7 +20947,13 @@ pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllq))] -pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_sll_epi64( + src: __m256i, + k: __mmask8, + a: __m256i, + count: __m128i, +) -> __m256i { unsafe { let shf = _mm256_sll_epi64(a, count).as_i64x4(); transmute(simd_select_bitmask(k, shf, src.as_i64x4())) @@ -20895,7 +20967,8 @@ pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllq))] -pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { unsafe { let shf = 
_mm256_sll_epi64(a, count).as_i64x4(); transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) @@ -20909,7 +20982,8 @@ pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllq))] -pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sll_epi64(a, count).as_i64x2(); transmute(simd_select_bitmask(k, shf, src.as_i64x2())) @@ -20923,7 +20997,8 @@ pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllq))] -pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sll_epi64(a, count).as_i64x2(); transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) @@ -20937,8 +21012,17 @@ pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i { - unsafe { transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 64 { + _mm512_setzero_si512() + } else { + // SAFETY: We checked above that the shift is less than 64 bits. + simd_shr(a.as_u64x8(), u64x8::splat(shift as u64)).as_m512i() + } + } } /// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
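
One detail worth calling out in the 64-bit conversions above: only `count.as_u64x2().as_array()[0]` is read, because the instructions take the count from the low 64 bits of the operand and ignore the upper lane. A scalar sketch of that behavior (illustrative helper, not the patch's code):

```rust
/// Scalar model of the vpsrlq-style count handling: only the low 64-bit
/// lane of the count operand is consulted.
fn srl_epi64_model(a: [u64; 8], count: [u64; 2]) -> [u64; 8] {
    let shift = count[0]; // count[1] is ignored, matching the hardware
    if shift >= 64 {
        [0u64; 8] // out-of-range counts zero every lane
    } else {
        a.map(|lane| lane >> shift)
    }
}

fn main() {
    // Garbage in the upper count lane changes nothing.
    assert_eq!(srl_epi64_model([16; 8], [2, 0xDEAD]), [4; 8]);
}
```
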
@@ -20948,7 +21032,13 @@ pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_srl_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + count: __m128i, +) -> __m512i { unsafe { let shf = _mm512_srl_epi64(a, count).as_i64x8(); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) @@ -20962,7 +21052,8 @@ pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m12 #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { unsafe { let shf = _mm512_srl_epi64(a, count).as_i64x8(); transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) @@ -20976,7 +21067,13 @@ pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_srl_epi64( + src: __m256i, + k: __mmask8, + a: __m256i, + count: __m128i, +) -> __m256i { unsafe { let shf = _mm256_srl_epi64(a, count).as_i64x4(); transmute(simd_select_bitmask(k, shf, src.as_i64x4())) @@ -20990,7 +21087,8 @@ pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { unsafe { let shf = _mm256_srl_epi64(a, count).as_i64x4(); transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) @@ -21004,7 +21102,8 @@ pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_srl_epi64(a, count).as_i64x2(); transmute(simd_select_bitmask(k, shf, src.as_i64x2())) @@ -21018,7 +21117,8 @@ pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn 
_mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_srl_epi64(a, count).as_i64x2(); transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) @@ -21032,8 +21132,13 @@ pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrad))] -pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i { - unsafe { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i { + let shift = count.as_u64x2().as_array()[0].min(31); + unsafe { + // SAFETY: We checked above that the shift is less than 32 bits. + simd_shr(a.as_i32x16(), i32x16::splat(shift as i32)).as_m512i() + } } /// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21043,7 +21148,13 @@ pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrad))] -pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_sra_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m128i, +) -> __m512i { unsafe { let shf = _mm512_sra_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) @@ -21057,7 +21168,8 @@ pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m1 #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrad))] -pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { unsafe { let shf = _mm512_sra_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) @@ -21071,7 +21183,13 @@ pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m51 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrad))] -pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_sra_epi32( + src: __m256i, + k: __mmask8, + a: __m256i, + count: __m128i, +) -> __m256i { unsafe { let shf = _mm256_sra_epi32(a, count).as_i32x8(); transmute(simd_select_bitmask(k, shf, src.as_i32x8())) @@ -21085,7 +21203,8 @@ pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrad))] -pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { unsafe { let shf = _mm256_sra_epi32(a, count).as_i32x8(); 
transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) @@ -21099,7 +21218,8 @@ pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrad))] -pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sra_epi32(a, count).as_i32x4(); transmute(simd_select_bitmask(k, shf, src.as_i32x4())) @@ -21113,7 +21233,8 @@ pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrad))] -pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sra_epi32(a, count).as_i32x4(); transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) @@ -21127,8 +21248,13 @@ pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraq))] -pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i { - unsafe { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i { + let shift = count.as_u64x2().as_array()[0].min(63); + unsafe { + // SAFETY: We checked above that the shift is less than 64 bits. + simd_shr(a.as_i64x8(), i64x8::splat(shift as i64)).as_m512i() + } } /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
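
Note the asymmetry the conversions encode: the logical shifts guard with `if shift >= N` and return zero, while the arithmetic shifts clamp with `.min(N - 1)`, because once the sign bit is being shifted in, every count past `N - 1` produces the same result as `N - 1`. A small per-lane illustration (function names are hypothetical):

```rust
fn srl_epi32_lane(x: u32, count: u64) -> u32 {
    if count >= 32 { 0 } else { x >> count } // logical: zero on overflow
}

fn sra_epi32_lane(x: i32, count: u64) -> i32 {
    x >> count.min(31) // arithmetic: clamping already saturates the result
}

fn main() {
    assert_eq!(srl_epi32_lane(u32::MAX, 40), 0); // zeroed outright
    assert_eq!(sra_epi32_lane(-1, 40), -1); // stays all sign bits
    assert_eq!(sra_epi32_lane(i32::MIN, 40), -1); // same as a shift by 31
}
```
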
@@ -21138,7 +21264,13 @@ pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraq))] -pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_sra_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + count: __m128i, +) -> __m512i { unsafe { let shf = _mm512_sra_epi64(a, count).as_i64x8(); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) @@ -21152,7 +21284,8 @@ pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m12 #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraq))] -pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { unsafe { let shf = _mm512_sra_epi64(a, count).as_i64x8(); transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) @@ -21166,8 +21299,13 @@ pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraq))] -pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i { - unsafe { transmute(vpsraq256(a.as_i64x4(), count.as_i64x2())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i { + let shift = count.as_u64x2().as_array()[0].min(63); + unsafe { + // SAFETY: We checked above that the shift is less than 64 bits. + simd_shr(a.as_i64x4(), i64x4::splat(shift as i64)).as_m256i() + } } /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
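
`vpsraq` is the one shift in this series with no SSE2/AVX2 ancestor (the 64-bit arithmetic right shift was introduced with AVX-512), which is why `_mm256_sra_epi64` above and the `_mm_sra_epi64` conversion that follows live in this file and require `avx512f,avx512vl`. The `.min(63)` clamp has the same fixpoint property as the narrower widths; a quick check:

```rust
fn sra_epi64_lane(x: i64, count: u64) -> i64 {
    x >> count.min(63) // counts past 63 behave exactly like 63
}

fn main() {
    assert_eq!(sra_epi64_lane(i64::MIN, 200), -1); // sign bit floods in
    assert_eq!(sra_epi64_lane(42, 200), 0); // non-negative lanes go to 0
}
```
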
@@ -21177,7 +21315,13 @@ pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraq))] -pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_sra_epi64( + src: __m256i, + k: __mmask8, + a: __m256i, + count: __m128i, +) -> __m256i { unsafe { let shf = _mm256_sra_epi64(a, count).as_i64x4(); transmute(simd_select_bitmask(k, shf, src.as_i64x4())) @@ -21191,7 +21335,8 @@ pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraq))] -pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { unsafe { let shf = _mm256_sra_epi64(a, count).as_i64x4(); transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) @@ -21205,8 +21350,13 @@ pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraq))] -pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(vpsraq128(a.as_i64x2(), count.as_i64x2())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i { + let shift = count.as_u64x2().as_array()[0].min(63); + unsafe { + // SAFETY: We checked above that the shift is less than 64 bits. + simd_shr(a.as_i64x2(), i64x2::splat(shift as i64)).as_m128i() + } } /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
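
The payoff of the `rustc_const_unstable` attributes is that these shifts can now fold in const contexts on nightly. A hedged sketch of the intended use, with the feature name taken from the attributes in this diff; the exact const-calling rules for `#[target_feature]` functions are governed by tracking issue 149298, so treat this as illustrative rather than guaranteed to compile as-is:

```rust
// Sketch only: assumes nightly and that `_mm_set_epi64x` is also const on
// this toolchain.
#![feature(stdarch_const_x86)]
use core::arch::x86_64::{__m128i, _mm_set_epi64x, _mm_sra_epi64};

// Folded at compile time: both lanes of `a` become -8 >> 2 == -2.
const SHIFTED: __m128i = {
    let a = _mm_set_epi64x(-8, -8);
    let count = _mm_set_epi64x(0, 2); // low lane carries the count
    _mm_sra_epi64(a, count)
};
```
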
@@ -21216,7 +21366,8 @@ pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraq))]
-pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_sra_epi64(a, count).as_i64x2();
         transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
@@ -21230,7 +21381,8 @@ pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraq))]
-pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_sra_epi64(a, count).as_i64x2();
         transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
@@ -44774,25 +44926,6 @@ unsafe extern "C" {
     #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
     fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;
 
-    #[link_name = "llvm.x86.avx512.psll.d.512"]
-    fn vpslld(a: i32x16, count: i32x4) -> i32x16;
-    #[link_name = "llvm.x86.avx512.psrl.d.512"]
-    fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
-    #[link_name = "llvm.x86.avx512.psll.q.512"]
-    fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
-    #[link_name = "llvm.x86.avx512.psrl.q.512"]
-    fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;
-
-    #[link_name = "llvm.x86.avx512.psra.d.512"]
-    fn vpsrad(a: i32x16, count: i32x4) -> i32x16;
-
-    #[link_name = "llvm.x86.avx512.psra.q.512"]
-    fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
-    #[link_name = "llvm.x86.avx512.psra.q.256"]
-    fn vpsraq256(a: i64x4, count: i64x2) -> i64x4;
-    #[link_name = "llvm.x86.avx512.psra.q.128"]
-    fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;
-
     #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
     fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
     #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
@@ -54649,7 +54782,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_sll_epi32() {
+    const fn test_mm512_sll_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 31, 1 << 0, 1 << 1, 1 << 2,
@@ -54670,7 +54803,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_mask_sll_epi32() {
+    const fn test_mm512_mask_sll_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 31, 1 << 0, 1 << 1, 1 << 2,
@@ -54693,7 +54826,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_maskz_sll_epi32() {
+    const fn test_mm512_maskz_sll_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 31, 1 << 0, 1 << 1, 1 << 2,
@@ -54710,7 +54843,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_mask_sll_epi32() {
+    const fn test_mm256_mask_sll_epi32() {
         let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm256_mask_sll_epi32(a, 0, a, count);
@@ -54721,7 +54854,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_maskz_sll_epi32() {
+    const fn test_mm256_maskz_sll_epi32() {
         let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm256_maskz_sll_epi32(0, a, count);
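The `_mm*_mask_*` and `_mm*_maskz_*` wrappers above differ only in the fallback passed to `simd_select_bitmask`: the `src` lanes for the mask variants, all-zero lanes for the maskz variants. A scalar sketch of that per-lane selection (names are illustrative, not stdarch internals):

```rust
/// Scalar model of the writemask selection used by the mask/maskz
/// wrappers: bit i of `k` picks the shifted lane, otherwise the
/// fallback lane (`src` for mask, zero for maskz).
fn mask_select4(k: u8, shifted: [i64; 4], fallback: [i64; 4]) -> [i64; 4] {
    let mut out = [0i64; 4];
    for i in 0..4 {
        out[i] = if (k >> i) & 1 == 1 { shifted[i] } else { fallback[i] };
    }
    out
}

fn main() {
    let shf = [10, 20, 30, 40];
    let src = [1, 2, 3, 4];
    assert_eq!(mask_select4(0b0101, shf, src), [10, 2, 30, 4]);
    assert_eq!(mask_select4(0, shf, [0; 4]), [0; 4]); // maskz with k == 0
}
```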
@@ -54732,7 +54865,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_mask_sll_epi32() {
+    const fn test_mm_mask_sll_epi32() {
         let a = _mm_set_epi32(1 << 13, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm_mask_sll_epi32(a, 0, a, count);
@@ -54743,7 +54876,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_maskz_sll_epi32() {
+    const fn test_mm_maskz_sll_epi32() {
         let a = _mm_set_epi32(1 << 13, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm_maskz_sll_epi32(0, a, count);
@@ -54754,7 +54887,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_srl_epi32() {
+    const fn test_mm512_srl_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 31, 1 << 0, 1 << 1, 1 << 2,
@@ -54769,7 +54902,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_mask_srl_epi32() {
+    const fn test_mm512_mask_srl_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 31, 1 << 0, 1 << 1, 1 << 2,
@@ -54786,7 +54919,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_maskz_srl_epi32() {
+    const fn test_mm512_maskz_srl_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 31, 1 << 0, 1 << 1, 1 << 2,
@@ -54803,7 +54936,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_mask_srl_epi32() {
+    const fn test_mm256_mask_srl_epi32() {
         let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm256_mask_srl_epi32(a, 0, a, count);
@@ -54814,7 +54947,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_maskz_srl_epi32() {
+    const fn test_mm256_maskz_srl_epi32() {
         let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm256_maskz_srl_epi32(0, a, count);
@@ -54825,7 +54958,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_mask_srl_epi32() {
+    const fn test_mm_mask_srl_epi32() {
         let a = _mm_set_epi32(1 << 5, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm_mask_srl_epi32(a, 0, a, count);
@@ -54836,7 +54969,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_maskz_srl_epi32() {
+    const fn test_mm_maskz_srl_epi32() {
         let a = _mm_set_epi32(1 << 5, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm_maskz_srl_epi32(0, a, count);
@@ -54847,7 +54980,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_sra_epi32() {
+    const fn test_mm512_sra_epi32() {
         let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
         let count = _mm_set_epi32(1, 0, 0, 2);
         let r = _mm512_sra_epi32(a, count);
@@ -54856,7 +54989,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_mask_sra_epi32() {
+    const fn test_mm512_mask_sra_epi32() {
         let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
         let count = _mm_set_epi32(0, 0, 0, 2);
         let r = _mm512_mask_sra_epi32(a, 0, a, count);
@@ -54867,7 +55000,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_maskz_sra_epi32() {
+    const fn test_mm512_maskz_sra_epi32() {
         let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
         let count = _mm_set_epi32(2, 0, 0, 2);
         let r = _mm512_maskz_sra_epi32(0, a, count);
@@ -54878,7 +55011,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_mask_sra_epi32() {
+    const fn test_mm256_mask_sra_epi32() {
         let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm256_mask_sra_epi32(a, 0, a, count);
@@ -54889,7 +55022,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_maskz_sra_epi32() {
+    const fn test_mm256_maskz_sra_epi32() {
         let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm256_maskz_sra_epi32(0, a, count);
@@ -54900,7 +55033,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_mask_sra_epi32() {
+    const fn test_mm_mask_sra_epi32() {
         let a = _mm_set_epi32(1 << 5, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm_mask_sra_epi32(a, 0, a, count);
@@ -54911,7 +55044,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_maskz_sra_epi32() {
+    const fn test_mm_maskz_sra_epi32() {
         let a = _mm_set_epi32(1 << 5, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm_maskz_sra_epi32(0, a, count);
diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs
index fbf62c362f..6c9b78b353 100644
--- a/crates/core_arch/src/x86/sse2.rs
+++ b/crates/core_arch/src/x86/sse2.rs
@@ -577,8 +577,17 @@ pub const fn _mm_slli_epi16(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psllw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 16 {
+            _mm_setzero_si128()
+        } else {
+            // SAFETY: We checked above that the shift is less than 16 bits.
+            simd_shl(a.as_u16x8(), u16x8::splat(shift as u16)).as_m128i()
+        }
+    }
 }
 
 /// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros.
@@ -609,8 +618,17 @@ pub const fn _mm_slli_epi32(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pslld))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 32 {
+            _mm_setzero_si128()
+        } else {
+            // SAFETY: We checked above that the shift is less than 32 bits.
+            simd_shl(a.as_u32x4(), u32x4::splat(shift as u32)).as_m128i()
+        }
+    }
 }
 
 /// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros.
@@ -641,8 +659,17 @@ pub const fn _mm_slli_epi64(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psllq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 64 {
+            _mm_setzero_si128()
+        } else {
+            // SAFETY: We checked above that the shift is less than 64 bits.
+            simd_shl(a.as_u64x2(), u64x2::splat(shift as u64)).as_m128i()
+        }
+    }
 }
 
 /// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign
@@ -668,8 +695,13 @@ pub const fn _mm_srai_epi16(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psraw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
+    let shift = count.as_u64x2().as_array()[0].min(15);
+    unsafe {
+        // SAFETY: We checked above that the shift is less than 16 bits.
+        simd_shr(a.as_i16x8(), i16x8::splat(shift as i16)).as_m128i()
+    }
 }
 
 /// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign
@@ -695,8 +727,13 @@ pub const fn _mm_srai_epi32(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psrad))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
+    let shift = count.as_u64x2().as_array()[0].min(31);
+    unsafe {
+        // SAFETY: We checked above that the shift is less than 32 bits.
+        simd_shr(a.as_i32x4(), i32x4::splat(shift as i32)).as_m128i()
+    }
 }
 
 /// Shifts `a` right by `IMM8` bytes while shifting in zeros.
@@ -780,8 +817,17 @@ pub const fn _mm_srli_epi16(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psrlw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 16 {
+            _mm_setzero_si128()
+        } else {
+            // SAFETY: We checked above that the shift is less than 16 bits.
+            simd_shr(a.as_u16x8(), u16x8::splat(shift as u16)).as_m128i()
+        }
+    }
 }
 
 /// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
@@ -813,8 +859,17 @@ pub const fn _mm_srli_epi32(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psrld))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 32 {
+            _mm_setzero_si128()
+        } else {
+            // SAFETY: We checked above that the shift is less than 32 bits.
+            simd_shr(a.as_u32x4(), u32x4::splat(shift as u32)).as_m128i()
+        }
+    }
 }
 
 /// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
@@ -846,8 +901,17 @@ pub const fn _mm_srli_epi64(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psrlq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 64 {
+            _mm_setzero_si128()
+        } else {
+            // SAFETY: We checked above that the shift is less than 64 bits.
+            simd_shr(a.as_u64x2(), u64x2::splat(shift as u64)).as_m128i()
+        }
+    }
 }
 
 /// Computes the bitwise AND of 128 bits (representing integer data) in `a` and
@@ -3246,22 +3310,6 @@ unsafe extern "C" {
     fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
     #[link_name = "llvm.x86.sse2.psad.bw"]
     fn psadbw(a: u8x16, b: u8x16) -> u64x2;
-    #[link_name = "llvm.x86.sse2.psll.w"]
-    fn psllw(a: i16x8, count: i16x8) -> i16x8;
-    #[link_name = "llvm.x86.sse2.psll.d"]
-    fn pslld(a: i32x4, count: i32x4) -> i32x4;
-    #[link_name = "llvm.x86.sse2.psll.q"]
-    fn psllq(a: i64x2, count: i64x2) -> i64x2;
-    #[link_name = "llvm.x86.sse2.psra.w"]
-    fn psraw(a: i16x8, count: i16x8) -> i16x8;
-    #[link_name = "llvm.x86.sse2.psra.d"]
-    fn psrad(a: i32x4, count: i32x4) -> i32x4;
-    #[link_name = "llvm.x86.sse2.psrl.w"]
-    fn psrlw(a: i16x8, count: i16x8) -> i16x8;
-    #[link_name = "llvm.x86.sse2.psrl.d"]
-    fn psrld(a: i32x4, count: i32x4) -> i32x4;
-    #[link_name = "llvm.x86.sse2.psrl.q"]
-    fn psrlq(a: i64x2, count: i64x2) -> i64x2;
     #[link_name = "llvm.x86.sse2.cvtps2dq"]
     fn cvtps2dq(a: __m128) -> i32x4;
     #[link_name = "llvm.x86.sse2.maskmov.dqu"]
@@ -3770,7 +3818,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    fn test_mm_sll_epi16() {
+    const fn test_mm_sll_epi16() {
         let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
         let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(
@@ -3795,7 +3843,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    fn test_mm_sll_epi32() {
+    const fn test_mm_sll_epi32() {
         let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
         let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
@@ -3817,7 +3865,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    fn test_mm_sll_epi64() {
+    const fn test_mm_sll_epi64() {
         let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
         let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
@@ -3842,7 +3890,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    fn test_mm_sra_epi16() {
+    const fn test_mm_sra_epi16() {
         let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
         let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(
@@ -3867,7 +3915,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    fn test_mm_sra_epi32() {
+    const fn test_mm_sra_epi32() {
         let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
         let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
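Unlike the clamped arithmetic shifts, the logical `sll`/`srl` conversions above need an explicit branch to zero: the hardware zeroes every lane once the count reaches the element width, while a plain Rust shift by the full width would be an overflow. A scalar sketch (illustrative only, not stdarch code):

```rust
/// Scalar model of the logical-shift lowering used above: counts at
/// or beyond the element width must yield zero, which the explicit
/// branch provides (a bare `>>` by 16 on a u16 would overflow).
fn srl16_model(a: u16, count: u64) -> u16 {
    if count >= 16 { 0 } else { a >> count }
}

fn main() {
    assert_eq!(srl16_model(0x00CC, 4), 0x000C);
    assert_eq!(srl16_model(0xFFFF, 16), 0); // whole-width shift zeroes the lane
}
```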
@@ -3921,7 +3969,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    fn test_mm_srl_epi16() {
+    const fn test_mm_srl_epi16() {
         let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
         let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(
@@ -3946,7 +3994,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    fn test_mm_srl_epi32() {
+    const fn test_mm_srl_epi32() {
         let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
         let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
@@ -3968,7 +4016,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    fn test_mm_srl_epi64() {
+    const fn test_mm_srl_epi64() {
         let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
         let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs
index 0fd9b09363..2b7a3f35a3 100644
--- a/crates/core_arch/src/x86_64/avx512f.rs
+++ b/crates/core_arch/src/x86_64/avx512f.rs
@@ -9225,7 +9225,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_sll_epi64() {
+    const fn test_mm512_sll_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 63, 1 << 32, 1 << 32, 1 << 32,
@@ -9245,7 +9245,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_mask_sll_epi64() {
+    const fn test_mm512_mask_sll_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 63, 1 << 32, 1 << 32, 1 << 32,
@@ -9264,7 +9264,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_maskz_sll_epi64() {
+    const fn test_mm512_maskz_sll_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 32, 1 << 32, 1 << 32,
@@ -9279,7 +9279,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_mask_sll_epi64() {
+    const fn test_mm256_mask_sll_epi64() {
         let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_mask_sll_epi64(a, 0, a, count);
@@ -9290,7 +9290,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_maskz_sll_epi64() {
+    const fn test_mm256_maskz_sll_epi64() {
         let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_maskz_sll_epi64(0, a, count);
@@ -9301,7 +9301,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_mask_sll_epi64() {
+    const fn test_mm_mask_sll_epi64() {
         let a = _mm_set_epi64x(1 << 63, 1 << 32);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_mask_sll_epi64(a, 0, a, count);
@@ -9312,7 +9312,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_maskz_sll_epi64() {
+    const fn test_mm_maskz_sll_epi64() {
         let a = _mm_set_epi64x(1 << 63, 1 << 32);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_maskz_sll_epi64(0, a, count);
@@ -9323,7 +9323,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_srl_epi64() {
+    const fn test_mm512_srl_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 0, 1 << 32, 1 << 32, 1 << 32,
@@ -9340,7 +9340,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_mask_srl_epi64() {
+    const fn test_mm512_mask_srl_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 0, 1 << 32, 1 << 32, 1 << 32,
@@ -9359,7 +9359,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_maskz_srl_epi64() {
+    const fn test_mm512_maskz_srl_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 32, 1 << 32, 1 << 32,
@@ -9374,7 +9374,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_mask_srl_epi64() {
+    const fn test_mm256_mask_srl_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_mask_srl_epi64(a, 0, a, count);
@@ -9385,7 +9385,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_maskz_srl_epi64() {
+    const fn test_mm256_maskz_srl_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_maskz_srl_epi64(0, a, count);
@@ -9396,7 +9396,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_mask_srl_epi64() {
+    const fn test_mm_mask_srl_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_mask_srl_epi64(a, 0, a, count);
@@ -9407,7 +9407,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_maskz_srl_epi64() {
+    const fn test_mm_maskz_srl_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_maskz_srl_epi64(0, a, count);
@@ -9418,7 +9418,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_sra_epi64() {
+    const fn test_mm512_sra_epi64() {
         let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
         let count = _mm_set_epi64x(0, 2);
         let r = _mm512_sra_epi64(a, count);
@@ -9427,7 +9427,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_mask_sra_epi64() {
+    const fn test_mm512_mask_sra_epi64() {
         let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
         let count = _mm_set_epi64x(0, 2);
         let r = _mm512_mask_sra_epi64(a, 0, a, count);
@@ -9438,7 +9438,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_maskz_sra_epi64() {
+    const fn test_mm512_maskz_sra_epi64() {
         let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
         let count = _mm_set_epi64x(0, 2);
         let r = _mm512_maskz_sra_epi64(0, a, count);
@@ -9449,7 +9449,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_sra_epi64() {
+    const fn test_mm256_sra_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_sra_epi64(a, count);
@@ -9458,7 +9458,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_mask_sra_epi64() {
+    const fn test_mm256_mask_sra_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_mask_sra_epi64(a, 0, a, count);
@@ -9469,7 +9469,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_maskz_sra_epi64() {
+    const fn test_mm256_maskz_sra_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_maskz_sra_epi64(0, a, count);
@@ -9480,7 +9480,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_sra_epi64() {
+    const fn test_mm_sra_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_sra_epi64(a, count);
@@ -9489,7 +9489,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_mask_sra_epi64() {
+    const fn test_mm_mask_sra_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_mask_sra_epi64(a, 0, a, count);
@@ -9500,7 +9500,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_maskz_sra_epi64() {
+    const fn test_mm_maskz_sra_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_maskz_sra_epi64(0, a, count);
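With the intrinsics made `const fn`, the converted tests can in principle be exercised during const evaluation as well as at runtime. A minimal compile-time check in the same spirit, using a scalar stand-in rather than the actual `#[simd_test]` harness:

```rust
/// Scalar stand-in for the `_mm_sll_epi16` lowering; `const fn`
/// makes it callable in const contexts, like the converted intrinsics.
const fn sll16_model(a: u16, count: u64) -> u16 {
    if count >= 16 { 0 } else { a << count }
}

// Evaluated entirely at compile time; a failing assert is a compile error.
const _: () = assert!(sll16_model(0x00CC, 4) == 0x0CC0);
const _: () = assert!(sll16_model(0x00CC, 16) == 0);

fn main() {}
```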