From 6283fd634f50159e45dd41dcf35c0d18ec042648 Mon Sep 17 00:00:00 2001 From: okaneco <47607823+okaneco@users.noreply.github.com> Date: Thu, 19 Feb 2026 12:30:11 -0500 Subject: [PATCH 1/6] x86: use `simd::intrinsics` for shifts Use intrinsics in `sse2`, `avx2`, `avx512f`, and `avx512bw` Mark intrinsics and tests that call these functions as const --- crates/core_arch/src/x86/sse2.rs | 128 +++++++++++++++++++++---------- 1 file changed, 88 insertions(+), 40 deletions(-) diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs index fbf62c362f..6c9b78b353 100644 --- a/crates/core_arch/src/x86/sse2.rs +++ b/crates/core_arch/src/x86/sse2.rs @@ -577,8 +577,17 @@ pub const fn _mm_slli_epi16(a: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psllw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 16 { + _mm_setzero_si128() + } else { + // SAFETY: We checked above that the shift is less than 16 bits. + simd_shl(a.as_u16x8(), u16x8::splat(shift as u16)).as_m128i() + } + } } /// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros. @@ -609,8 +618,17 @@ pub const fn _mm_slli_epi32(a: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pslld))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 32 { + _mm_setzero_si128() + } else { + // SAFETY: We checked above that the shift is less than 32 bits. + simd_shl(a.as_u32x4(), u32x4::splat(shift as u32)).as_m128i() + } + } } /// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros. @@ -641,8 +659,17 @@ pub const fn _mm_slli_epi64(a: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psllq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 64 { + _mm_setzero_si128() + } else { + // SAFETY: We checked above that the shift is less than 64 bits. + simd_shl(a.as_u64x2(), u64x2::splat(shift as u64)).as_m128i() + } + } } /// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign @@ -668,8 +695,13 @@ pub const fn _mm_srai_epi16(a: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psraw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i { + let shift = count.as_u64x2().as_array()[0].min(15); + unsafe { + // SAFETY: We checked above that the shift is less than 16 bits. + simd_shr(a.as_i16x8(), i16x8::splat(shift as i16)).as_m128i() + } } /// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign @@ -695,8 +727,13 @@ pub const fn _mm_srai_epi32(a: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrad))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i { + let shift = count.as_u64x2().as_array()[0].min(31); + unsafe { + // SAFETY: We checked above that the shift is less than 32 bits. + simd_shr(a.as_i32x4(), i32x4::splat(shift as i32)).as_m128i() + } } /// Shifts `a` right by `IMM8` bytes while shifting in zeros. @@ -780,8 +817,17 @@ pub const fn _mm_srli_epi16(a: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrlw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 16 { + _mm_setzero_si128() + } else { + // SAFETY: We checked above that the shift is less than 16 bits. + simd_shr(a.as_u16x8(), u16x8::splat(shift as u16)).as_m128i() + } + } } /// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in @@ -813,8 +859,17 @@ pub const fn _mm_srli_epi32(a: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrld))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 32 { + _mm_setzero_si128() + } else { + // SAFETY: We checked above that the shift is less than 32 bits. + simd_shr(a.as_u32x4(), u32x4::splat(shift as u32)).as_m128i() + } + } } /// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in @@ -846,8 +901,17 @@ pub const fn _mm_srli_epi64(a: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrlq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 64 { + _mm_setzero_si128() + } else { + // SAFETY: We checked above that the shift is less than 64 bits. + simd_shr(a.as_u64x2(), u64x2::splat(shift as u64)).as_m128i() + } + } } /// Computes the bitwise AND of 128 bits (representing integer data) in `a` and @@ -3246,22 +3310,6 @@ unsafe extern "C" { fn pmaddwd(a: i16x8, b: i16x8) -> i32x4; #[link_name = "llvm.x86.sse2.psad.bw"] fn psadbw(a: u8x16, b: u8x16) -> u64x2; - #[link_name = "llvm.x86.sse2.psll.w"] - fn psllw(a: i16x8, count: i16x8) -> i16x8; - #[link_name = "llvm.x86.sse2.psll.d"] - fn pslld(a: i32x4, count: i32x4) -> i32x4; - #[link_name = "llvm.x86.sse2.psll.q"] - fn psllq(a: i64x2, count: i64x2) -> i64x2; - #[link_name = "llvm.x86.sse2.psra.w"] - fn psraw(a: i16x8, count: i16x8) -> i16x8; - #[link_name = "llvm.x86.sse2.psra.d"] - fn psrad(a: i32x4, count: i32x4) -> i32x4; - #[link_name = "llvm.x86.sse2.psrl.w"] - fn psrlw(a: i16x8, count: i16x8) -> i16x8; - #[link_name = "llvm.x86.sse2.psrl.d"] - fn psrld(a: i32x4, count: i32x4) -> i32x4; - #[link_name = "llvm.x86.sse2.psrl.q"] - fn psrlq(a: i64x2, count: i64x2) -> i64x2; #[link_name = "llvm.x86.sse2.cvtps2dq"] fn cvtps2dq(a: __m128) -> i32x4; #[link_name = "llvm.x86.sse2.maskmov.dqu"] @@ -3770,7 +3818,7 @@ mod tests { } #[simd_test(enable = "sse2")] - fn test_mm_sll_epi16() { + const fn test_mm_sll_epi16() { let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4)); assert_eq_m128i( @@ -3795,7 +3843,7 @@ mod tests { } #[simd_test(enable = "sse2")] - fn test_mm_sll_epi32() { + const fn test_mm_sll_epi32() { let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4)); assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0)); @@ -3817,7 +3865,7 @@ mod tests { } #[simd_test(enable = "sse2")] - fn test_mm_sll_epi64() { + const fn test_mm_sll_epi64() { let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4)); assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0)); @@ -3842,7 +3890,7 @@ mod tests { } #[simd_test(enable = "sse2")] - fn test_mm_sra_epi16() { + const fn test_mm_sra_epi16() { let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4)); assert_eq_m128i( @@ -3867,7 +3915,7 @@ mod tests { } #[simd_test(enable = "sse2")] - fn test_mm_sra_epi32() { + const fn test_mm_sra_epi32() { let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4)); assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000)); @@ -3921,7 +3969,7 @@ mod tests { } #[simd_test(enable = "sse2")] - fn test_mm_srl_epi16() { + const fn test_mm_srl_epi16() { let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF); let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4)); assert_eq_m128i( @@ -3946,7 +3994,7 @@ mod tests { } #[simd_test(enable = "sse2")] - fn test_mm_srl_epi32() { + const fn test_mm_srl_epi32() { let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF); let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4)); assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000)); @@ -3968,7 +4016,7 @@ mod tests { } #[simd_test(enable = "sse2")] - fn test_mm_srl_epi64() { + const fn test_mm_srl_epi64() { let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF); let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4)); assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000)); From 90ce0899c854e6fe8ccdb01dc6c71795fc99ad5a Mon Sep 17 00:00:00 2001 From: okaneco <47607823+okaneco@users.noreply.github.com> Date: Thu, 19 Feb 2026 12:55:30 -0500 Subject: [PATCH 2/6] Use intrinsics in `avx2` --- crates/core_arch/src/x86/avx2.rs | 128 +++++++++++++++++++++---------- 1 file changed, 88 insertions(+), 40 deletions(-) diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs index ca4ca9a2de..ef5bf6b556 100644 --- a/crates/core_arch/src/x86/avx2.rs +++ b/crates/core_arch/src/x86/avx2.rs @@ -2778,8 +2778,17 @@ pub fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i { - unsafe { transmute(psllw(a.as_i16x16(), count.as_i16x8())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 16 { + _mm256_setzero_si256() + } else { + // SAFETY: We checked above that the shift is less than 16 bits. + simd_shl(a.as_u16x16(), u16x16::splat(shift as u16)).as_m256i() + } + } } /// Shifts packed 32-bit integers in `a` left by `count` while @@ -2790,8 +2799,17 @@ pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpslld))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i { - unsafe { transmute(pslld(a.as_i32x8(), count.as_i32x4())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 32 { + _mm256_setzero_si256() + } else { + // SAFETY: We checked above that the shift is less than 32 bits. + simd_shl(a.as_u32x8(), u32x8::splat(shift as u32)).as_m256i() + } + } } /// Shifts packed 64-bit integers in `a` left by `count` while @@ -2802,8 +2820,17 @@ pub fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i { - unsafe { transmute(psllq(a.as_i64x4(), count.as_i64x2())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 64 { + _mm256_setzero_si256() + } else { + // SAFETY: We checked above that the shift is less than 64 bits. + simd_shl(a.as_u64x4(), u64x4::splat(shift as u64)).as_m256i() + } + } } /// Shifts packed 16-bit integers in `a` left by `IMM8` while @@ -3030,8 +3057,13 @@ pub const fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsraw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i { - unsafe { transmute(psraw(a.as_i16x16(), count.as_i16x8())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i { + let shift = count.as_u64x2().as_array()[0].min(15); + unsafe { + // SAFETY: We checked above that the shift is less than 16 bits. + simd_shr(a.as_i16x16(), i16x16::splat(shift as i16)).as_m256i() + } } /// Shifts packed 32-bit integers in `a` right by `count` while @@ -3042,8 +3074,13 @@ pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrad))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i { - unsafe { transmute(psrad(a.as_i32x8(), count.as_i32x4())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i { + let shift = count.as_u64x2().as_array()[0].min(31); + unsafe { + // SAFETY: We checked above that the shift is less than 32 bits. + simd_shr(a.as_i32x8(), i32x8::splat(shift as i32)).as_m256i() + } } /// Shifts packed 16-bit integers in `a` right by `IMM8` while @@ -3197,8 +3234,17 @@ pub const fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i { - unsafe { transmute(psrlw(a.as_i16x16(), count.as_i16x8())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 16 { + _mm256_setzero_si256() + } else { + // SAFETY: We checked above that the shift is less than 16 bits. + simd_shr(a.as_u16x16(), u16x16::splat(shift as u16)).as_m256i() + } + } } /// Shifts packed 32-bit integers in `a` right by `count` while shifting in @@ -3209,8 +3255,17 @@ pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrld))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i { - unsafe { transmute(psrld(a.as_i32x8(), count.as_i32x4())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 32 { + _mm256_setzero_si256() + } else { + // SAFETY: We checked above that the shift is less than 32 bits. + simd_shr(a.as_u32x8(), u32x8::splat(shift as u32)).as_m256i() + } + } } /// Shifts packed 64-bit integers in `a` right by `count` while shifting in @@ -3221,8 +3276,17 @@ pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlq))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i { - unsafe { transmute(psrlq(a.as_i64x4(), count.as_i64x2())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 64 { + _mm256_setzero_si256() + } else { + // SAFETY: We checked above that the shift is less than 64 bits. + simd_shr(a.as_u64x4(), u64x4::splat(shift as u64)).as_m256i() + } + } } /// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in @@ -3919,22 +3983,6 @@ unsafe extern "C" { fn psignw(a: i16x16, b: i16x16) -> i16x16; #[link_name = "llvm.x86.avx2.psign.d"] fn psignd(a: i32x8, b: i32x8) -> i32x8; - #[link_name = "llvm.x86.avx2.psll.w"] - fn psllw(a: i16x16, count: i16x8) -> i16x16; - #[link_name = "llvm.x86.avx2.psll.d"] - fn pslld(a: i32x8, count: i32x4) -> i32x8; - #[link_name = "llvm.x86.avx2.psll.q"] - fn psllq(a: i64x4, count: i64x2) -> i64x4; - #[link_name = "llvm.x86.avx2.psra.w"] - fn psraw(a: i16x16, count: i16x8) -> i16x16; - #[link_name = "llvm.x86.avx2.psra.d"] - fn psrad(a: i32x8, count: i32x4) -> i32x8; - #[link_name = "llvm.x86.avx2.psrl.w"] - fn psrlw(a: i16x16, count: i16x8) -> i16x16; - #[link_name = "llvm.x86.avx2.psrl.d"] - fn psrld(a: i32x8, count: i32x4) -> i32x8; - #[link_name = "llvm.x86.avx2.psrl.q"] - fn psrlq(a: i64x4, count: i64x2) -> i64x4; #[link_name = "llvm.x86.avx2.pshuf.b"] fn pshufb(a: u8x32, b: u8x32) -> u8x32; #[link_name = "llvm.x86.avx2.permd"] @@ -5184,7 +5232,7 @@ mod tests { } #[simd_test(enable = "avx2")] - fn test_mm256_sll_epi16() { + const fn test_mm256_sll_epi16() { let a = _mm256_set1_epi16(0xFF); let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4); let r = _mm256_sll_epi16(a, b); @@ -5192,7 +5240,7 @@ mod tests { } #[simd_test(enable = "avx2")] - fn test_mm256_sll_epi32() { + const fn test_mm256_sll_epi32() { let a = _mm256_set1_epi32(0xFFFF); let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4); let r = _mm256_sll_epi32(a, b); @@ -5200,7 +5248,7 @@ mod tests { } #[simd_test(enable = "avx2")] - fn test_mm256_sll_epi64() { + const fn test_mm256_sll_epi64() { let a = _mm256_set1_epi64x(0xFFFFFFFF); let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4); let r = _mm256_sll_epi64(a, b); @@ -5275,7 +5323,7 @@ mod tests { } #[simd_test(enable = "avx2")] - fn test_mm256_sra_epi16() { + const fn test_mm256_sra_epi16() { let a = _mm256_set1_epi16(-1); let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); let r = _mm256_sra_epi16(a, b); @@ -5283,7 +5331,7 @@ mod tests { } #[simd_test(enable = "avx2")] - fn test_mm256_sra_epi32() { + const fn test_mm256_sra_epi32() { let a = _mm256_set1_epi32(-1); let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1); let r = _mm256_sra_epi32(a, b); @@ -5345,7 +5393,7 @@ mod tests { } #[simd_test(enable = "avx2")] - fn test_mm256_srl_epi16() { + const fn test_mm256_srl_epi16() { let a = _mm256_set1_epi16(0xFF); let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4); let r = _mm256_srl_epi16(a, b); @@ -5353,7 +5401,7 @@ mod tests { } #[simd_test(enable = "avx2")] - fn test_mm256_srl_epi32() { + const fn test_mm256_srl_epi32() { let a = _mm256_set1_epi32(0xFFFF); let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4); let r = _mm256_srl_epi32(a, b); @@ -5361,7 +5409,7 @@ mod tests { } #[simd_test(enable = "avx2")] - fn test_mm256_srl_epi64() { + const fn test_mm256_srl_epi64() { let a = _mm256_set1_epi64x(0xFFFFFFFF); let b = _mm_setr_epi64x(4, 0); let r = _mm256_srl_epi64(a, b); From 1fd2c6b7fffbb7edf95a19a3a8a3995e2325b2b4 Mon Sep 17 00:00:00 2001 From: okaneco <47607823+okaneco@users.noreply.github.com> Date: Thu, 19 Feb 2026 13:17:48 -0500 Subject: [PATCH 3/6] Use intrinsics in `avx512bw` and add const to applicable fns/tests --- crates/core_arch/src/x86/avx512bw.rs | 95 +++++++++++++++++++--------- 1 file changed, 65 insertions(+), 30 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 78801e8902..c88618cc08 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7217,8 +7217,17 @@ pub const fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllw))] -pub fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i { - unsafe { transmute(vpsllw(a.as_i16x32(), count.as_i16x8())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 16 { + _mm512_setzero_si512() + } else { + // SAFETY: We checked above that the shift is less than 16 bits. + simd_shl(a.as_u16x32(), u16x32::splat(shift as u16)).as_m512i() + } + } } /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7228,7 +7237,13 @@ pub fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllw))] -pub fn _mm512_mask_sll_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_sll_epi16( + src: __m512i, + k: __mmask32, + a: __m512i, + count: __m128i, +) -> __m512i { unsafe { let shf = _mm512_sll_epi16(a, count).as_i16x32(); transmute(simd_select_bitmask(k, shf, src.as_i16x32())) @@ -7242,7 +7257,8 @@ pub fn _mm512_mask_sll_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m1 #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllw))] -pub fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { unsafe { let shf = _mm512_sll_epi16(a, count).as_i16x32(); transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) @@ -7621,8 +7637,17 @@ pub const fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __ #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i { - unsafe { transmute(vpsrlw(a.as_i16x32(), count.as_i16x8())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 16 { + _mm512_setzero_si512() + } else { + // SAFETY: We checked above that the shift is less than 16 bits. + simd_shr(a.as_u16x32(), u16x32::splat(shift as u16)).as_m512i() + } + } } /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7632,7 +7657,13 @@ pub fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub fn _mm512_mask_srl_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_srl_epi16( + src: __m512i, + k: __mmask32, + a: __m512i, + count: __m128i, +) -> __m512i { unsafe { let shf = _mm512_srl_epi16(a, count).as_i16x32(); transmute(simd_select_bitmask(k, shf, src.as_i16x32())) @@ -7646,7 +7677,8 @@ pub fn _mm512_mask_srl_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m1 #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { unsafe { let shf = _mm512_srl_epi16(a, count).as_i16x32(); transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) @@ -8010,8 +8042,13 @@ pub const fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __ #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraw))] -pub fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i { - unsafe { transmute(vpsraw(a.as_i16x32(), count.as_i16x8())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i { + let shift = count.as_u64x2().as_array()[0].min(15); + unsafe { + // SAFETY: We checked above that the shift is less than 16 bits. + simd_shr(a.as_i16x32(), i16x32::splat(shift as i16)).as_m512i() + } } /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8021,7 +8058,13 @@ pub fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraw))] -pub fn _mm512_mask_sra_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_sra_epi16( + src: __m512i, + k: __mmask32, + a: __m512i, + count: __m128i, +) -> __m512i { unsafe { let shf = _mm512_sra_epi16(a, count).as_i16x32(); transmute(simd_select_bitmask(k, shf, src.as_i16x32())) @@ -8035,7 +8078,8 @@ pub fn _mm512_mask_sra_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m1 #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraw))] -pub fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { unsafe { let shf = _mm512_sra_epi16(a, count).as_i16x32(); transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) @@ -12706,15 +12750,6 @@ unsafe extern "C" { #[link_name = "llvm.x86.avx512.pmaddubs.w.512"] fn vpmaddubsw(a: u8x64, b: i8x64) -> i16x32; - #[link_name = "llvm.x86.avx512.psll.w.512"] - fn vpsllw(a: i16x32, count: i16x8) -> i16x32; - - #[link_name = "llvm.x86.avx512.psrl.w.512"] - fn vpsrlw(a: i16x32, count: i16x8) -> i16x32; - - #[link_name = "llvm.x86.avx512.psra.w.512"] - fn vpsraw(a: i16x32, count: i16x8) -> i16x32; - #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"] fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32; #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"] @@ -18334,7 +18369,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_sll_epi16() { + const fn test_mm512_sll_epi16() { let a = _mm512_set1_epi16(1 << 15); let count = _mm_set1_epi16(2); let r = _mm512_sll_epi16(a, count); @@ -18343,7 +18378,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_mask_sll_epi16() { + const fn test_mm512_mask_sll_epi16() { let a = _mm512_set1_epi16(1 << 15); let count = _mm_set1_epi16(2); let r = _mm512_mask_sll_epi16(a, 0, a, count); @@ -18354,7 +18389,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_maskz_sll_epi16() { + const fn test_mm512_maskz_sll_epi16() { let a = _mm512_set1_epi16(1 << 15); let count = _mm_set1_epi16(2); let r = _mm512_maskz_sll_epi16(0, a, count); @@ -18570,7 +18605,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_srl_epi16() { + const fn test_mm512_srl_epi16() { let a = _mm512_set1_epi16(1 << 1); let count = _mm_set1_epi16(2); let r = _mm512_srl_epi16(a, count); @@ -18579,7 +18614,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_mask_srl_epi16() { + const fn test_mm512_mask_srl_epi16() { let a = _mm512_set1_epi16(1 << 1); let count = _mm_set1_epi16(2); let r = _mm512_mask_srl_epi16(a, 0, a, count); @@ -18590,7 +18625,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_maskz_srl_epi16() { + const fn test_mm512_maskz_srl_epi16() { let a = _mm512_set1_epi16(1 << 1); let count = _mm_set1_epi16(2); let r = _mm512_maskz_srl_epi16(0, a, count); @@ -18806,7 +18841,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_sra_epi16() { + const fn test_mm512_sra_epi16() { let a = _mm512_set1_epi16(8); let count = _mm_set1_epi16(1); let r = _mm512_sra_epi16(a, count); @@ -18815,7 +18850,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_mask_sra_epi16() { + const fn test_mm512_mask_sra_epi16() { let a = _mm512_set1_epi16(8); let count = _mm_set1_epi16(1); let r = _mm512_mask_sra_epi16(a, 0, a, count); @@ -18826,7 +18861,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_maskz_sra_epi16() { + const fn test_mm512_maskz_sra_epi16() { let a = _mm512_set1_epi16(8); let count = _mm_set1_epi16(1); let r = _mm512_maskz_sra_epi16(0, a, count); From cef4ad5bbe881341ae33a3e2c4813e8728fdde24 Mon Sep 17 00:00:00 2001 From: okaneco <47607823+okaneco@users.noreply.github.com> Date: Thu, 19 Feb 2026 13:51:26 -0500 Subject: [PATCH 4/6] Use intrinsics in `avx512f` and add const to applicable fns/tests --- crates/core_arch/src/x86/avx512f.rs | 208 ++++++++++++++++++------- crates/core_arch/src/x86_64/avx512f.rs | 30 ++-- 2 files changed, 163 insertions(+), 75 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 3730496e1e..7882509932 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -20652,8 +20652,17 @@ pub const fn _mm_maskz_srli_epi64(k: __mmask8, a: __m128i) -> _ #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpslld))] -pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i { - unsafe { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 32 { + _mm512_setzero_si512() + } else { + // SAFETY: We checked above that the shift is less than 32 bits. + simd_shl(a.as_u32x16(), u32x16::splat(shift as u32)).as_m512i() + } + } } /// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20663,7 +20672,13 @@ pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpslld))] -pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_sll_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m128i, +) -> __m512i { unsafe { let shf = _mm512_sll_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) @@ -20677,7 +20692,8 @@ pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m1 #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpslld))] -pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { unsafe { let shf = _mm512_sll_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) @@ -20747,8 +20763,17 @@ pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrld))] -pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i { - unsafe { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 32 { + _mm512_setzero_si512() + } else { + // SAFETY: We checked above that the shift is less than 32 bits. + simd_shr(a.as_u32x16(), u32x16::splat(shift as u32)).as_m512i() + } + } } /// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20758,7 +20783,13 @@ pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrld))] -pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_srl_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m128i, +) -> __m512i { unsafe { let shf = _mm512_srl_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) @@ -20772,7 +20803,8 @@ pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m1 #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrld))] -pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { unsafe { let shf = _mm512_srl_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) @@ -20842,8 +20874,17 @@ pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllq))] -pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i { - unsafe { transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 64 { + _mm512_setzero_si512() + } else { + // SAFETY: We checked above that the shift is less than 64 bits. + simd_shl(a.as_u64x8(), u64x8::splat(shift as u64)).as_m512i() + } + } } /// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20853,7 +20894,13 @@ pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllq))] -pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_sll_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + count: __m128i, +) -> __m512i { unsafe { let shf = _mm512_sll_epi64(a, count).as_i64x8(); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) @@ -20867,7 +20914,8 @@ pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m12 #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllq))] -pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { unsafe { let shf = _mm512_sll_epi64(a, count).as_i64x8(); transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) @@ -20937,8 +20985,17 @@ pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i { - unsafe { transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i { + let shift = count.as_u64x2().as_array()[0]; + unsafe { + if shift >= 64 { + _mm512_setzero_si512() + } else { + // SAFETY: We checked above that the shift is less than 64 bits. + simd_shr(a.as_u64x8(), u64x8::splat(shift as u64)).as_m512i() + } + } } /// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20948,7 +21005,13 @@ pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_srl_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + count: __m128i, +) -> __m512i { unsafe { let shf = _mm512_srl_epi64(a, count).as_i64x8(); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) @@ -20962,7 +21025,8 @@ pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m12 #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { unsafe { let shf = _mm512_srl_epi64(a, count).as_i64x8(); transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) @@ -21032,8 +21096,13 @@ pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrad))] -pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i { - unsafe { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i { + let shift = count.as_u64x2().as_array()[0].min(31); + unsafe { + // SAFETY: We checked above that the shift is less than 32 bits. + simd_shr(a.as_i32x16(), i32x16::splat(shift as i32)).as_m512i() + } } /// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21043,7 +21112,13 @@ pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrad))] -pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_sra_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m128i, +) -> __m512i { unsafe { let shf = _mm512_sra_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, src.as_i32x16())) @@ -21057,7 +21132,8 @@ pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m1 #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrad))] -pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { unsafe { let shf = _mm512_sra_epi32(a, count).as_i32x16(); transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) @@ -21127,8 +21203,13 @@ pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraq))] -pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i { - unsafe { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i { + let shift = count.as_u64x2().as_array()[0].min(63); + unsafe { + // SAFETY: We checked above that the shift is less than 64 bits. + simd_shr(a.as_i64x8(), i64x8::splat(shift as i64)).as_m512i() + } } /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21138,7 +21219,13 @@ pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraq))] -pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_sra_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + count: __m128i, +) -> __m512i { unsafe { let shf = _mm512_sra_epi64(a, count).as_i64x8(); transmute(simd_select_bitmask(k, shf, src.as_i64x8())) @@ -21152,7 +21239,8 @@ pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m12 #[target_feature(enable = "avx512f")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraq))] -pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { unsafe { let shf = _mm512_sra_epi64(a, count).as_i64x8(); transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) @@ -21166,8 +21254,13 @@ pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraq))] -pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i { - unsafe { transmute(vpsraq256(a.as_i64x4(), count.as_i64x2())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i { + let shift = count.as_u64x2().as_array()[0].min(63); + unsafe { + // SAFETY: We checked above that the shift is less than 64 bits. + simd_shr(a.as_i64x4(), i64x4::splat(shift as i64)).as_m256i() + } } /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21177,7 +21270,13 @@ pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i { #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraq))] -pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_sra_epi64( + src: __m256i, + k: __mmask8, + a: __m256i, + count: __m128i, +) -> __m256i { unsafe { let shf = _mm256_sra_epi64(a, count).as_i64x4(); transmute(simd_select_bitmask(k, shf, src.as_i64x4())) @@ -21191,7 +21290,8 @@ pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraq))] -pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { unsafe { let shf = _mm256_sra_epi64(a, count).as_i64x4(); transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) @@ -21205,8 +21305,13 @@ pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraq))] -pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i { - unsafe { transmute(vpsraq128(a.as_i64x2(), count.as_i64x2())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i { + let shift = count.as_u64x2().as_array()[0].min(63); + unsafe { + // SAFETY: We checked above that the shift is less than 64 bits. + simd_shr(a.as_i64x2(), i64x2::splat(shift as i64)).as_m128i() + } } /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21216,7 +21321,8 @@ pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i { #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraq))] -pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sra_epi64(a, count).as_i64x2(); transmute(simd_select_bitmask(k, shf, src.as_i64x2())) @@ -21230,7 +21336,8 @@ pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraq))] -pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sra_epi64(a, count).as_i64x2(); transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) @@ -44774,25 +44881,6 @@ unsafe extern "C" { #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"] fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8; - #[link_name = "llvm.x86.avx512.psll.d.512"] - fn vpslld(a: i32x16, count: i32x4) -> i32x16; - #[link_name = "llvm.x86.avx512.psrl.d.512"] - fn vpsrld(a: i32x16, count: i32x4) -> i32x16; - #[link_name = "llvm.x86.avx512.psll.q.512"] - fn vpsllq(a: i64x8, count: i64x2) -> i64x8; - #[link_name = "llvm.x86.avx512.psrl.q.512"] - fn vpsrlq(a: i64x8, count: i64x2) -> i64x8; - - #[link_name = "llvm.x86.avx512.psra.d.512"] - fn vpsrad(a: i32x16, count: i32x4) -> i32x16; - - #[link_name = "llvm.x86.avx512.psra.q.512"] - fn vpsraq(a: i64x8, count: i64x2) -> i64x8; - #[link_name = "llvm.x86.avx512.psra.q.256"] - fn vpsraq256(a: i64x4, count: i64x2) -> i64x4; - #[link_name = "llvm.x86.avx512.psra.q.128"] - fn vpsraq128(a: i64x2, count: i64x2) -> i64x2; - #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"] fn vpermilps(a: f32x16, b: i32x16) -> f32x16; #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"] @@ -54649,7 +54737,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - fn test_mm512_sll_epi32() { + const fn test_mm512_sll_epi32() { #[rustfmt::skip] let a = _mm512_set_epi32( 1 << 31, 1 << 0, 1 << 1, 1 << 2, @@ -54670,7 +54758,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - fn test_mm512_mask_sll_epi32() { + const fn test_mm512_mask_sll_epi32() { #[rustfmt::skip] let a = _mm512_set_epi32( 1 << 31, 1 << 0, 1 << 1, 1 << 2, @@ -54693,7 +54781,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - fn test_mm512_maskz_sll_epi32() { + const fn test_mm512_maskz_sll_epi32() { #[rustfmt::skip] let a = _mm512_set_epi32( 1 << 31, 1 << 0, 1 << 1, 1 << 2, @@ -54754,7 +54842,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - fn test_mm512_srl_epi32() { + const fn test_mm512_srl_epi32() { #[rustfmt::skip] let a = _mm512_set_epi32( 1 << 31, 1 << 0, 1 << 1, 1 << 2, @@ -54769,7 +54857,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - fn test_mm512_mask_srl_epi32() { + const fn test_mm512_mask_srl_epi32() { #[rustfmt::skip] let a = _mm512_set_epi32( 1 << 31, 1 << 0, 1 << 1, 1 << 2, @@ -54786,7 +54874,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - fn test_mm512_maskz_srl_epi32() { + const fn test_mm512_maskz_srl_epi32() { #[rustfmt::skip] let a = _mm512_set_epi32( 1 << 31, 1 << 0, 1 << 1, 1 << 2, @@ -54847,7 +54935,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - fn test_mm512_sra_epi32() { + const fn test_mm512_sra_epi32() { let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); let count = _mm_set_epi32(1, 0, 0, 2); let r = _mm512_sra_epi32(a, count); @@ -54856,7 +54944,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - fn test_mm512_mask_sra_epi32() { + const fn test_mm512_mask_sra_epi32() { let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16); let count = _mm_set_epi32(0, 0, 0, 2); let r = _mm512_mask_sra_epi32(a, 0, a, count); @@ -54867,7 +54955,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - fn test_mm512_maskz_sra_epi32() { + const fn test_mm512_maskz_sra_epi32() { let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14); let count = _mm_set_epi32(2, 0, 0, 2); let r = _mm512_maskz_sra_epi32(0, a, count); diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index 0fd9b09363..bf3f7a554e 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -9225,7 +9225,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - fn test_mm512_sll_epi64() { + const fn test_mm512_sll_epi64() { #[rustfmt::skip] let a = _mm512_set_epi64( 1 << 63, 1 << 32, 1 << 32, 1 << 32, @@ -9245,7 +9245,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - fn test_mm512_mask_sll_epi64() { + const fn test_mm512_mask_sll_epi64() { #[rustfmt::skip] let a = _mm512_set_epi64( 1 << 63, 1 << 32, 1 << 32, 1 << 32, @@ -9264,7 +9264,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - fn test_mm512_maskz_sll_epi64() { + const fn test_mm512_maskz_sll_epi64() { #[rustfmt::skip] let a = _mm512_set_epi64( 1 << 32, 1 << 32, 1 << 32, 1 << 32, @@ -9323,7 +9323,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - fn test_mm512_srl_epi64() { + const fn test_mm512_srl_epi64() { #[rustfmt::skip] let a = _mm512_set_epi64( 1 << 0, 1 << 32, 1 << 32, 1 << 32, @@ -9340,7 +9340,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - fn test_mm512_mask_srl_epi64() { + const fn test_mm512_mask_srl_epi64() { #[rustfmt::skip] let a = _mm512_set_epi64( 1 << 0, 1 << 32, 1 << 32, 1 << 32, @@ -9359,7 +9359,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - fn test_mm512_maskz_srl_epi64() { + const fn test_mm512_maskz_srl_epi64() { #[rustfmt::skip] let a = _mm512_set_epi64( 1 << 32, 1 << 32, 1 << 32, 1 << 32, @@ -9418,7 +9418,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - fn test_mm512_sra_epi64() { + const fn test_mm512_sra_epi64() { let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); let count = _mm_set_epi64x(0, 2); let r = _mm512_sra_epi64(a, count); @@ -9427,7 +9427,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - fn test_mm512_mask_sra_epi64() { + const fn test_mm512_mask_sra_epi64() { let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); let count = _mm_set_epi64x(0, 2); let r = _mm512_mask_sra_epi64(a, 0, a, count); @@ -9438,7 +9438,7 @@ mod tests { } #[simd_test(enable = "avx512f")] - fn test_mm512_maskz_sra_epi64() { + const fn test_mm512_maskz_sra_epi64() { let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); let count = _mm_set_epi64x(0, 2); let r = _mm512_maskz_sra_epi64(0, a, count); @@ -9449,7 +9449,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm256_sra_epi64() { + const fn test_mm256_sra_epi64() { let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); let count = _mm_set_epi64x(0, 1); let r = _mm256_sra_epi64(a, count); @@ -9458,7 +9458,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm256_mask_sra_epi64() { + const fn test_mm256_mask_sra_epi64() { let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); let count = _mm_set_epi64x(0, 1); let r = _mm256_mask_sra_epi64(a, 0, a, count); @@ -9469,7 +9469,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm256_maskz_sra_epi64() { + const fn test_mm256_maskz_sra_epi64() { let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); let count = _mm_set_epi64x(0, 1); let r = _mm256_maskz_sra_epi64(0, a, count); @@ -9480,7 +9480,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm_sra_epi64() { + const fn test_mm_sra_epi64() { let a = _mm_set_epi64x(1 << 5, 0); let count = _mm_set_epi64x(0, 1); let r = _mm_sra_epi64(a, count); @@ -9489,7 +9489,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm_mask_sra_epi64() { + const fn test_mm_mask_sra_epi64() { let a = _mm_set_epi64x(1 << 5, 0); let count = _mm_set_epi64x(0, 1); let r = _mm_mask_sra_epi64(a, 0, a, count); @@ -9500,7 +9500,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm_maskz_sra_epi64() { + const fn test_mm_maskz_sra_epi64() { let a = _mm_set_epi64x(1 << 5, 0); let count = _mm_set_epi64x(0, 1); let r = _mm_maskz_sra_epi64(0, a, count); From 227af9bc3955ea7ae1da453d61c3bf3fe115d3ba Mon Sep 17 00:00:00 2001 From: okaneco <47607823+okaneco@users.noreply.github.com> Date: Thu, 19 Feb 2026 14:20:18 -0500 Subject: [PATCH 5/6] Mark intrinsics and tests that call `sse2` shifts as const --- crates/core_arch/src/x86/avx512bw.rs | 30 ++++++++++-------- crates/core_arch/src/x86/avx512f.rs | 42 ++++++++++++++++---------- crates/core_arch/src/x86_64/avx512f.rs | 8 ++--- 3 files changed, 48 insertions(+), 32 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index c88618cc08..67c6c56473 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7300,7 +7300,8 @@ pub fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m25 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllw))] -pub fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sll_epi16(a, count).as_i16x8(); transmute(simd_select_bitmask(k, shf, src.as_i16x8())) @@ -7314,7 +7315,8 @@ pub fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllw))] -pub fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sll_epi16(a, count).as_i16x8(); transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) @@ -7720,7 +7722,8 @@ pub fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m25 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_srl_epi16(a, count).as_i16x8(); transmute(simd_select_bitmask(k, shf, src.as_i16x8())) @@ -7734,7 +7737,8 @@ pub fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_srl_epi16(a, count).as_i16x8(); transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) @@ -8121,7 +8125,8 @@ pub fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m25 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraw))] -pub fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sra_epi16(a, count).as_i16x8(); transmute(simd_select_bitmask(k, shf, src.as_i16x8())) @@ -8135,7 +8140,8 @@ pub fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraw))] -pub fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sra_epi16(a, count).as_i16x8(); transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) @@ -18422,7 +18428,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm_mask_sll_epi16() { + const fn test_mm_mask_sll_epi16() { let a = _mm_set1_epi16(1 << 15); let count = _mm_set1_epi16(2); let r = _mm_mask_sll_epi16(a, 0, a, count); @@ -18433,7 +18439,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm_maskz_sll_epi16() { + const fn test_mm_maskz_sll_epi16() { let a = _mm_set1_epi16(1 << 15); let count = _mm_set1_epi16(2); let r = _mm_maskz_sll_epi16(0, a, count); @@ -18658,7 +18664,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm_mask_srl_epi16() { + const fn test_mm_mask_srl_epi16() { let a = _mm_set1_epi16(1 << 1); let count = _mm_set1_epi16(2); let r = _mm_mask_srl_epi16(a, 0, a, count); @@ -18669,7 +18675,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm_maskz_srl_epi16() { + const fn test_mm_maskz_srl_epi16() { let a = _mm_set1_epi16(1 << 1); let count = _mm_set1_epi16(2); let r = _mm_maskz_srl_epi16(0, a, count); @@ -18894,7 +18900,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm_mask_sra_epi16() { + const fn test_mm_mask_sra_epi16() { let a = _mm_set1_epi16(8); let count = _mm_set1_epi16(1); let r = _mm_mask_sra_epi16(a, 0, a, count); @@ -18905,7 +18911,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm_maskz_sra_epi16() { + const fn test_mm_maskz_sra_epi16() { let a = _mm_set1_epi16(8); let count = _mm_set1_epi16(1); let r = _mm_maskz_sra_epi16(0, a, count); diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 7882509932..e3fe97f3be 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -20735,7 +20735,8 @@ pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpslld))] -pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sll_epi32(a, count).as_i32x4(); transmute(simd_select_bitmask(k, shf, src.as_i32x4())) @@ -20749,7 +20750,8 @@ pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpslld))] -pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sll_epi32(a, count).as_i32x4(); transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) @@ -20846,7 +20848,8 @@ pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrld))] -pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_srl_epi32(a, count).as_i32x4(); transmute(simd_select_bitmask(k, shf, src.as_i32x4())) @@ -20860,7 +20863,8 @@ pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrld))] -pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_srl_epi32(a, count).as_i32x4(); transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) @@ -20957,7 +20961,8 @@ pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllq))] -pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sll_epi64(a, count).as_i64x2(); transmute(simd_select_bitmask(k, shf, src.as_i64x2())) @@ -20971,7 +20976,8 @@ pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllq))] -pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sll_epi64(a, count).as_i64x2(); transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) @@ -21068,7 +21074,8 @@ pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_srl_epi64(a, count).as_i64x2(); transmute(simd_select_bitmask(k, shf, src.as_i64x2())) @@ -21082,7 +21089,8 @@ pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_srl_epi64(a, count).as_i64x2(); transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) @@ -21175,7 +21183,8 @@ pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrad))] -pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sra_epi32(a, count).as_i32x4(); transmute(simd_select_bitmask(k, shf, src.as_i32x4())) @@ -21189,7 +21198,8 @@ pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrad))] -pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { unsafe { let shf = _mm_sra_epi32(a, count).as_i32x4(); transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) @@ -54820,7 +54830,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm_mask_sll_epi32() { + const fn test_mm_mask_sll_epi32() { let a = _mm_set_epi32(1 << 13, 0, 0, 0); let count = _mm_set_epi32(0, 0, 0, 1); let r = _mm_mask_sll_epi32(a, 0, a, count); @@ -54831,7 +54841,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm_maskz_sll_epi32() { + const fn test_mm_maskz_sll_epi32() { let a = _mm_set_epi32(1 << 13, 0, 0, 0); let count = _mm_set_epi32(0, 0, 0, 1); let r = _mm_maskz_sll_epi32(0, a, count); @@ -54913,7 +54923,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm_mask_srl_epi32() { + const fn test_mm_mask_srl_epi32() { let a = _mm_set_epi32(1 << 5, 0, 0, 0); let count = _mm_set_epi32(0, 0, 0, 1); let r = _mm_mask_srl_epi32(a, 0, a, count); @@ -54924,7 +54934,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm_maskz_srl_epi32() { + const fn test_mm_maskz_srl_epi32() { let a = _mm_set_epi32(1 << 5, 0, 0, 0); let count = _mm_set_epi32(0, 0, 0, 1); let r = _mm_maskz_srl_epi32(0, a, count); @@ -54988,7 +54998,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm_mask_sra_epi32() { + const fn test_mm_mask_sra_epi32() { let a = _mm_set_epi32(1 << 5, 0, 0, 0); let count = _mm_set_epi32(0, 0, 0, 1); let r = _mm_mask_sra_epi32(a, 0, a, count); @@ -54999,7 +55009,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm_maskz_sra_epi32() { + const fn test_mm_maskz_sra_epi32() { let a = _mm_set_epi32(1 << 5, 0, 0, 0); let count = _mm_set_epi32(0, 0, 0, 1); let r = _mm_maskz_sra_epi32(0, a, count); diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index bf3f7a554e..6f9db8610f 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -9301,7 +9301,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm_mask_sll_epi64() { + const fn test_mm_mask_sll_epi64() { let a = _mm_set_epi64x(1 << 63, 1 << 32); let count = _mm_set_epi64x(0, 1); let r = _mm_mask_sll_epi64(a, 0, a, count); @@ -9312,7 +9312,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm_maskz_sll_epi64() { + const fn test_mm_maskz_sll_epi64() { let a = _mm_set_epi64x(1 << 63, 1 << 32); let count = _mm_set_epi64x(0, 1); let r = _mm_maskz_sll_epi64(0, a, count); @@ -9396,7 +9396,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm_mask_srl_epi64() { + const fn test_mm_mask_srl_epi64() { let a = _mm_set_epi64x(1 << 5, 0); let count = _mm_set_epi64x(0, 1); let r = _mm_mask_srl_epi64(a, 0, a, count); @@ -9407,7 +9407,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm_maskz_srl_epi64() { + const fn test_mm_maskz_srl_epi64() { let a = _mm_set_epi64x(1 << 5, 0); let count = _mm_set_epi64x(0, 1); let r = _mm_maskz_srl_epi64(0, a, count); From dda4463e2cd2a8ec20687f83fb18d893d7194f0b Mon Sep 17 00:00:00 2001 From: okaneco <47607823+okaneco@users.noreply.github.com> Date: Thu, 19 Feb 2026 14:36:28 -0500 Subject: [PATCH 6/6] Mark intrinsics and tests that call `avx2` shifts as const --- crates/core_arch/src/x86/avx512bw.rs | 45 ++++++++++++----- crates/core_arch/src/x86/avx512f.rs | 67 ++++++++++++++++++++------ crates/core_arch/src/x86_64/avx512f.rs | 8 +-- 3 files changed, 88 insertions(+), 32 deletions(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 67c6c56473..308b4fa460 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -7272,7 +7272,13 @@ pub const fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllw))] -pub fn _mm256_mask_sll_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_sll_epi16( + src: __m256i, + k: __mmask16, + a: __m256i, + count: __m128i, +) -> __m256i { unsafe { let shf = _mm256_sll_epi16(a, count).as_i16x16(); transmute(simd_select_bitmask(k, shf, src.as_i16x16())) @@ -7286,7 +7292,8 @@ pub fn _mm256_mask_sll_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m1 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllw))] -pub fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { unsafe { let shf = _mm256_sll_epi16(a, count).as_i16x16(); transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) @@ -7694,7 +7701,13 @@ pub const fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub fn _mm256_mask_srl_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_srl_epi16( + src: __m256i, + k: __mmask16, + a: __m256i, + count: __m128i, +) -> __m256i { unsafe { let shf = _mm256_srl_epi16(a, count).as_i16x16(); transmute(simd_select_bitmask(k, shf, src.as_i16x16())) @@ -7708,7 +7721,8 @@ pub fn _mm256_mask_srl_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m1 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlw))] -pub fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { unsafe { let shf = _mm256_srl_epi16(a, count).as_i16x16(); transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) @@ -8097,7 +8111,13 @@ pub const fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraw))] -pub fn _mm256_mask_sra_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_sra_epi16( + src: __m256i, + k: __mmask16, + a: __m256i, + count: __m128i, +) -> __m256i { unsafe { let shf = _mm256_sra_epi16(a, count).as_i16x16(); transmute(simd_select_bitmask(k, shf, src.as_i16x16())) @@ -8111,7 +8131,8 @@ pub fn _mm256_mask_sra_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m1 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsraw))] -pub fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { unsafe { let shf = _mm256_sra_epi16(a, count).as_i16x16(); transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) @@ -18406,7 +18427,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm256_mask_sll_epi16() { + const fn test_mm256_mask_sll_epi16() { let a = _mm256_set1_epi16(1 << 15); let count = _mm_set1_epi16(2); let r = _mm256_mask_sll_epi16(a, 0, a, count); @@ -18417,7 +18438,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm256_maskz_sll_epi16() { + const fn test_mm256_maskz_sll_epi16() { let a = _mm256_set1_epi16(1 << 15); let count = _mm_set1_epi16(2); let r = _mm256_maskz_sll_epi16(0, a, count); @@ -18642,7 +18663,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm256_mask_srl_epi16() { + const fn test_mm256_mask_srl_epi16() { let a = _mm256_set1_epi16(1 << 1); let count = _mm_set1_epi16(2); let r = _mm256_mask_srl_epi16(a, 0, a, count); @@ -18653,7 +18674,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm256_maskz_srl_epi16() { + const fn test_mm256_maskz_srl_epi16() { let a = _mm256_set1_epi16(1 << 1); let count = _mm_set1_epi16(2); let r = _mm256_maskz_srl_epi16(0, a, count); @@ -18878,7 +18899,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm256_mask_sra_epi16() { + const fn test_mm256_mask_sra_epi16() { let a = _mm256_set1_epi16(8); let count = _mm_set1_epi16(1); let r = _mm256_mask_sra_epi16(a, 0, a, count); @@ -18889,7 +18910,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm256_maskz_sra_epi16() { + const fn test_mm256_maskz_sra_epi16() { let a = _mm256_set1_epi16(8); let count = _mm_set1_epi16(1); let r = _mm256_maskz_sra_epi16(0, a, count); diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index e3fe97f3be..a0843686b4 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -20707,7 +20707,13 @@ pub const fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpslld))] -pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_sll_epi32( + src: __m256i, + k: __mmask8, + a: __m256i, + count: __m128i, +) -> __m256i { unsafe { let shf = _mm256_sll_epi32(a, count).as_i32x8(); transmute(simd_select_bitmask(k, shf, src.as_i32x8())) @@ -20721,7 +20727,8 @@ pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpslld))] -pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { unsafe { let shf = _mm256_sll_epi32(a, count).as_i32x8(); transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) @@ -20820,7 +20827,13 @@ pub const fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrld))] -pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_srl_epi32( + src: __m256i, + k: __mmask8, + a: __m256i, + count: __m128i, +) -> __m256i { unsafe { let shf = _mm256_srl_epi32(a, count).as_i32x8(); transmute(simd_select_bitmask(k, shf, src.as_i32x8())) @@ -20834,7 +20847,8 @@ pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrld))] -pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { unsafe { let shf = _mm256_srl_epi32(a, count).as_i32x8(); transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) @@ -20933,7 +20947,13 @@ pub const fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllq))] -pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_sll_epi64( + src: __m256i, + k: __mmask8, + a: __m256i, + count: __m128i, +) -> __m256i { unsafe { let shf = _mm256_sll_epi64(a, count).as_i64x4(); transmute(simd_select_bitmask(k, shf, src.as_i64x4())) @@ -20947,7 +20967,8 @@ pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsllq))] -pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { unsafe { let shf = _mm256_sll_epi64(a, count).as_i64x4(); transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) @@ -21046,7 +21067,13 @@ pub const fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_srl_epi64( + src: __m256i, + k: __mmask8, + a: __m256i, + count: __m128i, +) -> __m256i { unsafe { let shf = _mm256_srl_epi64(a, count).as_i64x4(); transmute(simd_select_bitmask(k, shf, src.as_i64x4())) @@ -21060,7 +21087,8 @@ pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrlq))] -pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { unsafe { let shf = _mm256_srl_epi64(a, count).as_i64x4(); transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) @@ -21155,7 +21183,13 @@ pub const fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrad))] -pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_sra_epi32( + src: __m256i, + k: __mmask8, + a: __m256i, + count: __m128i, +) -> __m256i { unsafe { let shf = _mm256_sra_epi32(a, count).as_i32x8(); transmute(simd_select_bitmask(k, shf, src.as_i32x8())) @@ -21169,7 +21203,8 @@ pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m12 #[target_feature(enable = "avx512f,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpsrad))] -pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { unsafe { let shf = _mm256_sra_epi32(a, count).as_i32x8(); transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) @@ -54808,7 +54843,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm256_mask_sll_epi32() { + const fn test_mm256_mask_sll_epi32() { let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0); let count = _mm_set_epi32(0, 0, 0, 1); let r = _mm256_mask_sll_epi32(a, 0, a, count); @@ -54819,7 +54854,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm256_maskz_sll_epi32() { + const fn test_mm256_maskz_sll_epi32() { let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0); let count = _mm_set_epi32(0, 0, 0, 1); let r = _mm256_maskz_sll_epi32(0, a, count); @@ -54901,7 +54936,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm256_mask_srl_epi32() { + const fn test_mm256_mask_srl_epi32() { let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); let count = _mm_set_epi32(0, 0, 0, 1); let r = _mm256_mask_srl_epi32(a, 0, a, count); @@ -54912,7 +54947,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm256_maskz_srl_epi32() { + const fn test_mm256_maskz_srl_epi32() { let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); let count = _mm_set_epi32(0, 0, 0, 1); let r = _mm256_maskz_srl_epi32(0, a, count); @@ -54976,7 +55011,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm256_mask_sra_epi32() { + const fn test_mm256_mask_sra_epi32() { let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); let count = _mm_set_epi32(0, 0, 0, 1); let r = _mm256_mask_sra_epi32(a, 0, a, count); @@ -54987,7 +55022,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm256_maskz_sra_epi32() { + const fn test_mm256_maskz_sra_epi32() { let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0); let count = _mm_set_epi32(0, 0, 0, 1); let r = _mm256_maskz_sra_epi32(0, a, count); diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index 6f9db8610f..2b7a3f35a3 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -9279,7 +9279,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm256_mask_sll_epi64() { + const fn test_mm256_mask_sll_epi64() { let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); let count = _mm_set_epi64x(0, 1); let r = _mm256_mask_sll_epi64(a, 0, a, count); @@ -9290,7 +9290,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm256_maskz_sll_epi64() { + const fn test_mm256_maskz_sll_epi64() { let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); let count = _mm_set_epi64x(0, 1); let r = _mm256_maskz_sll_epi64(0, a, count); @@ -9374,7 +9374,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm256_mask_srl_epi64() { + const fn test_mm256_mask_srl_epi64() { let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); let count = _mm_set_epi64x(0, 1); let r = _mm256_mask_srl_epi64(a, 0, a, count); @@ -9385,7 +9385,7 @@ mod tests { } #[simd_test(enable = "avx512f,avx512vl")] - fn test_mm256_maskz_srl_epi64() { + const fn test_mm256_maskz_srl_epi64() { let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); let count = _mm_set_epi64x(0, 1); let r = _mm256_maskz_srl_epi64(0, a, count);