From 6283fd634f50159e45dd41dcf35c0d18ec042648 Mon Sep 17 00:00:00 2001
From: okaneco <47607823+okaneco@users.noreply.github.com>
Date: Thu, 19 Feb 2026 12:30:11 -0500
Subject: [PATCH 1/6] x86: use `simd::intrinsics` for shifts

Use intrinsics in `sse2`, `avx2`, `avx512f`, and `avx512bw`
Mark intrinsics and tests that call these functions as const
---
 crates/core_arch/src/x86/sse2.rs | 128 +++++++++++++++++++++----------
 1 file changed, 88 insertions(+), 40 deletions(-)
diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs
index fbf62c362f..6c9b78b353 100644
--- a/crates/core_arch/src/x86/sse2.rs
+++ b/crates/core_arch/src/x86/sse2.rs
@@ -577,8 +577,17 @@ pub const fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psllw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(psllw(a.as_i16x8(), count.as_i16x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 16 {
+            _mm_setzero_si128()
+        } else {
+            // SAFETY: We checked above that the shift is less than 16 bits.
+            simd_shl(a.as_u16x8(), u16x8::splat(shift as u16)).as_m128i()
+        }
+    }
 }
 
 /// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros.
@@ -609,8 +618,17 @@ pub const fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pslld))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(pslld(a.as_i32x4(), count.as_i32x4())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 32 {
+            _mm_setzero_si128()
+        } else {
+            // SAFETY: We checked above that the shift is less than 32 bits.
+            simd_shl(a.as_u32x4(), u32x4::splat(shift as u32)).as_m128i()
+        }
+    }
 }
 
 /// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros.
@@ -641,8 +659,17 @@ pub const fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psllq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(psllq(a.as_i64x2(), count.as_i64x2())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 64 {
+            _mm_setzero_si128()
+        } else {
+            // SAFETY: We checked above that the shift is less than 64 bits.
+            simd_shl(a.as_u64x2(), u64x2::splat(shift as u64)).as_m128i()
+        }
+    }
 }
 
 /// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign
@@ -668,8 +695,13 @@ pub const fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psraw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(psraw(a.as_i16x8(), count.as_i16x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
+    let shift = count.as_u64x2().as_array()[0].min(15);
+    unsafe {
+        // SAFETY: We checked above that the shift is less than 16 bits.
+        simd_shr(a.as_i16x8(), i16x8::splat(shift as i16)).as_m128i()
+    }
 }
 
 /// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign
@@ -695,8 +727,13 @@ pub const fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psrad))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(psrad(a.as_i32x4(), count.as_i32x4())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
+    let shift = count.as_u64x2().as_array()[0].min(31);
+    unsafe {
+        // SAFETY: We checked above that the shift is less than 32 bits.
+        simd_shr(a.as_i32x4(), i32x4::splat(shift as i32)).as_m128i()
+    }
 }
 
 /// Shifts `a` right by `IMM8` bytes while shifting in zeros.
@@ -780,8 +817,17 @@ pub const fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psrlw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 16 {
+            _mm_setzero_si128()
+        } else {
+            // SAFETY: We checked above that the shift is less than 16 bits.
+            simd_shr(a.as_u16x8(), u16x8::splat(shift as u16)).as_m128i()
+        }
+    }
 }
 
 /// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
@@ -813,8 +859,17 @@ pub const fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psrld))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(psrld(a.as_i32x4(), count.as_i32x4())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 32 {
+            _mm_setzero_si128()
+        } else {
+            // SAFETY: We checked above that the shift is less than 32 bits.
+            simd_shr(a.as_u32x4(), u32x4::splat(shift as u32)).as_m128i()
+        }
+    }
 }
 
 /// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
@@ -846,8 +901,17 @@ pub const fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psrlq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 64 {
+            _mm_setzero_si128()
+        } else {
+            // SAFETY: We checked above that the shift is less than 64 bits.
+            simd_shr(a.as_u64x2(), u64x2::splat(shift as u64)).as_m128i()
+        }
+    }
 }
 
 /// Computes the bitwise AND of 128 bits (representing integer data) in `a` and
@@ -3246,22 +3310,6 @@ unsafe extern "C" {
     fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
     #[link_name = "llvm.x86.sse2.psad.bw"]
     fn psadbw(a: u8x16, b: u8x16) -> u64x2;
-    #[link_name = "llvm.x86.sse2.psll.w"]
-    fn psllw(a: i16x8, count: i16x8) -> i16x8;
-    #[link_name = "llvm.x86.sse2.psll.d"]
-    fn pslld(a: i32x4, count: i32x4) -> i32x4;
-    #[link_name = "llvm.x86.sse2.psll.q"]
-    fn psllq(a: i64x2, count: i64x2) -> i64x2;
-    #[link_name = "llvm.x86.sse2.psra.w"]
-    fn psraw(a: i16x8, count: i16x8) -> i16x8;
-    #[link_name = "llvm.x86.sse2.psra.d"]
-    fn psrad(a: i32x4, count: i32x4) -> i32x4;
-    #[link_name = "llvm.x86.sse2.psrl.w"]
-    fn psrlw(a: i16x8, count: i16x8) -> i16x8;
-    #[link_name = "llvm.x86.sse2.psrl.d"]
-    fn psrld(a: i32x4, count: i32x4) -> i32x4;
-    #[link_name = "llvm.x86.sse2.psrl.q"]
-    fn psrlq(a: i64x2, count: i64x2) -> i64x2;
     #[link_name = "llvm.x86.sse2.cvtps2dq"]
     fn cvtps2dq(a: __m128) -> i32x4;
     #[link_name = "llvm.x86.sse2.maskmov.dqu"]
@@ -3770,7 +3818,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    fn test_mm_sll_epi16() {
+    const fn test_mm_sll_epi16() {
         let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
         let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(
@@ -3795,7 +3843,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    fn test_mm_sll_epi32() {
+    const fn test_mm_sll_epi32() {
         let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
         let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
@@ -3817,7 +3865,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    fn test_mm_sll_epi64() {
+    const fn test_mm_sll_epi64() {
         let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
         let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
@@ -3842,7 +3890,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    fn test_mm_sra_epi16() {
+    const fn test_mm_sra_epi16() {
         let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
         let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(
@@ -3867,7 +3915,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    fn test_mm_sra_epi32() {
+    const fn test_mm_sra_epi32() {
         let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
         let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
@@ -3921,7 +3969,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    fn test_mm_srl_epi16() {
+    const fn test_mm_srl_epi16() {
         let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
         let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(
@@ -3946,7 +3994,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    fn test_mm_srl_epi32() {
+    const fn test_mm_srl_epi32() {
         let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
         let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
@@ -3968,7 +4016,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    fn test_mm_srl_epi64() {
+    const fn test_mm_srl_epi64() {
         let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
         let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));

From 90ce0899c854e6fe8ccdb01dc6c71795fc99ad5a Mon Sep 17 00:00:00 2001
From: okaneco <47607823+okaneco@users.noreply.github.com>
Date: Thu, 19 Feb 2026 12:55:30 -0500
Subject: [PATCH 2/6] Use intrinsics in `avx2`

---
 crates/core_arch/src/x86/avx2.rs | 128 +++++++++++++++++++++----------
 1 file changed, 88 insertions(+), 40 deletions(-)

diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs
index ca4ca9a2de..ef5bf6b556 100644
--- a/crates/core_arch/src/x86/avx2.rs
+++ b/crates/core_arch/src/x86/avx2.rs
@@ -2778,8 +2778,17 @@ pub fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsllw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
-    unsafe { transmute(psllw(a.as_i16x16(), count.as_i16x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 16 {
+            _mm256_setzero_si256()
+        } else {
+            // SAFETY: We checked above that the shift is less than 16 bits.
+            simd_shl(a.as_u16x16(), u16x16::splat(shift as u16)).as_m256i()
+        }
+    }
 }
 
 /// Shifts packed 32-bit integers in `a` left by `count` while
@@ -2790,8 +2799,17 @@ pub fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpslld))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
-    unsafe { transmute(pslld(a.as_i32x8(), count.as_i32x4())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 32 {
+            _mm256_setzero_si256()
+        } else {
+            // SAFETY: We checked above that the shift is less than 32 bits.
+            simd_shl(a.as_u32x8(), u32x8::splat(shift as u32)).as_m256i()
+        }
+    }
 }
 
 /// Shifts packed 64-bit integers in `a` left by `count` while
@@ -2802,8 +2820,17 @@ pub fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsllq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
-    unsafe { transmute(psllq(a.as_i64x4(), count.as_i64x2())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 64 {
+            _mm256_setzero_si256()
+        } else {
+            // SAFETY: We checked above that the shift is less than 64 bits.
+            simd_shl(a.as_u64x4(), u64x4::splat(shift as u64)).as_m256i()
+        }
+    }
 }
 
 /// Shifts packed 16-bit integers in `a` left by `IMM8` while
@@ -3030,8 +3057,13 @@ pub const fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsraw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
-    unsafe { transmute(psraw(a.as_i16x16(), count.as_i16x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
+    let shift = count.as_u64x2().as_array()[0].min(15);
+    unsafe {
+        // SAFETY: We checked above that the shift is less than 16 bits.
+        simd_shr(a.as_i16x16(), i16x16::splat(shift as i16)).as_m256i()
+    }
 }
 
 /// Shifts packed 32-bit integers in `a` right by `count` while
@@ -3042,8 +3074,13 @@ pub fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrad))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
-    unsafe { transmute(psrad(a.as_i32x8(), count.as_i32x4())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
+    let shift = count.as_u64x2().as_array()[0].min(31);
+    unsafe {
+        // SAFETY: We checked above that the shift is less than 32 bits.
+        simd_shr(a.as_i32x8(), i32x8::splat(shift as i32)).as_m256i()
+    }
 }
 
 /// Shifts packed 16-bit integers in `a` right by `IMM8` while
@@ -3197,8 +3234,17 @@ pub const fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrlw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
-    unsafe { transmute(psrlw(a.as_i16x16(), count.as_i16x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 16 {
+            _mm256_setzero_si256()
+        } else {
+            // SAFETY: We checked above that the shift is less than 16 bits.
+            simd_shr(a.as_u16x16(), u16x16::splat(shift as u16)).as_m256i()
+        }
+    }
 }
 
 /// Shifts packed 32-bit integers in `a` right by `count` while shifting in
@@ -3209,8 +3255,17 @@ pub fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrld))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
-    unsafe { transmute(psrld(a.as_i32x8(), count.as_i32x4())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 32 {
+            _mm256_setzero_si256()
+        } else {
+            // SAFETY: We checked above that the shift is less than 32 bits.
+            simd_shr(a.as_u32x8(), u32x8::splat(shift as u32)).as_m256i()
+        }
+    }
 }
 
 /// Shifts packed 64-bit integers in `a` right by `count` while shifting in
@@ -3221,8 +3276,17 @@ pub fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrlq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
-    unsafe { transmute(psrlq(a.as_i64x4(), count.as_i64x2())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 64 {
+            _mm256_setzero_si256()
+        } else {
+            // SAFETY: We checked above that the shift is less than 64 bits.
+            simd_shr(a.as_u64x4(), u64x4::splat(shift as u64)).as_m256i()
+        }
+    }
 }
 
 /// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
@@ -3919,22 +3983,6 @@ unsafe extern "C" {
     fn psignw(a: i16x16, b: i16x16) -> i16x16;
     #[link_name = "llvm.x86.avx2.psign.d"]
     fn psignd(a: i32x8, b: i32x8) -> i32x8;
-    #[link_name = "llvm.x86.avx2.psll.w"]
-    fn psllw(a: i16x16, count: i16x8) -> i16x16;
-    #[link_name = "llvm.x86.avx2.psll.d"]
-    fn pslld(a: i32x8, count: i32x4) -> i32x8;
-    #[link_name = "llvm.x86.avx2.psll.q"]
-    fn psllq(a: i64x4, count: i64x2) -> i64x4;
-    #[link_name = "llvm.x86.avx2.psra.w"]
-    fn psraw(a: i16x16, count: i16x8) -> i16x16;
-    #[link_name = "llvm.x86.avx2.psra.d"]
-    fn psrad(a: i32x8, count: i32x4) -> i32x8;
-    #[link_name = "llvm.x86.avx2.psrl.w"]
-    fn psrlw(a: i16x16, count: i16x8) -> i16x16;
-    #[link_name = "llvm.x86.avx2.psrl.d"]
-    fn psrld(a: i32x8, count: i32x4) -> i32x8;
-    #[link_name = "llvm.x86.avx2.psrl.q"]
-    fn psrlq(a: i64x4, count: i64x2) -> i64x4;
     #[link_name = "llvm.x86.avx2.pshuf.b"]
     fn pshufb(a: u8x32, b: u8x32) -> u8x32;
     #[link_name = "llvm.x86.avx2.permd"]
@@ -5184,7 +5232,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    fn test_mm256_sll_epi16() {
+    const fn test_mm256_sll_epi16() {
         let a = _mm256_set1_epi16(0xFF);
         let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
         let r = _mm256_sll_epi16(a, b);
@@ -5192,7 +5240,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    fn test_mm256_sll_epi32() {
+    const fn test_mm256_sll_epi32() {
         let a = _mm256_set1_epi32(0xFFFF);
         let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
         let r = _mm256_sll_epi32(a, b);
@@ -5200,7 +5248,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    fn test_mm256_sll_epi64() {
+    const fn test_mm256_sll_epi64() {
         let a = _mm256_set1_epi64x(0xFFFFFFFF);
         let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4);
         let r = _mm256_sll_epi64(a, b);
@@ -5275,7 +5323,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    fn test_mm256_sra_epi16() {
+    const fn test_mm256_sra_epi16() {
         let a = _mm256_set1_epi16(-1);
         let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
         let r = _mm256_sra_epi16(a, b);
@@ -5283,7 +5331,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    fn test_mm256_sra_epi32() {
+    const fn test_mm256_sra_epi32() {
         let a = _mm256_set1_epi32(-1);
         let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1);
         let r = _mm256_sra_epi32(a, b);
@@ -5345,7 +5393,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    fn test_mm256_srl_epi16() {
+    const fn test_mm256_srl_epi16() {
         let a = _mm256_set1_epi16(0xFF);
         let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
         let r = _mm256_srl_epi16(a, b);
@@ -5353,7 +5401,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    fn test_mm256_srl_epi32() {
+    const fn test_mm256_srl_epi32() {
         let a = _mm256_set1_epi32(0xFFFF);
         let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
         let r = _mm256_srl_epi32(a, b);
@@ -5361,7 +5409,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    fn test_mm256_srl_epi64() {
+    const fn test_mm256_srl_epi64() {
         let a = _mm256_set1_epi64x(0xFFFFFFFF);
         let b = _mm_setr_epi64x(4, 0);
         let r = _mm256_srl_epi64(a, b);

From 1fd2c6b7fffbb7edf95a19a3a8a3995e2325b2b4 Mon Sep 17 00:00:00 2001
From: okaneco <47607823+okaneco@users.noreply.github.com>
Date: Thu, 19 Feb 2026 13:17:48 -0500
Subject: [PATCH 3/6] Use intrinsics in `avx512bw` and add const to applicable
 fns/tests

---
 crates/core_arch/src/x86/avx512bw.rs | 95 +++++++++++++++++++---------
 1 file changed, 65 insertions(+), 30 deletions(-)

diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs
index 78801e8902..c88618cc08 100644
--- a/crates/core_arch/src/x86/avx512bw.rs
+++ b/crates/core_arch/src/x86/avx512bw.rs
@@ -7217,8 +7217,17 @@ pub const fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllw))]
-pub fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i {
-    unsafe { transmute(vpsllw(a.as_i16x32(), count.as_i16x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 16 {
+            _mm512_setzero_si512()
+        } else {
+            // SAFETY: We checked above that the shift is less than 16 bits.
+            simd_shl(a.as_u16x32(), u16x32::splat(shift as u16)).as_m512i()
+        }
+    }
 }
 
 /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -7228,7 +7237,13 @@ pub fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllw))]
-pub fn _mm512_mask_sll_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_sll_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    count: __m128i,
+) -> __m512i {
     unsafe {
         let shf = _mm512_sll_epi16(a, count).as_i16x32();
         transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
@@ -7242,7 +7257,8 @@ pub fn _mm512_mask_sll_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m1
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllw))]
-pub fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
     unsafe {
         let shf = _mm512_sll_epi16(a, count).as_i16x32();
         transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
@@ -7621,8 +7637,17 @@ pub const fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlw))]
-pub fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i {
-    unsafe { transmute(vpsrlw(a.as_i16x32(), count.as_i16x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 16 {
+            _mm512_setzero_si512()
+        } else {
+            // SAFETY: We checked above that the shift is less than 16 bits.
+            simd_shr(a.as_u16x32(), u16x32::splat(shift as u16)).as_m512i()
+        }
+    }
 }
 
 /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -7632,7 +7657,13 @@ pub fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlw))]
-pub fn _mm512_mask_srl_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_srl_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    count: __m128i,
+) -> __m512i {
     unsafe {
         let shf = _mm512_srl_epi16(a, count).as_i16x32();
         transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
@@ -7646,7 +7677,8 @@ pub fn _mm512_mask_srl_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m1
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlw))]
-pub fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
     unsafe {
         let shf = _mm512_srl_epi16(a, count).as_i16x32();
         transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
@@ -8010,8 +8042,13 @@ pub const fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraw))]
-pub fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i {
-    unsafe { transmute(vpsraw(a.as_i16x32(), count.as_i16x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i {
+    let shift = count.as_u64x2().as_array()[0].min(15);
+    unsafe {
+        // SAFETY: We checked above that the shift is less than 16 bits.
+        simd_shr(a.as_i16x32(), i16x32::splat(shift as i16)).as_m512i()
+    }
 }
 
 /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -8021,7 +8058,13 @@ pub fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraw))]
-pub fn _mm512_mask_sra_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_sra_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    count: __m128i,
+) -> __m512i {
     unsafe {
         let shf = _mm512_sra_epi16(a, count).as_i16x32();
         transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
@@ -8035,7 +8078,8 @@ pub fn _mm512_mask_sra_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m1
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraw))]
-pub fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
     unsafe {
         let shf = _mm512_sra_epi16(a, count).as_i16x32();
         transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
@@ -12706,15 +12750,6 @@ unsafe extern "C" {
     #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
     fn vpmaddubsw(a: u8x64, b: i8x64) -> i16x32;
 
-    #[link_name = "llvm.x86.avx512.psll.w.512"]
-    fn vpsllw(a: i16x32, count: i16x8) -> i16x32;
-
-    #[link_name = "llvm.x86.avx512.psrl.w.512"]
-    fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;
-
-    #[link_name = "llvm.x86.avx512.psra.w.512"]
-    fn vpsraw(a: i16x32, count: i16x8) -> i16x32;
-
     #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
     fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
     #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]
@@ -18334,7 +18369,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_sll_epi16() {
+    const fn test_mm512_sll_epi16() {
         let a = _mm512_set1_epi16(1 << 15);
         let count = _mm_set1_epi16(2);
         let r = _mm512_sll_epi16(a, count);
@@ -18343,7 +18378,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_mask_sll_epi16() {
+    const fn test_mm512_mask_sll_epi16() {
         let a = _mm512_set1_epi16(1 << 15);
         let count = _mm_set1_epi16(2);
         let r = _mm512_mask_sll_epi16(a, 0, a, count);
@@ -18354,7 +18389,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_maskz_sll_epi16() {
+    const fn test_mm512_maskz_sll_epi16() {
         let a = _mm512_set1_epi16(1 << 15);
         let count = _mm_set1_epi16(2);
         let r = _mm512_maskz_sll_epi16(0, a, count);
@@ -18570,7 +18605,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_srl_epi16() {
+    const fn test_mm512_srl_epi16() {
         let a = _mm512_set1_epi16(1 << 1);
         let count = _mm_set1_epi16(2);
         let r = _mm512_srl_epi16(a, count);
@@ -18579,7 +18614,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_mask_srl_epi16() {
+    const fn test_mm512_mask_srl_epi16() {
         let a = _mm512_set1_epi16(1 << 1);
         let count = _mm_set1_epi16(2);
         let r = _mm512_mask_srl_epi16(a, 0, a, count);
@@ -18590,7 +18625,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_maskz_srl_epi16() {
+    const fn test_mm512_maskz_srl_epi16() {
         let a = _mm512_set1_epi16(1 << 1);
         let count = _mm_set1_epi16(2);
         let r = _mm512_maskz_srl_epi16(0, a, count);
@@ -18806,7 +18841,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_sra_epi16() {
+    const fn test_mm512_sra_epi16() {
         let a = _mm512_set1_epi16(8);
         let count = _mm_set1_epi16(1);
         let r = _mm512_sra_epi16(a, count);
@@ -18815,7 +18850,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_mask_sra_epi16() {
+    const fn test_mm512_mask_sra_epi16() {
         let a = _mm512_set1_epi16(8);
         let count = _mm_set1_epi16(1);
         let r = _mm512_mask_sra_epi16(a, 0, a, count);
@@ -18826,7 +18861,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    fn test_mm512_maskz_sra_epi16() {
+    const fn test_mm512_maskz_sra_epi16() {
         let a = _mm512_set1_epi16(8);
         let count = _mm_set1_epi16(1);
         let r = _mm512_maskz_sra_epi16(0, a, count);

From cef4ad5bbe881341ae33a3e2c4813e8728fdde24 Mon Sep 17 00:00:00 2001
From: okaneco <47607823+okaneco@users.noreply.github.com>
Date: Thu, 19 Feb 2026 13:51:26 -0500
Subject: [PATCH 4/6] Use intrinsics in `avx512f` and add const to applicable
 fns/tests

---
 crates/core_arch/src/x86/avx512f.rs    | 208 ++++++++++++++++++-------
 crates/core_arch/src/x86_64/avx512f.rs |  30 ++--
 2 files changed, 163 insertions(+), 75 deletions(-)

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 3730496e1e..7882509932 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -20652,8 +20652,17 @@ pub const fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> _
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpslld))]
-pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
-    unsafe { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 32 {
+            _mm512_setzero_si512()
+        } else {
+            // SAFETY: We checked above that the shift is less than 32 bits.
+            simd_shl(a.as_u32x16(), u32x16::splat(shift as u32)).as_m512i()
+        }
+    }
 }
 
 /// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -20663,7 +20672,13 @@ pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpslld))]
-pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_sll_epi32(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    count: __m128i,
+) -> __m512i {
     unsafe {
         let shf = _mm512_sll_epi32(a, count).as_i32x16();
         transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
@@ -20677,7 +20692,8 @@ pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m1
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpslld))]
-pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
     unsafe {
         let shf = _mm512_sll_epi32(a, count).as_i32x16();
         transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
@@ -20747,8 +20763,17 @@ pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrld))]
-pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
-    unsafe { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 32 {
+            _mm512_setzero_si512()
+        } else {
+            // SAFETY: We checked above that the shift is less than 32 bits.
+            simd_shr(a.as_u32x16(), u32x16::splat(shift as u32)).as_m512i()
+        }
+    }
 }
 
 /// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -20758,7 +20783,13 @@ pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrld))]
-pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_srl_epi32(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    count: __m128i,
+) -> __m512i {
     unsafe {
         let shf = _mm512_srl_epi32(a, count).as_i32x16();
         transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
@@ -20772,7 +20803,8 @@ pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m1
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrld))]
-pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
     unsafe {
         let shf = _mm512_srl_epi32(a, count).as_i32x16();
         transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
@@ -20842,8 +20874,17 @@ pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllq))]
-pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
-    unsafe { transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 64 {
+            _mm512_setzero_si512()
+        } else {
+            // SAFETY: We checked above that the shift is less than 64 bits.
+            simd_shl(a.as_u64x8(), u64x8::splat(shift as u64)).as_m512i()
+        }
+    }
 }
 
 /// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -20853,7 +20894,13 @@ pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllq))]
-pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_sll_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    count: __m128i,
+) -> __m512i {
     unsafe {
         let shf = _mm512_sll_epi64(a, count).as_i64x8();
         transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
@@ -20867,7 +20914,8 @@ pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m12
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllq))]
-pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
     unsafe {
         let shf = _mm512_sll_epi64(a, count).as_i64x8();
         transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
@@ -20937,8 +20985,17 @@ pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlq))]
-pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
-    unsafe { transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
+    let shift = count.as_u64x2().as_array()[0];
+    unsafe {
+        if shift >= 64 {
+            _mm512_setzero_si512()
+        } else {
+            // SAFETY: We checked above that the shift is less than 64 bits.
+            simd_shr(a.as_u64x8(), u64x8::splat(shift as u64)).as_m512i()
+        }
+    }
 }
 
 /// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -20948,7 +21005,13 @@ pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlq))]
-pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_srl_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    count: __m128i,
+) -> __m512i {
     unsafe {
         let shf = _mm512_srl_epi64(a, count).as_i64x8();
         transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
@@ -20962,7 +21025,8 @@ pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m12
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlq))]
-pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
     unsafe {
         let shf = _mm512_srl_epi64(a, count).as_i64x8();
         transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
@@ -21032,8 +21096,13 @@ pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrad))]
-pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
-    unsafe { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
+    let shift = count.as_u64x2().as_array()[0].min(31);
+    unsafe {
+        // SAFETY: We checked above that the shift is less than 32 bits.
+        simd_shr(a.as_i32x16(), i32x16::splat(shift as i32)).as_m512i()
+    }
 }
 
 /// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -21043,7 +21112,13 @@ pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrad))]
-pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_sra_epi32(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    count: __m128i,
+) -> __m512i {
     unsafe {
         let shf = _mm512_sra_epi32(a, count).as_i32x16();
         transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
@@ -21057,7 +21132,8 @@ pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m1
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrad))]
-pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
     unsafe {
         let shf = _mm512_sra_epi32(a, count).as_i32x16();
         transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
@@ -21127,8 +21203,13 @@ pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraq))]
-pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
-    unsafe { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
+    let shift = count.as_u64x2().as_array()[0].min(63);
+    unsafe {
+        // SAFETY: We checked above that the shift is less than 64 bits.
+        simd_shr(a.as_i64x8(), i64x8::splat(shift as i64)).as_m512i()
+    }
 }
 
 /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -21138,7 +21219,13 @@ pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraq))]
-pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_sra_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    count: __m128i,
+) -> __m512i {
     unsafe {
         let shf = _mm512_sra_epi64(a, count).as_i64x8();
         transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
@@ -21152,7 +21239,8 @@ pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m12
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraq))]
-pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
     unsafe {
         let shf = _mm512_sra_epi64(a, count).as_i64x8();
         transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
@@ -21166,8 +21254,13 @@ pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraq))]
-pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
-    unsafe { transmute(vpsraq256(a.as_i64x4(), count.as_i64x2())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
+    let shift = count.as_u64x2().as_array()[0].min(63);
+    unsafe {
+        // SAFETY: We checked above that the shift is less than 64 bits.
+        simd_shr(a.as_i64x4(), i64x4::splat(shift as i64)).as_m256i()
+    }
 }
 
 /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -21177,7 +21270,13 @@ pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraq))]
-pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_sra_epi64(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    count: __m128i,
+) -> __m256i {
     unsafe {
         let shf = _mm256_sra_epi64(a, count).as_i64x4();
         transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
@@ -21191,7 +21290,8 @@ pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m12
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraq))]
-pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
     unsafe {
         let shf = _mm256_sra_epi64(a, count).as_i64x4();
         transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
@@ -21205,8 +21305,13 @@ pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraq))]
-pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(vpsraq128(a.as_i64x2(), count.as_i64x2())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
+    let shift = count.as_u64x2().as_array()[0].min(63);
+    unsafe {
+        // SAFETY: We checked above that the shift is less than 64 bits.
+        simd_shr(a.as_i64x2(), i64x2::splat(shift as i64)).as_m128i()
+    }
 }
 
 /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -21216,7 +21321,8 @@ pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraq))]
-pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_sra_epi64(a, count).as_i64x2();
         transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
@@ -21230,7 +21336,8 @@ pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraq))]
-pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_sra_epi64(a, count).as_i64x2();
         transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
@@ -44774,25 +44881,6 @@ unsafe extern "C" {
     #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
     fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;
 
-    #[link_name = "llvm.x86.avx512.psll.d.512"]
-    fn vpslld(a: i32x16, count: i32x4) -> i32x16;
-    #[link_name = "llvm.x86.avx512.psrl.d.512"]
-    fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
-    #[link_name = "llvm.x86.avx512.psll.q.512"]
-    fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
-    #[link_name = "llvm.x86.avx512.psrl.q.512"]
-    fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;
-
-    #[link_name = "llvm.x86.avx512.psra.d.512"]
-    fn vpsrad(a: i32x16, count: i32x4) -> i32x16;
-
-    #[link_name = "llvm.x86.avx512.psra.q.512"]
-    fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
-    #[link_name = "llvm.x86.avx512.psra.q.256"]
-    fn vpsraq256(a: i64x4, count: i64x2) -> i64x4;
-    #[link_name = "llvm.x86.avx512.psra.q.128"]
-    fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;
-
     #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
     fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
     #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
@@ -54649,7 +54737,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_sll_epi32() {
+    const fn test_mm512_sll_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 31, 1 << 0, 1 << 1, 1 << 2,
@@ -54670,7 +54758,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_mask_sll_epi32() {
+    const fn test_mm512_mask_sll_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 31, 1 << 0, 1 << 1, 1 << 2,
@@ -54693,7 +54781,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_maskz_sll_epi32() {
+    const fn test_mm512_maskz_sll_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 31, 1 << 0, 1 << 1, 1 << 2,
@@ -54754,7 +54842,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_srl_epi32() {
+    const fn test_mm512_srl_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 31, 1 << 0, 1 << 1, 1 << 2,
@@ -54769,7 +54857,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_mask_srl_epi32() {
+    const fn test_mm512_mask_srl_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 31, 1 << 0, 1 << 1, 1 << 2,
@@ -54786,7 +54874,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_maskz_srl_epi32() {
+    const fn test_mm512_maskz_srl_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 31, 1 << 0, 1 << 1, 1 << 2,
@@ -54847,7 +54935,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_sra_epi32() {
+    const fn test_mm512_sra_epi32() {
         let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
         let count = _mm_set_epi32(1, 0, 0, 2);
         let r = _mm512_sra_epi32(a, count);
@@ -54856,7 +54944,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_mask_sra_epi32() {
+    const fn test_mm512_mask_sra_epi32() {
         let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
         let count = _mm_set_epi32(0, 0, 0, 2);
         let r = _mm512_mask_sra_epi32(a, 0, a, count);
@@ -54867,7 +54955,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_maskz_sra_epi32() {
+    const fn test_mm512_maskz_sra_epi32() {
         let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
         let count = _mm_set_epi32(2, 0, 0, 2);
         let r = _mm512_maskz_sra_epi32(0, a, count);
diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs
index 0fd9b09363..bf3f7a554e 100644
--- a/crates/core_arch/src/x86_64/avx512f.rs
+++ b/crates/core_arch/src/x86_64/avx512f.rs
@@ -9225,7 +9225,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_sll_epi64() {
+    const fn test_mm512_sll_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 63, 1 << 32, 1 << 32, 1 << 32,
@@ -9245,7 +9245,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_mask_sll_epi64() {
+    const fn test_mm512_mask_sll_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 63, 1 << 32, 1 << 32, 1 << 32,
@@ -9264,7 +9264,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_maskz_sll_epi64() {
+    const fn test_mm512_maskz_sll_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 32, 1 << 32, 1 << 32,
@@ -9323,7 +9323,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_srl_epi64() {
+    const fn test_mm512_srl_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 0, 1 << 32, 1 << 32, 1 << 32,
@@ -9340,7 +9340,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_mask_srl_epi64() {
+    const fn test_mm512_mask_srl_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 0, 1 << 32, 1 << 32, 1 << 32,
@@ -9359,7 +9359,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_maskz_srl_epi64() {
+    const fn test_mm512_maskz_srl_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 32, 1 << 32, 1 << 32,
@@ -9418,7 +9418,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_sra_epi64() {
+    const fn test_mm512_sra_epi64() {
         let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
         let count = _mm_set_epi64x(0, 2);
         let r = _mm512_sra_epi64(a, count);
@@ -9427,7 +9427,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_mask_sra_epi64() {
+    const fn test_mm512_mask_sra_epi64() {
         let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
         let count = _mm_set_epi64x(0, 2);
         let r = _mm512_mask_sra_epi64(a, 0, a, count);
@@ -9438,7 +9438,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    fn test_mm512_maskz_sra_epi64() {
+    const fn test_mm512_maskz_sra_epi64() {
         let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
         let count = _mm_set_epi64x(0, 2);
         let r = _mm512_maskz_sra_epi64(0, a, count);
@@ -9449,7 +9449,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_sra_epi64() {
+    const fn test_mm256_sra_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_sra_epi64(a, count);
@@ -9458,7 +9458,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_mask_sra_epi64() {
+    const fn test_mm256_mask_sra_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_mask_sra_epi64(a, 0, a, count);
@@ -9469,7 +9469,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_maskz_sra_epi64() {
+    const fn test_mm256_maskz_sra_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_maskz_sra_epi64(0, a, count);
@@ -9480,7 +9480,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_sra_epi64() {
+    const fn test_mm_sra_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_sra_epi64(a, count);
@@ -9489,7 +9489,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_mask_sra_epi64() {
+    const fn test_mm_mask_sra_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_mask_sra_epi64(a, 0, a, count);
@@ -9500,7 +9500,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_maskz_sra_epi64() {
+    const fn test_mm_maskz_sra_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_maskz_sra_epi64(0, a, count);

From 227af9bc3955ea7ae1da453d61c3bf3fe115d3ba Mon Sep 17 00:00:00 2001
From: okaneco <47607823+okaneco@users.noreply.github.com>
Date: Thu, 19 Feb 2026 14:20:18 -0500
Subject: [PATCH 5/6] Mark intrinsics and tests that call `sse2` shifts as
 const

---
 crates/core_arch/src/x86/avx512bw.rs   | 30 ++++++++++--------
 crates/core_arch/src/x86/avx512f.rs    | 42 ++++++++++++++++----------
 crates/core_arch/src/x86_64/avx512f.rs |  8 ++---
 3 files changed, 48 insertions(+), 32 deletions(-)

diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs
index c88618cc08..67c6c56473 100644
--- a/crates/core_arch/src/x86/avx512bw.rs
+++ b/crates/core_arch/src/x86/avx512bw.rs
@@ -7300,7 +7300,8 @@ pub fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m25
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllw))]
-pub fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_sll_epi16(a, count).as_i16x8();
         transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
@@ -7314,7 +7315,8 @@ pub fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllw))]
-pub fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_sll_epi16(a, count).as_i16x8();
         transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
@@ -7720,7 +7722,8 @@ pub fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m25
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlw))]
-pub fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_srl_epi16(a, count).as_i16x8();
         transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
@@ -7734,7 +7737,8 @@ pub fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlw))]
-pub fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_srl_epi16(a, count).as_i16x8();
         transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
@@ -8121,7 +8125,8 @@ pub fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m25
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraw))]
-pub fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_sra_epi16(a, count).as_i16x8();
         transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
@@ -8135,7 +8140,8 @@ pub fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraw))]
-pub fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_sra_epi16(a, count).as_i16x8();
         transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
@@ -18422,7 +18428,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm_mask_sll_epi16() {
+    const fn test_mm_mask_sll_epi16() {
         let a = _mm_set1_epi16(1 << 15);
         let count = _mm_set1_epi16(2);
         let r = _mm_mask_sll_epi16(a, 0, a, count);
@@ -18433,7 +18439,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm_maskz_sll_epi16() {
+    const fn test_mm_maskz_sll_epi16() {
         let a = _mm_set1_epi16(1 << 15);
         let count = _mm_set1_epi16(2);
         let r = _mm_maskz_sll_epi16(0, a, count);
@@ -18658,7 +18664,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm_mask_srl_epi16() {
+    const fn test_mm_mask_srl_epi16() {
         let a = _mm_set1_epi16(1 << 1);
         let count = _mm_set1_epi16(2);
         let r = _mm_mask_srl_epi16(a, 0, a, count);
@@ -18669,7 +18675,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm_maskz_srl_epi16() {
+    const fn test_mm_maskz_srl_epi16() {
         let a = _mm_set1_epi16(1 << 1);
         let count = _mm_set1_epi16(2);
         let r = _mm_maskz_srl_epi16(0, a, count);
@@ -18894,7 +18900,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm_mask_sra_epi16() {
+    const fn test_mm_mask_sra_epi16() {
         let a = _mm_set1_epi16(8);
         let count = _mm_set1_epi16(1);
         let r = _mm_mask_sra_epi16(a, 0, a, count);
@@ -18905,7 +18911,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm_maskz_sra_epi16() {
+    const fn test_mm_maskz_sra_epi16() {
         let a = _mm_set1_epi16(8);
         let count = _mm_set1_epi16(1);
         let r = _mm_maskz_sra_epi16(0, a, count);
diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 7882509932..e3fe97f3be 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -20735,7 +20735,8 @@ pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpslld))]
-pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_sll_epi32(a, count).as_i32x4();
         transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
@@ -20749,7 +20750,8 @@ pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpslld))]
-pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_sll_epi32(a, count).as_i32x4();
         transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
@@ -20846,7 +20848,8 @@ pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrld))]
-pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_srl_epi32(a, count).as_i32x4();
         transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
@@ -20860,7 +20863,8 @@ pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrld))]
-pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_srl_epi32(a, count).as_i32x4();
         transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
@@ -20957,7 +20961,8 @@ pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllq))]
-pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_sll_epi64(a, count).as_i64x2();
         transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
@@ -20971,7 +20976,8 @@ pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllq))]
-pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_sll_epi64(a, count).as_i64x2();
         transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
@@ -21068,7 +21074,8 @@ pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlq))]
-pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_srl_epi64(a, count).as_i64x2();
         transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
@@ -21082,7 +21089,8 @@ pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlq))]
-pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_srl_epi64(a, count).as_i64x2();
         transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
@@ -21175,7 +21183,8 @@ pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrad))]
-pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_sra_epi32(a, count).as_i32x4();
         transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
@@ -21189,7 +21198,8 @@ pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrad))]
-pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_sra_epi32(a, count).as_i32x4();
         transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
@@ -54820,7 +54830,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_mask_sll_epi32() {
+    const fn test_mm_mask_sll_epi32() {
         let a = _mm_set_epi32(1 << 13, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm_mask_sll_epi32(a, 0, a, count);
@@ -54831,7 +54841,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_maskz_sll_epi32() {
+    const fn test_mm_maskz_sll_epi32() {
         let a = _mm_set_epi32(1 << 13, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm_maskz_sll_epi32(0, a, count);
@@ -54913,7 +54923,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_mask_srl_epi32() {
+    const fn test_mm_mask_srl_epi32() {
         let a = _mm_set_epi32(1 << 5, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm_mask_srl_epi32(a, 0, a, count);
@@ -54924,7 +54934,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_maskz_srl_epi32() {
+    const fn test_mm_maskz_srl_epi32() {
         let a = _mm_set_epi32(1 << 5, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm_maskz_srl_epi32(0, a, count);
@@ -54988,7 +54998,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_mask_sra_epi32() {
+    const fn test_mm_mask_sra_epi32() {
         let a = _mm_set_epi32(1 << 5, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm_mask_sra_epi32(a, 0, a, count);
@@ -54999,7 +55009,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_maskz_sra_epi32() {
+    const fn test_mm_maskz_sra_epi32() {
         let a = _mm_set_epi32(1 << 5, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm_maskz_sra_epi32(0, a, count);
diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs
index bf3f7a554e..6f9db8610f 100644
--- a/crates/core_arch/src/x86_64/avx512f.rs
+++ b/crates/core_arch/src/x86_64/avx512f.rs
@@ -9301,7 +9301,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_mask_sll_epi64() {
+    const fn test_mm_mask_sll_epi64() {
         let a = _mm_set_epi64x(1 << 63, 1 << 32);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_mask_sll_epi64(a, 0, a, count);
@@ -9312,7 +9312,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_maskz_sll_epi64() {
+    const fn test_mm_maskz_sll_epi64() {
         let a = _mm_set_epi64x(1 << 63, 1 << 32);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_maskz_sll_epi64(0, a, count);
@@ -9396,7 +9396,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_mask_srl_epi64() {
+    const fn test_mm_mask_srl_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_mask_srl_epi64(a, 0, a, count);
@@ -9407,7 +9407,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm_maskz_srl_epi64() {
+    const fn test_mm_maskz_srl_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_maskz_srl_epi64(0, a, count);

From dda4463e2cd2a8ec20687f83fb18d893d7194f0b Mon Sep 17 00:00:00 2001
From: okaneco <47607823+okaneco@users.noreply.github.com>
Date: Thu, 19 Feb 2026 14:36:28 -0500
Subject: [PATCH 6/6] Mark intrinsics and tests that call `avx2` shifts as
 const

---
 crates/core_arch/src/x86/avx512bw.rs   | 45 ++++++++++++-----
 crates/core_arch/src/x86/avx512f.rs    | 67 ++++++++++++++++++++------
 crates/core_arch/src/x86_64/avx512f.rs |  8 +--
 3 files changed, 88 insertions(+), 32 deletions(-)

diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs
index 67c6c56473..308b4fa460 100644
--- a/crates/core_arch/src/x86/avx512bw.rs
+++ b/crates/core_arch/src/x86/avx512bw.rs
@@ -7272,7 +7272,13 @@ pub const fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllw))]
-pub fn _mm256_mask_sll_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_sll_epi16(
+    src: __m256i,
+    k: __mmask16,
+    a: __m256i,
+    count: __m128i,
+) -> __m256i {
     unsafe {
         let shf = _mm256_sll_epi16(a, count).as_i16x16();
         transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
@@ -7286,7 +7292,8 @@ pub fn _mm256_mask_sll_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m1
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllw))]
-pub fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
     unsafe {
         let shf = _mm256_sll_epi16(a, count).as_i16x16();
         transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
@@ -7694,7 +7701,13 @@ pub const fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlw))]
-pub fn _mm256_mask_srl_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_srl_epi16(
+    src: __m256i,
+    k: __mmask16,
+    a: __m256i,
+    count: __m128i,
+) -> __m256i {
     unsafe {
         let shf = _mm256_srl_epi16(a, count).as_i16x16();
         transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
@@ -7708,7 +7721,8 @@ pub fn _mm256_mask_srl_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m1
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlw))]
-pub fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
     unsafe {
         let shf = _mm256_srl_epi16(a, count).as_i16x16();
         transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
@@ -8097,7 +8111,13 @@ pub const fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraw))]
-pub fn _mm256_mask_sra_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_sra_epi16(
+    src: __m256i,
+    k: __mmask16,
+    a: __m256i,
+    count: __m128i,
+) -> __m256i {
     unsafe {
         let shf = _mm256_sra_epi16(a, count).as_i16x16();
         transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
@@ -8111,7 +8131,8 @@ pub fn _mm256_mask_sra_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m1
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraw))]
-pub fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
     unsafe {
         let shf = _mm256_sra_epi16(a, count).as_i16x16();
         transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
@@ -18406,7 +18427,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm256_mask_sll_epi16() {
+    const fn test_mm256_mask_sll_epi16() {
         let a = _mm256_set1_epi16(1 << 15);
         let count = _mm_set1_epi16(2);
         let r = _mm256_mask_sll_epi16(a, 0, a, count);
@@ -18417,7 +18438,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm256_maskz_sll_epi16() {
+    const fn test_mm256_maskz_sll_epi16() {
         let a = _mm256_set1_epi16(1 << 15);
         let count = _mm_set1_epi16(2);
         let r = _mm256_maskz_sll_epi16(0, a, count);
@@ -18642,7 +18663,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm256_mask_srl_epi16() {
+    const fn test_mm256_mask_srl_epi16() {
         let a = _mm256_set1_epi16(1 << 1);
         let count = _mm_set1_epi16(2);
         let r = _mm256_mask_srl_epi16(a, 0, a, count);
@@ -18653,7 +18674,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm256_maskz_srl_epi16() {
+    const fn test_mm256_maskz_srl_epi16() {
         let a = _mm256_set1_epi16(1 << 1);
         let count = _mm_set1_epi16(2);
         let r = _mm256_maskz_srl_epi16(0, a, count);
@@ -18878,7 +18899,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm256_mask_sra_epi16() {
+    const fn test_mm256_mask_sra_epi16() {
         let a = _mm256_set1_epi16(8);
         let count = _mm_set1_epi16(1);
         let r = _mm256_mask_sra_epi16(a, 0, a, count);
@@ -18889,7 +18910,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    fn test_mm256_maskz_sra_epi16() {
+    const fn test_mm256_maskz_sra_epi16() {
         let a = _mm256_set1_epi16(8);
         let count = _mm_set1_epi16(1);
         let r = _mm256_maskz_sra_epi16(0, a, count);
diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index e3fe97f3be..a0843686b4 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -20707,7 +20707,13 @@ pub const fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpslld))]
-pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_sll_epi32(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    count: __m128i,
+) -> __m256i {
     unsafe {
         let shf = _mm256_sll_epi32(a, count).as_i32x8();
         transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
@@ -20721,7 +20727,8 @@ pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m12
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpslld))]
-pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
     unsafe {
         let shf = _mm256_sll_epi32(a, count).as_i32x8();
         transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
@@ -20820,7 +20827,13 @@ pub const fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrld))]
-pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_srl_epi32(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    count: __m128i,
+) -> __m256i {
     unsafe {
         let shf = _mm256_srl_epi32(a, count).as_i32x8();
         transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
@@ -20834,7 +20847,8 @@ pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m12
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrld))]
-pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
     unsafe {
         let shf = _mm256_srl_epi32(a, count).as_i32x8();
         transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
@@ -20933,7 +20947,13 @@ pub const fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllq))]
-pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_sll_epi64(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    count: __m128i,
+) -> __m256i {
     unsafe {
         let shf = _mm256_sll_epi64(a, count).as_i64x4();
         transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
@@ -20947,7 +20967,8 @@ pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m12
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllq))]
-pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
     unsafe {
         let shf = _mm256_sll_epi64(a, count).as_i64x4();
         transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
@@ -21046,7 +21067,13 @@ pub const fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlq))]
-pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_srl_epi64(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    count: __m128i,
+) -> __m256i {
     unsafe {
         let shf = _mm256_srl_epi64(a, count).as_i64x4();
         transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
@@ -21060,7 +21087,8 @@ pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m12
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlq))]
-pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
     unsafe {
         let shf = _mm256_srl_epi64(a, count).as_i64x4();
         transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
@@ -21155,7 +21183,13 @@ pub const fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrad))]
-pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_sra_epi32(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    count: __m128i,
+) -> __m256i {
     unsafe {
         let shf = _mm256_sra_epi32(a, count).as_i32x8();
         transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
@@ -21169,7 +21203,8 @@ pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m12
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrad))]
-pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
     unsafe {
         let shf = _mm256_sra_epi32(a, count).as_i32x8();
         transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
@@ -54808,7 +54843,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_mask_sll_epi32() {
+    const fn test_mm256_mask_sll_epi32() {
         let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm256_mask_sll_epi32(a, 0, a, count);
@@ -54819,7 +54854,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_maskz_sll_epi32() {
+    const fn test_mm256_maskz_sll_epi32() {
         let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm256_maskz_sll_epi32(0, a, count);
@@ -54901,7 +54936,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_mask_srl_epi32() {
+    const fn test_mm256_mask_srl_epi32() {
         let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm256_mask_srl_epi32(a, 0, a, count);
@@ -54912,7 +54947,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_maskz_srl_epi32() {
+    const fn test_mm256_maskz_srl_epi32() {
         let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm256_maskz_srl_epi32(0, a, count);
@@ -54976,7 +55011,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_mask_sra_epi32() {
+    const fn test_mm256_mask_sra_epi32() {
         let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm256_mask_sra_epi32(a, 0, a, count);
@@ -54987,7 +55022,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_maskz_sra_epi32() {
+    const fn test_mm256_maskz_sra_epi32() {
         let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm256_maskz_sra_epi32(0, a, count);
diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs
index 6f9db8610f..2b7a3f35a3 100644
--- a/crates/core_arch/src/x86_64/avx512f.rs
+++ b/crates/core_arch/src/x86_64/avx512f.rs
@@ -9279,7 +9279,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_mask_sll_epi64() {
+    const fn test_mm256_mask_sll_epi64() {
         let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_mask_sll_epi64(a, 0, a, count);
@@ -9290,7 +9290,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_maskz_sll_epi64() {
+    const fn test_mm256_maskz_sll_epi64() {
         let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_maskz_sll_epi64(0, a, count);
@@ -9374,7 +9374,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_mask_srl_epi64() {
+    const fn test_mm256_mask_srl_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_mask_srl_epi64(a, 0, a, count);
@@ -9385,7 +9385,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    fn test_mm256_maskz_srl_epi64() {
+    const fn test_mm256_maskz_srl_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_maskz_srl_epi64(0, a, count);