diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index 88afaae8b8..41f01d445f 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -25039,16 +25039,9 @@ pub unsafe fn vst1q_lane_f64(a: *mut f64, b: float64x2_t) { #[inline(always)] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] +#[cfg_attr(test, assert_instr(stp))] pub unsafe fn vst2_f64(a: *mut f64, b: float64x1x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v1f64.p0" - )] - fn _vst2_f64(a: float64x1_t, b: float64x1_t, ptr: *mut i8); - } - _vst2_f64(b.0, b.1, a as _) + core::ptr::write_unaligned(a.cast(), b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f64)"] @@ -25125,14 +25118,7 @@ pub unsafe fn vst2_lane_u64(a: *mut u64, b: uint64x1x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st2))] pub unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v2f64.p0" - )] - fn _vst2q_f64(a: float64x2_t, b: float64x2_t, ptr: *mut i8); - } - _vst2q_f64(b.0, b.1, a as _) + crate::core_arch::macros::interleaving_store!(f64, 2, 2, a, b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s64)"] @@ -25143,14 +25129,7 @@ pub unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st2))] pub unsafe fn vst2q_s64(a: *mut i64, b: int64x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v2i64.p0" - )] - fn _vst2q_s64(a: int64x2_t, b: int64x2_t, ptr: *mut i8); - } - _vst2q_s64(b.0, b.1, a as _) + crate::core_arch::macros::interleaving_store!(i64, 2, 2, a, b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f64)"] @@ -25295,14 +25274,7 @@ pub unsafe fn vst2q_u64(a: *mut u64, b: uint64x2x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] pub unsafe fn vst3_f64(a: *mut f64, b: float64x1x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3.v1f64.p0" - )] - fn _vst3_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut i8); - } - _vst3_f64(b.0, b.1, b.2, a as _) + core::ptr::write_unaligned(a.cast(), b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f64)"] @@ -25379,14 +25351,7 @@ pub unsafe fn vst3_lane_u64(a: *mut u64, b: uint64x1x3_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st3))] pub unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3.v2f64.p0" - )] - fn _vst3q_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut i8); - } - _vst3q_f64(b.0, b.1, b.2, a as _) + crate::core_arch::macros::interleaving_store!(f64, 2, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s64)"] @@ -25397,14 +25362,7 @@ pub unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st3))] pub unsafe fn vst3q_s64(a: *mut i64, b: int64x2x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3.v2i64.p0" - )] - fn _vst3q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i8); - } - _vst3q_s64(b.0, b.1, b.2, a as _) + crate::core_arch::macros::interleaving_store!(i64, 2, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f64)"] @@ -25549,14 +25507,7 @@ pub unsafe fn vst3q_u64(a: *mut u64, b: uint64x2x3_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] pub unsafe fn vst4_f64(a: *mut f64, b: float64x1x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v1f64.p0" - )] - fn _vst4_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, ptr: *mut i8); - } - _vst4_f64(b.0, b.1, b.2, b.3, a as _) + core::ptr::write_unaligned(a.cast(), b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f64)"] @@ -25647,14 +25598,7 @@ pub unsafe fn vst4_lane_u64(a: *mut u64, b: uint64x1x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st4))] pub unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v2f64.p0" - )] - fn _vst4q_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, ptr: *mut i8); - } - _vst4q_f64(b.0, b.1, b.2, b.3, a as _) + crate::core_arch::macros::interleaving_store!(f64, 2, 4, a, b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s64)"] @@ -25665,14 +25609,7 @@ pub unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st4))] pub unsafe fn vst4q_s64(a: *mut i64, b: int64x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v2i64.p0" - )] - fn _vst4q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i8); - } - _vst4q_s64(b.0, b.1, b.2, b.3, a as _) + crate::core_arch::macros::interleaving_store!(i64, 2, 4, a, b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f64)"] diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs index 2df4ba7443..2fbd2255aa 100644 --- a/crates/core_arch/src/aarch64/neon/mod.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -1050,6 +1050,14 @@ mod tests { test_vld1q_f16_x2(f16, 16, float16x8x2_t, vst1q_f16_x2, vld1q_f16_x2); test_vld1q_f16_x3(f16, 24, float16x8x3_t, vst1q_f16_x3, vld1q_f16_x3); test_vld1q_f16_x4(f16, 32, float16x8x4_t, vst1q_f16_x4, vld1q_f16_x4); + + test_vld2_f16_x2(f16, 8, float16x4x2_t, vst2_f16, vld2_f16); + test_vld2_f16_x3(f16, 12, float16x4x3_t, vst3_f16, vld3_f16); + test_vld2_f16_x4(f16, 16, float16x4x4_t, vst4_f16, vld4_f16); + + test_vld2q_f16_x2(f16, 16, float16x8x2_t, vst2q_f16, vld2q_f16); + test_vld3q_f16_x3(f16, 24, float16x8x3_t, vst3q_f16, vld3q_f16); + test_vld4q_f16_x4(f16, 32, float16x8x4_t, vst4q_f16, vld4q_f16); } macro_rules! wide_store_load_roundtrip_aes { diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs index 45c83b880e..62201edfda 100644 --- a/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/crates/core_arch/src/arm_shared/neon/generated.rs @@ -66001,14 +66001,7 @@ pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st2))] pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v2f32.p0" - )] - fn _vst2_f32(a: float32x2_t, b: float32x2_t, ptr: *mut i8); - } - _vst2_f32(b.0, b.1, a as _) + crate::core_arch::macros::interleaving_store!(f32, 2, 2, a, b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"] @@ -66020,14 +66013,7 @@ pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st2))] pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v4f32.p0" - )] - fn _vst2q_f32(a: float32x4_t, b: float32x4_t, ptr: *mut i8); - } - _vst2q_f32(b.0, b.1, a as _) + crate::core_arch::macros::interleaving_store!(f32, 4, 2, a, b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"] @@ -66039,14 +66025,7 @@ pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st2))] pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v8i8.p0" - )] - fn _vst2_s8(a: int8x8_t, b: int8x8_t, ptr: *mut i8); - } - _vst2_s8(b.0, b.1, a as _) + crate::core_arch::macros::interleaving_store!(i8, 8, 2, a, b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"] @@ -66058,14 +66037,7 @@ pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st2))] pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v16i8.p0" - )] - fn _vst2q_s8(a: int8x16_t, b: int8x16_t, ptr: *mut i8); - } - _vst2q_s8(b.0, b.1, a as _) + crate::core_arch::macros::interleaving_store!(i8, 16, 2, a, b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"] @@ -66077,14 +66049,7 @@ pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st2))] pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v4i16.p0" - )] - fn _vst2_s16(a: int16x4_t, b: int16x4_t, ptr: *mut i8); - } - _vst2_s16(b.0, b.1, a as _) + crate::core_arch::macros::interleaving_store!(i16, 4, 2, a, b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"] @@ -66096,14 +66061,7 @@ pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st2))] pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v8i16.p0" - )] - fn _vst2q_s16(a: int16x8_t, b: int16x8_t, ptr: *mut i8); - } - _vst2q_s16(b.0, b.1, a as _) + crate::core_arch::macros::interleaving_store!(i16, 8, 2, a, b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"] @@ -66115,14 +66073,7 @@ pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st2))] pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v2i32.p0" - )] - fn _vst2_s32(a: int32x2_t, b: int32x2_t, ptr: *mut i8); - } - _vst2_s32(b.0, b.1, a as _) + crate::core_arch::macros::interleaving_store!(i32, 2, 2, a, b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"] @@ -66134,14 +66085,7 @@ pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st2))] pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v4i32.p0" - )] - fn _vst2q_s32(a: int32x4_t, b: int32x4_t, ptr: *mut i8); - } - _vst2q_s32(b.0, b.1, a as _) + crate::core_arch::macros::interleaving_store!(i32, 4, 2, a, b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"] @@ -66865,11 +66809,7 @@ pub unsafe fn vst2_p64(a: *mut p64, b: poly64x1x2_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(nop))] pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v1i64.p0")] - fn _vst2_s64(ptr: *mut i8, a: int64x1_t, b: int64x1_t, size: i32); - } - _vst2_s64(a as _, b.0, b.1, 8) + core::ptr::write_unaligned(a.cast(), b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"] @@ -66881,14 +66821,7 @@ pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v1i64.p0" - )] - fn _vst2_s64(a: int64x1_t, b: int64x1_t, ptr: *mut i8); - } - _vst2_s64(b.0, b.1, a as _) + core::ptr::write_unaligned(a.cast(), b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u64)"] @@ -67233,11 +67166,7 @@ pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(vst3))] pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v2f32")] - fn _vst3_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, c: float32x2_t, size: i32); - } - _vst3_f32(a as _, b.0, b.1, b.2, 4) + crate::core_arch::macros::interleaving_store!(f32, 2, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"] @@ -67249,11 +67178,7 @@ pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(vst3))] pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4f32")] - fn _vst3q_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, c: float32x4_t, size: i32); - } - _vst3q_f32(a as _, b.0, b.1, b.2, 4) + crate::core_arch::macros::interleaving_store!(f32, 4, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"] @@ -67265,11 +67190,7 @@ pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(vst3))] pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v8i8")] - fn _vst3_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, size: i32); - } - _vst3_s8(a as _, b.0, b.1, b.2, 1) + crate::core_arch::macros::interleaving_store!(i8, 8, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"] @@ -67281,11 +67202,7 @@ pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(vst3))] pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v16i8")] - fn _vst3q_s8(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, size: i32); - } - _vst3q_s8(a as _, b.0, b.1, b.2, 1) + crate::core_arch::macros::interleaving_store!(i8, 16, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"] @@ -67297,11 +67214,7 @@ pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(vst3))] pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4i16")] - fn _vst3_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, size: i32); - } - _vst3_s16(a as _, b.0, b.1, b.2, 2) + crate::core_arch::macros::interleaving_store!(i16, 4, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"] @@ -67313,11 +67226,7 @@ pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(vst3))] pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v8i16")] - fn _vst3q_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, c: int16x8_t, size: i32); - } - _vst3q_s16(a as _, b.0, b.1, b.2, 2) + crate::core_arch::macros::interleaving_store!(i16, 8, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"] @@ -67329,11 +67238,7 @@ pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(vst3))] pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v2i32")] - fn _vst3_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, c: int32x2_t, size: i32); - } - _vst3_s32(a as _, b.0, b.1, b.2, 4) + crate::core_arch::macros::interleaving_store!(i32, 2, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"] @@ -67345,11 +67250,7 @@ pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(vst3))] pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4i32")] - fn _vst3q_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, c: int32x4_t, size: i32); - } - _vst3q_s32(a as _, b.0, b.1, b.2, 4) + crate::core_arch::macros::interleaving_store!(i32, 4, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"] @@ -68153,14 +68054,7 @@ pub unsafe fn vst3_p64(a: *mut p64, b: poly64x1x3_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3.v1i64.p0" - )] - fn _vst3_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, ptr: *mut i8); - } - _vst3_s64(b.0, b.1, b.2, a as _) + core::ptr::write_unaligned(a.cast(), b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"] @@ -68172,11 +68066,7 @@ pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(nop))] pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v1i64")] - fn _vst3_s64(ptr: *mut i8, a: int64x1_t, b: int64x1_t, c: int64x1_t, size: i32); - } - _vst3_s64(a as _, b.0, b.1, b.2, 8) + core::ptr::write_unaligned(a.cast(), b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u64)"] @@ -68712,14 +68602,7 @@ pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st4))] pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v2f32.p0" - )] - fn _vst4_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, ptr: *mut i8); - } - _vst4_f32(b.0, b.1, b.2, b.3, a as _) + crate::core_arch::macros::interleaving_store!(f32, 2, 4, a, b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"] @@ -68731,14 +68614,7 @@ pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st4))] pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v4f32.p0" - )] - fn _vst4q_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, ptr: *mut i8); - } - _vst4q_f32(b.0, b.1, b.2, b.3, a as _) + crate::core_arch::macros::interleaving_store!(f32, 4, 4, a, b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"] @@ -68750,14 +68626,7 @@ pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st4))] pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v8i8.p0" - )] - fn _vst4_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, ptr: *mut i8); - } - _vst4_s8(b.0, b.1, b.2, b.3, a as _) + crate::core_arch::macros::interleaving_store!(i8, 8, 4, a, b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"] @@ -68769,14 +68638,7 @@ pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st4))] pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v16i8.p0" - )] - fn _vst4q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, ptr: *mut i8); - } - _vst4q_s8(b.0, b.1, b.2, b.3, a as _) + crate::core_arch::macros::interleaving_store!(i8, 16, 4, a, b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"] @@ -68788,14 +68650,7 @@ pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st4))] pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v4i16.p0" - )] - fn _vst4_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, ptr: *mut i8); - } - _vst4_s16(b.0, b.1, b.2, b.3, a as _) + crate::core_arch::macros::interleaving_store!(i16, 4, 4, a, b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"] @@ -68807,14 +68662,7 @@ pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st4))] pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v8i16.p0" - )] - fn _vst4q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, ptr: *mut i8); - } - _vst4q_s16(b.0, b.1, b.2, b.3, a as _) + crate::core_arch::macros::interleaving_store!(i16, 8, 4, a, b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"] @@ -68826,14 +68674,7 @@ pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st4))] pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v2i32.p0" - )] - fn _vst4_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, ptr: *mut i8); - } - _vst4_s32(b.0, b.1, b.2, b.3, a as _) + crate::core_arch::macros::interleaving_store!(i32, 2, 4, a, b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"] @@ -68845,14 +68686,7 @@ pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st4))] pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v4i32.p0" - )] - fn _vst4q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, ptr: *mut i8); - } - _vst4q_s32(b.0, b.1, b.2, b.3, a as _) + crate::core_arch::macros::interleaving_store!(i32, 4, 4, a, b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"] @@ -69576,18 +69410,7 @@ pub unsafe fn vst4_p64(a: *mut p64, b: poly64x1x4_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(nop))] pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v1i64")] - fn _vst4_s64( - ptr: *mut i8, - a: int64x1_t, - b: int64x1_t, - c: int64x1_t, - d: int64x1_t, - size: i32, - ); - } - _vst4_s64(a as _, b.0, b.1, b.2, b.3, 8) + core::ptr::write_unaligned(a.cast(), b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"] @@ -69599,14 +69422,7 @@ pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v1i64.p0" - )] - fn _vst4_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, ptr: *mut i8); - } - _vst4_s64(b.0, b.1, b.2, b.3, a as _) + core::ptr::write_unaligned(a.cast(), b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u64)"] diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs index 9f6922efee..5a582fe177 100644 --- a/crates/core_arch/src/macros.rs +++ b/crates/core_arch/src/macros.rs @@ -187,6 +187,17 @@ macro_rules! simd_masked_store { }; } +/// The first N indices `[0, 1, 2, ...]`. +pub(crate) const fn identity() -> [u32; N] { + let mut out = [0u32; N]; + let mut i = 0usize; + while i < N { + out[i] = i as u32; + i += 1; + } + out +} + /// The first N even indices `[0, 2, 4, ...]`. pub(crate) const fn even() -> [u32; N] { let mut out = [0u32; N]; @@ -277,3 +288,59 @@ macro_rules! deinterleaving_load { #[allow(unused)] pub(crate) use deinterleaving_load; + +pub(crate) const fn interleave_mask() +-> [u32; LANES] { + let mut out = [0u32; LANES]; + let mut j = 0usize; + while j < LANES { + out[j] = ((j % K) * N + j / K) as u32; + j += 1; + } + out +} + +#[allow(unused)] +macro_rules! interleaving_store { + ($elem:ty, $lanes:literal, 2, $ptr:expr, $v:expr) => {{ + use $crate::core_arch::macros::interleave_mask; + use $crate::core_arch::simd::Simd; + + type W = Simd<$elem, { $lanes * 2 }>; + let w: W = simd_shuffle!($v.0, $v.1, interleave_mask::<{ $lanes * 2 }, $lanes, 2>()); + $crate::ptr::write_unaligned($ptr as *mut W, w); + }}; + + // N = 3 + ($elem:ty, $lanes:literal, 3, $ptr:expr, $v:expr) => {{ + use $crate::core_arch::macros::{identity, interleave_mask}; + use $crate::core_arch::simd::Simd; + + let v0v1: Simd<$elem, { $lanes * 2 }> = + simd_shuffle!($v.0, $v.1, identity::<{ $lanes * 2 }>()); + let v2v2: Simd<$elem, { $lanes * 2 }> = + simd_shuffle!($v.2, $v.2, identity::<{ $lanes * 2 }>()); + + type W = Simd<$elem, { $lanes * 3 }>; + let w: W = simd_shuffle!(v0v1, v2v2, interleave_mask::<{ $lanes * 3 }, $lanes, 3>()); + $crate::ptr::write_unaligned($ptr as *mut W, w); + }}; + + // N = 4 + ($elem:ty, $lanes:literal, 4, $ptr:expr, $v:expr) => {{ + use $crate::core_arch::macros::{identity, interleave_mask}; + use $crate::core_arch::simd::Simd; + + let v0v1: Simd<$elem, { $lanes * 2 }> = + simd_shuffle!($v.0, $v.1, identity::<{ $lanes * 2 }>()); + let v2v3: Simd<$elem, { $lanes * 2 }> = + simd_shuffle!($v.2, $v.3, identity::<{ $lanes * 2 }>()); + + type W = Simd<$elem, { $lanes * 4 }>; + let w: W = simd_shuffle!(v0v1, v2v3, interleave_mask::<{ $lanes * 4 }, $lanes, 4>()); + $crate::ptr::write_unaligned($ptr as *mut W, w); + }}; +} + +#[allow(unused)] +pub(crate) use interleaving_store; diff --git a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml index 9190c8518a..0ec8024fdf 100644 --- a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -4567,20 +4567,11 @@ intrinsics: unsafe: [neon] attr: - *neon-stable - assert_instr: [st1] + assert_instr: [stp] types: - - ['f64', float64x1x2_t, float64x1_t] + - ['f64', float64x1x2_t] compose: - - LLVMLink: - name: 'st2.{neon_type[1]}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']] + - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]] - name: "vst2{neon_type[1].nox}" doc: "Store multiple 2-element structures from two registers" @@ -4591,19 +4582,10 @@ intrinsics: - *neon-stable assert_instr: [st2] types: - - [i64, int64x2x2_t, int64x2_t] - - [f64, float64x2x2_t, float64x2_t] + - [i64, int64x2x2_t, "2"] + - [f64, float64x2x2_t, "2"] compose: - - LLVMLink: - name: 'st2.{neon_type[1]}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']] + - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "2", a, b], [], true] - name: "vst2{neon_type[1].lane_nox}" doc: "Store multiple 2-element structures from two registers" @@ -4781,19 +4763,9 @@ intrinsics: safety: unsafe: [neon] types: - - [f64, float64x1x3_t, float64x1_t] + - [f64, float64x1x3_t] compose: - - LLVMLink: - name: 'st3.{neon_type[1].nox}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']] + - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]] - name: "vst3{neon_type[1].lane_nox}" doc: "Store multiple 3-element structures from three registers" @@ -4860,20 +4832,10 @@ intrinsics: safety: unsafe: [neon] types: - - [i64, int64x2x3_t, int64x2_t] - - [f64, float64x2x3_t, float64x2_t] + - [i64, int64x2x3_t, "2"] + - [f64, float64x2x3_t, "2"] compose: - - LLVMLink: - name: 'st3.{neon_type[1].nox}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']] + - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "3", a, b], [], true] - name: "vst3{neon_type[1].nox}" doc: "Store multiple 3-element structures from three registers" @@ -4995,20 +4957,9 @@ intrinsics: safety: unsafe: [neon] types: - - [f64, float64x1x4_t, float64x1_t] + - [f64, float64x1x4_t] compose: - - LLVMLink: - name: 'st4.{neon_type[1].nox}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'd: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']] + - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]] - name: "vst4{neon_type[1].lane_nox}" doc: "Store multiple 4-element structures from four registers" @@ -5075,21 +5026,10 @@ intrinsics: safety: unsafe: [neon] types: - - [i64, int64x2x4_t, int64x2_t] - - [f64, float64x2x4_t, float64x2_t] + - [i64, int64x2x4_t, "2"] + - [f64, float64x2x4_t, "2"] compose: - - LLVMLink: - name: 'st4.{neon_type[1].nox}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'd: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']] + - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "4", a, b], [], true] - name: "vst4{neon_type[1].nox}" doc: "Store multiple 4-element structures from four registers" diff --git a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml index 8e10fff984..23145d6d66 100644 --- a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml @@ -5049,17 +5049,7 @@ intrinsics: types: - [i64, int64x1x2_t, int64x1_t] compose: - - LLVMLink: - name: 'vst2.{neon_type[1]}' - arguments: - - 'ptr: *mut i8' - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'size: i32' - links: - - link: 'llvm.arm.neon.vst2.v{neon_type[1].lane}{type[0]}.p0' - arch: arm - - FnCall: ['_vst2{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', '8']] + - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]] - name: "vst2{neon_type[1].nox}" doc: "Store multiple 2-element structures from two registers" @@ -5092,16 +5082,7 @@ intrinsics: types: - [i64, int64x1x2_t, int64x1_t] compose: - - LLVMLink: - name: 'st2.{neon_type[1]}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']] + - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]] - name: "vst2{neon_type[1].nox}" doc: "Store multiple 2-element structures from two registers" @@ -5113,26 +5094,16 @@ intrinsics: safety: unsafe: [neon] types: - - [i8, int8x8x2_t, int8x8_t] - - [i16, int16x4x2_t, int16x4_t] - - [i32, int32x2x2_t, int32x2_t] - - [i8, int8x16x2_t, int8x16_t] - - [i16, int16x8x2_t, int16x8_t] - - [i32, int32x4x2_t, int32x4_t] - - [f32, float32x2x2_t, float32x2_t] - - [f32, float32x4x2_t, float32x4_t] + - [i8, int8x8x2_t, "8"] + - [i16, int16x4x2_t, "4"] + - [i32, int32x2x2_t, "2"] + - [i8, int8x16x2_t, "16"] + - [i16, int16x8x2_t, "8"] + - [i32, int32x4x2_t, "4"] + - [f32, float32x2x2_t, "2"] + - [f32, float32x4x2_t, "4"] compose: - - LLVMLink: - name: 'st2.{neon_type[1]}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']] - + - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "2", a, b], [], true] - name: "vst2{neon_type[1].nox}" doc: "Store multiple 2-element structures from two registers" @@ -5426,17 +5397,7 @@ intrinsics: types: - [i64, int64x1x3_t, int64x1_t] compose: - - LLVMLink: - name: 'st3.{neon_type[1].nox}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']] + - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]] - name: "vst3{neon_type[1].nox}" doc: "Store multiple 3-element structures from three registers" @@ -5471,18 +5432,7 @@ intrinsics: types: - [i64, int64x1x3_t, int64x1_t] compose: - - LLVMLink: - name: 'vst3.{neon_type[1]}' - arguments: - - 'ptr: *mut i8' - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'size: i32' - links: - - link: 'llvm.arm.neon.vst3.p0.v{neon_type[1].lane}{type[0]}' - arch: arm - - FnCall: ['_vst3{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', '8']] + - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]] - name: "vst3{neon_type[1].nox}" doc: "Store multiple 3-element structures from three registers" @@ -5571,27 +5521,16 @@ intrinsics: safety: unsafe: [neon] types: - - [i8, int8x8x3_t, int8x8_t, '1'] - - [i16, int16x4x3_t, int16x4_t, '2'] - - [i32, int32x2x3_t, int32x2_t, '4'] - - [i8, int8x16x3_t, int8x16_t, '1'] - - [i16, int16x8x3_t, int16x8_t, '2'] - - [i32, int32x4x3_t, int32x4_t, '4'] - - [f32, float32x2x3_t, float32x2_t, '4'] - - [f32, float32x4x3_t, float32x4_t, '4'] + - [i8, int8x8x3_t, '8'] + - [i16, int16x4x3_t, '4'] + - [i32, int32x2x3_t, '2'] + - [i8, int8x16x3_t, '16'] + - [i16, int16x8x3_t, '8'] + - [i32, int32x4x3_t, '4'] + - [f32, float32x2x3_t, '2'] + - [f32, float32x4x3_t, '4'] compose: - - LLVMLink: - name: 'vst3.{neon_type[1]}' - arguments: - - 'ptr: *mut i8' - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'size: i32' - links: - - link: 'llvm.arm.neon.vst3.p0.v{neon_type[1].lane}{type[0]}' - arch: arm - - FnCall: ['_vst3{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', "{type[3]}"]] + - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "3", a, b], [], true] - name: "vst3{neon_type[1].nox}" @@ -5853,19 +5792,7 @@ intrinsics: types: - [i64, int64x1x4_t, int64x1_t] compose: - - LLVMLink: - name: 'vst4.{neon_type[1]}' - arguments: - - 'ptr: *mut i8' - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'd: {type[2]}' - - 'size: i32' - links: - - link: 'llvm.arm.neon.vst4.p0.v{neon_type[1].lane}{type[0]}' - arch: arm - - FnCall: ['_vst4{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', '8']] + - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]] - name: "vst4{neon_type[1].nox}" doc: "Store multiple 4-element structures from four registers" @@ -5879,18 +5806,7 @@ intrinsics: types: - [i64, int64x1x4_t, int64x1_t] compose: - - LLVMLink: - name: 'vst4.{neon_type[1]}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'd: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st4.{neon_type[2]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']] + - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]] - name: "vst4{neon_type[1].nox}" doc: "Store multiple 4-element structures from four registers" @@ -6114,27 +6030,16 @@ intrinsics: safety: unsafe: [neon] types: - - [i8, int8x8x4_t, int8x8_t] - - [i16, int16x4x4_t, int16x4_t] - - [i32, int32x2x4_t, int32x2_t] - - [i8, int8x16x4_t, int8x16_t] - - [i16, int16x8x4_t, int16x8_t] - - [i32, int32x4x4_t, int32x4_t] - - [f32, float32x2x4_t, float32x2_t] - - [f32, float32x4x4_t, float32x4_t] + - [i8, int8x8x4_t, "8"] + - [i16, int16x4x4_t, "4"] + - [i32, int32x2x4_t, "2"] + - [i8, int8x16x4_t, "16"] + - [i16, int16x8x4_t, "8"] + - [i32, int32x4x4_t, "4"] + - [f32, float32x2x4_t, "2"] + - [f32, float32x4x4_t, "4"] compose: - - LLVMLink: - name: 'vst4.{neon_type[1]}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'd: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']] + - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "4", a, b], [], true] - name: "vst4{neon_type[1].nox}"