From 6897f50fb22fb0148e62973120c37c897fb4afc9 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sun, 15 Feb 2026 13:22:01 +0100 Subject: [PATCH] use `intrinsics::simd` for vpadd --- .../core_arch/src/aarch64/neon/generated.rs | 155 +++++------------- crates/core_arch/src/macros.rs | 24 ++- .../spec/neon/aarch64.spec.yml | 91 +++++----- 3 files changed, 102 insertions(+), 168 deletions(-) diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index 119f903de7..c0e46c30ef 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -16067,14 +16067,11 @@ pub fn vpaddd_u64(a: uint64x2_t) -> u64 { #[cfg(not(target_arch = "arm64ec"))] #[cfg_attr(test, assert_instr(faddp))] pub fn vpaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.faddp.v8f16" - )] - fn _vpaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t; + unsafe { + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<8>()); + simd_add(even, odd) } - unsafe { _vpaddq_f16(a, b) } } #[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f32)"] @@ -16083,14 +16080,11 @@ pub fn vpaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(faddp))] pub fn vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.faddp.v4f32" - )] - fn _vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t; + unsafe { + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>()); + 
let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>()); + simd_add(even, odd) } - unsafe { _vpaddq_f32(a, b) } } #[doc = "Floating-point add pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f64)"] @@ -16099,14 +16093,11 @@ pub fn vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(faddp))] pub fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.faddp.v2f64" - )] - fn _vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t; + unsafe { + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>()); + simd_add(even, odd) } - unsafe { _vpaddq_f64(a, b) } } #[doc = "Add Pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s8)"] @@ -16115,14 +16106,11 @@ pub fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] pub fn vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.addp.v16i8" - )] - fn _vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t; + unsafe { + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<16>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<16>()); + simd_add(even, odd) } - unsafe { _vpaddq_s8(a, b) } } #[doc = "Add Pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s16)"] @@ -16131,14 +16119,11 @@ pub fn vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { 
#[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] pub fn vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.addp.v8i16" - )] - fn _vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t; + unsafe { + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<8>()); + simd_add(even, odd) } - unsafe { _vpaddq_s16(a, b) } } #[doc = "Add Pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s32)"] @@ -16147,14 +16132,11 @@ pub fn vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] pub fn vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.addp.v4i32" - )] - fn _vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t; + unsafe { + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>()); + simd_add(even, odd) } - unsafe { _vpaddq_s32(a, b) } } #[doc = "Add Pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s64)"] @@ -16163,119 +16145,62 @@ pub fn vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] pub fn vpaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.addp.v2i64" - )] - fn _vpaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t; + unsafe { + let even = 
simd_shuffle!(a, b, crate::core_arch::macros::even::<2>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>()); + simd_add(even, odd) } - unsafe { _vpaddq_s64(a, b) } } #[doc = "Add Pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u8)"] #[inline(always)] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(addp))] -pub fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - unsafe { transmute(vpaddq_s8(transmute(a), transmute(b))) } -} -#[doc = "Add Pairwise"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u8)"] -#[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] pub fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - let a: uint8x16_t = - unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; - let b: uint8x16_t = - unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; unsafe { - let ret_val: uint8x16_t = transmute(vpaddq_s8(transmute(a), transmute(b))); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<16>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<16>()); + simd_add(even, odd) } } #[doc = "Add Pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u16)"] #[inline(always)] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(addp))] -pub fn vpaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - unsafe { 
transmute(vpaddq_s16(transmute(a), transmute(b))) } -} -#[doc = "Add Pairwise"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u16)"] -#[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] pub fn vpaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; - let b: uint16x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; unsafe { - let ret_val: uint16x8_t = transmute(vpaddq_s16(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<8>()); + simd_add(even, odd) } } #[doc = "Add Pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u32)"] #[inline(always)] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(addp))] -pub fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - unsafe { transmute(vpaddq_s32(transmute(a), transmute(b))) } -} -#[doc = "Add Pairwise"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u32)"] -#[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] pub fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; - let b: uint32x4_t = unsafe { simd_shuffle!(b, b, [3, 2, 1, 0]) }; unsafe { - let ret_val: uint32x4_t = transmute(vpaddq_s32(transmute(a), transmute(b))); - 
simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>()); + simd_add(even, odd) } } #[doc = "Add Pairwise"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u64)"] #[inline(always)] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon")] -#[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(addp))] -pub fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - unsafe { transmute(vpaddq_s64(transmute(a), transmute(b))) } -} -#[doc = "Add Pairwise"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u64)"] -#[inline(always)] -#[cfg(target_endian = "big")] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(addp))] pub fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; - let b: uint64x2_t = unsafe { simd_shuffle!(b, b, [1, 0]) }; unsafe { - let ret_val: uint64x2_t = transmute(vpaddq_s64(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [1, 0]) + let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>()); + let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>()); + simd_add(even, odd) } } #[doc = "Floating-point add pairwise"] diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs index d40ce51c74..9f6922efee 100644 --- a/crates/core_arch/src/macros.rs +++ b/crates/core_arch/src/macros.rs @@ -187,9 +187,31 @@ macro_rules! simd_masked_store { }; } +/// The first N even indices `[0, 2, 4, ...]`. 
+pub(crate) const fn even<const N: usize>() -> [u32; N] { + let mut out = [0u32; N]; + let mut i = 0usize; + while i < N { + out[i] = (2 * i) as u32; + i += 1; + } + out +} + +/// The first N odd indices `[1, 3, 5, ...]`. +pub(crate) const fn odd<const N: usize>() -> [u32; N] { + let mut out = [0u32; N]; + let mut i = 0usize; + while i < N { + out[i] = (2 * i + 1) as u32; + i += 1; + } + out +} + +/// Multiples of N offset by K `[K, K+N, K+2N, ...]`. pub(crate) const fn deinterleave_mask<const LANES: usize, const N: usize, const K: usize>() -> [u32; LANES] { - // Produces: [K, K+N, K+2N, ...] let mut out = [0u32; LANES]; let mut i = 0usize; while i < LANES { diff --git a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml index b81f04ebc0..7ab68ff5f2 100644 --- a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -6961,28 +6961,29 @@ intrinsics: - FnCall: [simd_shuffle!, [a, a, "{type[3]}"]] - FnCall: ["vmovl{neon_type[0].noq}", [a]] - - name: "vpadd{neon_type.no}" - doc: Floating-point add pairwise - arguments: ["a: {neon_type}", "b: {neon_type}"] - return_type: "{type}" + - name: "vpadd{neon_type[0].no}" + doc: "Floating-point add pairwise" + arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" attr: [*neon-stable] assert_instr: [faddp] safety: safe types: - - float32x4_t - - float64x2_t + - [float32x4_t, "4"] + - [float64x2_t, "2"] compose: - - LLVMLink: - name: "faddp.{neon_type}" - links: - - link: "llvm.aarch64.neon.faddp.{neon_type}" - arch: aarch64,arm64ec - + - Let: + - even + - FnCall: ["simd_shuffle!", [a, b, "crate::core_arch::macros::even::<{type[1]}>()"]] + - Let: + - odd + - FnCall: ["simd_shuffle!", [a, b, "crate::core_arch::macros::odd::<{type[1]}>()"]] + - FnCall: [simd_add, [even, odd]] - - name: "vpadd{neon_type[0].no}" + - name: "vpadd{neon_type[0].no}" doc: Floating-point add pairwise - arguments: ["a: {neon_type}", "b: {neon_type}"] - return_type: "{type}" + arguments: ["a: 
{neon_type[0]}", "b: {neon_type[0]}"] + return_type: "{neon_type[0]}" attr: - *neon-fp16 - *neon-stable-fp16 @@ -6990,14 +6991,15 @@ intrinsics: assert_instr: [faddp] safety: safe types: - - float16x8_t + - [float16x8_t, "8"] compose: - - LLVMLink: - name: "faddp.{neon_type}" - links: - - link: "llvm.aarch64.neon.faddp.{neon_type}" - arch: aarch64,arm64ec - + - Let: + - even + - FnCall: ["simd_shuffle!", [a, b, "crate::core_arch::macros::even::<{type[1]}>()"]] + - Let: + - odd + - FnCall: ["simd_shuffle!", [a, b, "crate::core_arch::macros::odd::<{type[1]}>()"]] + - FnCall: [simd_add, [even, odd]] - name: "vpmax{neon_type.no}" doc: Floating-point add pairwise @@ -13235,26 +13237,6 @@ intrinsics: - link: "llvm.aarch64.neon.usqadd.{neon_type[1]}" arch: aarch64,arm64ec - - name: "vpadd{neon_type.no}" - doc: "Add Pairwise" - arguments: ["a: {neon_type}", "b: {neon_type}"] - return_type: "{neon_type}" - attr: - - *neon-stable - assert_instr: [addp] - safety: safe - types: - - int8x16_t - - int16x8_t - - int32x4_t - - int64x2_t - compose: - - LLVMLink: - name: "vpadd{neon_type.no}" - links: - - link: "llvm.aarch64.neon.addp.{neon_type}" - arch: aarch64,arm64ec - - name: "vpadd{neon_type[0].no}" doc: "Add Pairwise" arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] @@ -13264,17 +13246,22 @@ intrinsics: assert_instr: [addp] safety: safe types: - - [uint8x16_t, int8x16_t] - - [uint16x8_t, int16x8_t] - - [uint32x4_t, int32x4_t] - - [uint64x2_t, int64x2_t] + - [int8x16_t, "16"] + - [int16x8_t, "8"] + - [int32x4_t, "4"] + - [int64x2_t, "2"] + - [uint8x16_t, "16"] + - [uint16x8_t, "8"] + - [uint32x4_t, "4"] + - [uint64x2_t, "2"] compose: - - FnCall: - - transmute - - - FnCall: - - 'vpadd{neon_type[1].no}' - - - FnCall: [transmute, [a]] - - FnCall: [transmute, [b]] + - Let: + - even + - FnCall: ["simd_shuffle!", [a, b, "crate::core_arch::macros::even::<{type[1]}>()"]] + - Let: + - odd + - FnCall: ["simd_shuffle!", [a, b, "crate::core_arch::macros::odd::<{type[1]}>()"]] + - 
FnCall: [simd_add, [even, odd]] - name: "vpaddd_s64" doc: "Add pairwise"