Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 40 additions & 115 deletions crates/core_arch/src/aarch64/neon/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16067,14 +16067,11 @@ pub fn vpaddd_u64(a: uint64x2_t) -> u64 {
#[cfg(not(target_arch = "arm64ec"))]
#[cfg_attr(test, assert_instr(faddp))]
pub fn vpaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.faddp.v8f16"
)]
fn _vpaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
unsafe {
let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>());
let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<8>());
simd_add(even, odd)
}
unsafe { _vpaddq_f16(a, b) }
}
#[doc = "Floating-point add pairwise"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f32)"]
Expand All @@ -16083,14 +16080,11 @@ pub fn vpaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(faddp))]
pub fn vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.faddp.v4f32"
)]
fn _vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
unsafe {
let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>());
let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>());
simd_add(even, odd)
}
unsafe { _vpaddq_f32(a, b) }
}
#[doc = "Floating-point add pairwise"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f64)"]
Expand All @@ -16099,14 +16093,11 @@ pub fn vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(faddp))]
pub fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.faddp.v2f64"
)]
fn _vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
unsafe {
let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>());
let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>());
simd_add(even, odd)
}
unsafe { _vpaddq_f64(a, b) }
}
#[doc = "Add Pairwise"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s8)"]
Expand All @@ -16115,14 +16106,11 @@ pub fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addp))]
pub fn vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.addp.v16i8"
)]
fn _vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
unsafe {
let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<16>());
let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<16>());
simd_add(even, odd)
}
unsafe { _vpaddq_s8(a, b) }
}
#[doc = "Add Pairwise"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s16)"]
Expand All @@ -16131,14 +16119,11 @@ pub fn vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addp))]
pub fn vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.addp.v8i16"
)]
fn _vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
unsafe {
let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>());
let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<8>());
simd_add(even, odd)
}
unsafe { _vpaddq_s16(a, b) }
}
#[doc = "Add Pairwise"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s32)"]
Expand All @@ -16147,14 +16132,11 @@ pub fn vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addp))]
pub fn vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.addp.v4i32"
)]
fn _vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
unsafe {
let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>());
let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>());
simd_add(even, odd)
}
unsafe { _vpaddq_s32(a, b) }
}
#[doc = "Add Pairwise"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s64)"]
Expand All @@ -16163,119 +16145,62 @@ pub fn vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addp))]
pub fn vpaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
unsafe extern "unadjusted" {
#[cfg_attr(
any(target_arch = "aarch64", target_arch = "arm64ec"),
link_name = "llvm.aarch64.neon.addp.v2i64"
)]
fn _vpaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
unsafe {
let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>());
let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>());
simd_add(even, odd)
}
unsafe { _vpaddq_s64(a, b) }
}
#[doc = "Add Pairwise"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u8)"]
#[inline(always)]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon")]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addp))]
pub fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
unsafe { transmute(vpaddq_s8(transmute(a), transmute(b))) }
}
#[doc = "Add Pairwise"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u8)"]
#[inline(always)]
#[cfg(target_endian = "big")]
#[target_feature(enable = "neon")]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addp))]
pub fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
let a: uint8x16_t =
unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
let b: uint8x16_t =
unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
unsafe {
let ret_val: uint8x16_t = transmute(vpaddq_s8(transmute(a), transmute(b)));
simd_shuffle!(
ret_val,
ret_val,
[15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
)
let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<16>());
let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<16>());
simd_add(even, odd)
}
}
#[doc = "Add Pairwise"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u16)"]
#[inline(always)]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon")]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addp))]
pub fn vpaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
unsafe { transmute(vpaddq_s16(transmute(a), transmute(b))) }
}
#[doc = "Add Pairwise"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u16)"]
#[inline(always)]
#[cfg(target_endian = "big")]
#[target_feature(enable = "neon")]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addp))]
pub fn vpaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
let b: uint16x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
unsafe {
let ret_val: uint16x8_t = transmute(vpaddq_s16(transmute(a), transmute(b)));
simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>());
let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<8>());
simd_add(even, odd)
}
}
#[doc = "Add Pairwise"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u32)"]
#[inline(always)]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon")]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addp))]
pub fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
unsafe { transmute(vpaddq_s32(transmute(a), transmute(b))) }
}
#[doc = "Add Pairwise"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u32)"]
#[inline(always)]
#[cfg(target_endian = "big")]
#[target_feature(enable = "neon")]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addp))]
pub fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
let b: uint32x4_t = unsafe { simd_shuffle!(b, b, [3, 2, 1, 0]) };
unsafe {
let ret_val: uint32x4_t = transmute(vpaddq_s32(transmute(a), transmute(b)));
simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>());
let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>());
simd_add(even, odd)
}
}
#[doc = "Add Pairwise"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u64)"]
#[inline(always)]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon")]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addp))]
pub fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
unsafe { transmute(vpaddq_s64(transmute(a), transmute(b))) }
}
#[doc = "Add Pairwise"]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u64)"]
#[inline(always)]
#[cfg(target_endian = "big")]
#[target_feature(enable = "neon")]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(test, assert_instr(addp))]
pub fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
let b: uint64x2_t = unsafe { simd_shuffle!(b, b, [1, 0]) };
unsafe {
let ret_val: uint64x2_t = transmute(vpaddq_s64(transmute(a), transmute(b)));
simd_shuffle!(ret_val, ret_val, [1, 0])
let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>());
let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>());
simd_add(even, odd)
}
}
#[doc = "Floating-point add pairwise"]
Expand Down
24 changes: 23 additions & 1 deletion crates/core_arch/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,31 @@ macro_rules! simd_masked_store {
};
}

/// Builds the index mask `[0, 2, 4, ...]`: the first `N` even indices.
///
/// Usable in const context, so the result can be computed at compile time.
pub(crate) const fn even<const N: usize>() -> [u32; N] {
    let mut mask = [0u32; N];
    let mut lane = 0usize;
    // `for` loops are not available in `const fn`, so walk the slots manually.
    loop {
        if lane == N {
            break mask;
        }
        // Slot `lane` holds twice its own position.
        mask[lane] = (lane + lane) as u32;
        lane += 1;
    }
}

/// Builds the index mask `[1, 3, 5, ...]`: the first `N` odd indices.
///
/// Usable in const context, so the result can be computed at compile time.
pub(crate) const fn odd<const N: usize>() -> [u32; N] {
    let mut mask = [0u32; N];
    let mut lane = 0usize;
    // `for` loops are not available in `const fn`, so walk the slots manually.
    loop {
        if lane == N {
            break mask;
        }
        // Slot `lane` holds twice its own position, plus one.
        mask[lane] = (lane + lane + 1) as u32;
        lane += 1;
    }
}

/// Multiples of N offset by K `[K, K+N, K+2N, ...]`.
pub(crate) const fn deinterleave_mask<const LANES: usize, const N: usize, const K: usize>()
-> [u32; LANES] {
// Produces: [K, K+N, K+2N, ...]
let mut out = [0u32; LANES];
let mut i = 0usize;
while i < LANES {
Expand Down
Loading