diff --git a/ci/intrinsic-test.sh b/ci/intrinsic-test.sh index 0441611f38..49846efc8d 100755 --- a/ci/intrinsic-test.sh +++ b/ci/intrinsic-test.sh @@ -1,13 +1,19 @@ #!/usr/bin/env sh -set -ex - if [ $# -lt 2 ]; then - >&2 echo "Usage: $0 " + >&2 echo "Usage: $0 <..args for \`cargo test\`..>" exit 1 fi -case ${2} in +set -ex + +# Pop both arguments and leave "$@" as containing args to be forwarded to `cargo test` +TARGET="$1" +shift +CC_KIND="$1" +shift + +case ${CC_KIND} in clang) export CC="${CLANG_PATH}" CC_ARG_STYLE=clang @@ -35,49 +41,54 @@ echo "PROFILE=${PROFILE}" INTRINSIC_TEST="--manifest-path=crates/intrinsic-test/Cargo.toml" -case ${1} in +case ${TARGET} in aarch64_be*) export CFLAGS="-I${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc/usr/include --sysroot={AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc -Wno-nonportable-vector-initialization" ARCH=aarch64_be + RUNTIME_RUSTFLAGS= ;; aarch64*) export CFLAGS="-I/usr/aarch64-linux-gnu/include/" ARCH=aarch64 + RUNTIME_RUSTFLAGS=-Ctarget-feature=+sve,+sve2 ;; armv7*) export CFLAGS="-I/usr/arm-linux-gnueabihf/include/" ARCH=arm + RUNTIME_RUSTFLAGS= ;; x86_64*) export CFLAGS="-I/usr/include/x86_64-linux-gnu/" ARCH=x86 + RUNTIME_RUSTFLAGS= ;; *) ;; esac -case "${1}" in +case "${TARGET}" in x86_64-unknown-linux-gnu*) env -u CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER \ cargo run "${INTRINSIC_TEST}" --release \ --bin intrinsic-test -- intrinsics_data/x86-intel.xml \ --skip "crates/intrinsic-test/missing_${ARCH}_common.txt" \ - --skip "crates/intrinsic-test/missing_${ARCH}_${2}.txt" \ - --target "${1}" \ + --skip "crates/intrinsic-test/missing_${ARCH}_${CC_KIND}.txt" \ + --target "${TARGET}" \ --cc-arg-style "${CC_ARG_STYLE}" ;; *) cargo run "${INTRINSIC_TEST}" --release \ --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \ --skip "crates/intrinsic-test/missing_${ARCH}_common.txt" \ - --skip "crates/intrinsic-test/missing_${ARCH}_${2}.txt" \ - --target "${1}" \ + --skip "crates/intrinsic-test/missing_${ARCH}_${CC_KIND}.txt" \ + --target "${TARGET}" \ --cc-arg-style "${CC_ARG_STYLE}" ;; esac -cargo test --manifest-path=rust_programs/Cargo.toml --target "${1}" --profile "${PROFILE}" --tests +RUSTFLAGS="${RUNTIME_RUSTFLAGS}" cargo test --manifest-path=rust_programs/Cargo.toml \ + --target "${TARGET}" --profile "${PROFILE}" --tests "$@" diff --git a/crates/core_arch/src/aarch64/sve/generated.rs b/crates/core_arch/src/aarch64/sve/generated.rs index 116adcf746..ac3070918a 100644 --- a/crates/core_arch/src/aarch64/sve/generated.rs +++ b/crates/core_arch/src/aarch64/sve/generated.rs @@ -35226,19 +35226,6 @@ pub fn svreinterpret_u64_u64(op: svuint64_t) -> svuint64_t { unsafe { crate::intrinsics::transmute_unchecked(op) } } #[doc = "Reverse all elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev_b8)"] -#[inline] -#[target_feature(enable = "sve")] -#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] -#[cfg_attr(test, assert_instr(rev))] -pub fn svrev_b8(op: svbool_t) -> svbool_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv16i1")] - fn _svrev_b8(op: svbool_t) -> svbool_t; - } - unsafe { _svrev_b8(op) } -} -#[doc = "Reverse all elements"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev_b16)"] #[inline] #[target_feature(enable = "sve")] @@ -35246,10 +35233,10 @@ pub fn svrev_b8(op: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(rev))] pub fn svrev_b16(op: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv8i1")] - fn _svrev_b16(op: svbool8_t) -> svbool8_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.b16")] + fn _svrev_b16(op: svbool_t) -> svbool_t; } - unsafe { _svrev_b16(op.sve_into()).sve_into() } + unsafe { _svrev_b16(op.sve_into()) } } #[doc = "Reverse all elements"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev_b32)"] @@ -35259,10 +35246,10 @@ pub fn svrev_b16(op: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(rev))] pub fn svrev_b32(op: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv4i1")] - fn _svrev_b32(op: svbool4_t) -> svbool4_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.b32")] + fn _svrev_b32(op: svbool_t) -> svbool_t; } - unsafe { _svrev_b32(op.sve_into()).sve_into() } + unsafe { _svrev_b32(op.sve_into()) } } #[doc = "Reverse all elements"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev_b64)"] @@ -35272,10 +35259,10 @@ pub fn svrev_b32(op: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(rev))] pub fn svrev_b64(op: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv2i1")] - fn _svrev_b64(op: svbool2_t) -> svbool2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.b64")] + fn _svrev_b64(op: svbool_t) -> svbool_t; } - unsafe { _svrev_b64(op.sve_into()).sve_into() } + unsafe { _svrev_b64(op.sve_into()) } } #[doc = "Reverse all elements"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev[_f32])"] @@ -35391,6 +35378,19 @@ pub fn svrev_u32(op: svuint32_t) -> svuint32_t { pub fn svrev_u64(op: svuint64_t) -> svuint64_t { unsafe { svrev_s64(op.as_signed()).as_unsigned() } } +#[doc = "Reverse all elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev[_b8])"] +#[inline] +#[target_feature(enable = "sve")] +#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] +#[cfg_attr(test, assert_instr(rev))] +pub fn svrev_b8(op: svbool_t) -> svbool_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv16i1")] + fn _svrev_b8(op: svbool_t) -> svbool_t; + } + unsafe { _svrev_b8(op) } +} #[doc = "Reverse bytes within elements"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevb[_s16]_m)"] #[inline] @@ -43336,19 +43336,6 @@ pub fn svusmmla_s32(op1: svint32_t, op2: svuint8_t, op3: svint8_t) -> svint32_t unsafe { _svusmmla_s32(op1, op2.as_signed(), op3) } } #[doc = "Concatenate even elements from two inputs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1_b8)"] -#[inline] -#[target_feature(enable = "sve")] -#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] -#[cfg_attr(test, assert_instr(uzp1))] -pub fn svuzp1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.nxv16i1")] - fn _svuzp1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; - } - unsafe { _svuzp1_b8(op1, op2) } -} -#[doc = "Concatenate even elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1_b16)"] #[inline] #[target_feature(enable = "sve")] @@ -43356,10 +43343,10 @@ pub fn svuzp1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(uzp1))] pub fn svuzp1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.nxv8i1")] - fn _svuzp1_b16(op1: svbool8_t, op2: svbool8_t) -> svbool8_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.b16")] + fn _svuzp1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svuzp1_b16(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svuzp1_b16(op1.sve_into(), op2.sve_into()) } } #[doc = "Concatenate even elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1_b32)"] @@ -43369,10 +43356,10 @@ pub fn svuzp1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(uzp1))] pub fn svuzp1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.nxv4i1")] - fn _svuzp1_b32(op1: svbool4_t, op2: svbool4_t) -> svbool4_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.b32")] + fn _svuzp1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svuzp1_b32(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svuzp1_b32(op1.sve_into(), op2.sve_into()) } } #[doc = "Concatenate even elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1_b64)"] @@ -43382,10 +43369,10 @@ pub fn svuzp1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(uzp1))] pub fn svuzp1_b64(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.nxv2i1")] - fn _svuzp1_b64(op1: svbool2_t, op2: svbool2_t) -> svbool2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.b64")] + fn _svuzp1_b64(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svuzp1_b64(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svuzp1_b64(op1.sve_into(), op2.sve_into()) } } #[doc = "Concatenate even elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1[_f32])"] @@ -43501,6 +43488,19 @@ pub fn svuzp1_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t { pub fn svuzp1_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t { unsafe { svuzp1_s64(op1.as_signed(), op2.as_signed()).as_unsigned() } } +#[doc = "Concatenate even elements from two inputs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1[_b8])"] +#[inline] +#[target_feature(enable = "sve")] +#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] +#[cfg_attr(test, assert_instr(uzp1))] +pub fn svuzp1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.nxv16i1")] + fn _svuzp1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; + } + unsafe { _svuzp1_b8(op1, op2) } +} #[doc = "Concatenate even quadwords from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1q[_f32])"] #[inline] @@ -43616,19 +43616,6 @@ pub fn svuzp1q_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t { unsafe { svuzp1q_s64(op1.as_signed(), op2.as_signed()).as_unsigned() } } #[doc = "Concatenate odd elements from two inputs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2_b8)"] -#[inline] -#[target_feature(enable = "sve")] -#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] -#[cfg_attr(test, assert_instr(uzp2))] -pub fn svuzp2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.nxv16i1")] - fn _svuzp2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; - } - unsafe { _svuzp2_b8(op1, op2) } -} -#[doc = "Concatenate odd elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2_b16)"] #[inline] #[target_feature(enable = "sve")] @@ -43636,10 +43623,10 @@ pub fn svuzp2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(uzp2))] pub fn svuzp2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.nxv8i1")] - fn _svuzp2_b16(op1: svbool8_t, op2: svbool8_t) -> svbool8_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.b16")] + fn _svuzp2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svuzp2_b16(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svuzp2_b16(op1.sve_into(), op2.sve_into()) } } #[doc = "Concatenate odd elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2_b32)"] @@ -43649,10 +43636,10 @@ pub fn svuzp2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(uzp2))] pub fn svuzp2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.nxv4i1")] - fn _svuzp2_b32(op1: svbool4_t, op2: svbool4_t) -> svbool4_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.b32")] + fn _svuzp2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svuzp2_b32(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svuzp2_b32(op1.sve_into(), op2.sve_into()) } } #[doc = "Concatenate odd elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2_b64)"] @@ -43662,10 +43649,10 @@ pub fn svuzp2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(uzp2))] pub fn svuzp2_b64(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.nxv2i1")] - fn _svuzp2_b64(op1: svbool2_t, op2: svbool2_t) -> svbool2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.b64")] + fn _svuzp2_b64(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svuzp2_b64(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svuzp2_b64(op1.sve_into(), op2.sve_into()) } } #[doc = "Concatenate odd elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2[_f32])"] @@ -43781,6 +43768,19 @@ pub fn svuzp2_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t { pub fn svuzp2_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t { unsafe { svuzp2_s64(op1.as_signed(), op2.as_signed()).as_unsigned() } } +#[doc = "Concatenate odd elements from two inputs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2[_b8])"] +#[inline] +#[target_feature(enable = "sve")] +#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] +#[cfg_attr(test, assert_instr(uzp2))] +pub fn svuzp2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.nxv16i1")] + fn _svuzp2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; + } + unsafe { _svuzp2_b8(op1, op2) } +} #[doc = "Concatenate odd quadwords from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2q[_f32])"] #[inline] @@ -44421,19 +44421,6 @@ pub fn svwrffr(op: svbool_t) { unsafe { _svwrffr(op) } } #[doc = "Interleave elements from low halves of two inputs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1_b8)"] -#[inline] -#[target_feature(enable = "sve")] -#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] -#[cfg_attr(test, assert_instr(zip1))] -pub fn svzip1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.nxv16i1")] - fn _svzip1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; - } - unsafe { _svzip1_b8(op1, op2) } -} -#[doc = "Interleave elements from low halves of two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1_b16)"] #[inline] #[target_feature(enable = "sve")] @@ -44441,10 +44428,10 @@ pub fn svzip1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(zip1))] pub fn svzip1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.nxv8i1")] - fn _svzip1_b16(op1: svbool8_t, op2: svbool8_t) -> svbool8_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.b16")] + fn _svzip1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svzip1_b16(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svzip1_b16(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave elements from low halves of two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1_b32)"] @@ -44454,10 +44441,10 @@ pub fn svzip1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(zip1))] pub fn svzip1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.nxv4i1")] - fn _svzip1_b32(op1: svbool4_t, op2: svbool4_t) -> svbool4_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.b32")] + fn _svzip1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svzip1_b32(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svzip1_b32(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave elements from low halves of two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1_b64)"] @@ -44467,10 +44454,10 @@ pub fn svzip1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(zip1))] pub fn svzip1_b64(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.nxv2i1")] - fn _svzip1_b64(op1: svbool2_t, op2: svbool2_t) -> svbool2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.b64")] + fn _svzip1_b64(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svzip1_b64(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svzip1_b64(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave elements from low halves of two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1[_f32])"] @@ -44586,6 +44573,19 @@ pub fn svzip1_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t { pub fn svzip1_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t { unsafe { svzip1_s64(op1.as_signed(), op2.as_signed()).as_unsigned() } } +#[doc = "Interleave elements from low halves of two inputs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1[_b8])"] +#[inline] +#[target_feature(enable = "sve")] +#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] +#[cfg_attr(test, assert_instr(zip1))] +pub fn svzip1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.nxv16i1")] + fn _svzip1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; + } + unsafe { _svzip1_b8(op1, op2) } +} #[doc = "Interleave quadwords from low halves of two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1q[_f32])"] #[inline] @@ -44701,19 +44701,6 @@ pub fn svzip1q_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t { unsafe { svzip1q_s64(op1.as_signed(), op2.as_signed()).as_unsigned() } } #[doc = "Interleave elements from high halves of two inputs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2_b8)"] -#[inline] -#[target_feature(enable = "sve")] -#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] -#[cfg_attr(test, assert_instr(zip2))] -pub fn svzip2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.nxv16i1")] - fn _svzip2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; - } - unsafe { _svzip2_b8(op1, op2) } -} -#[doc = "Interleave elements from high halves of two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2_b16)"] #[inline] #[target_feature(enable = "sve")] @@ -44721,10 +44708,10 @@ pub fn svzip2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(zip2))] pub fn svzip2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.nxv8i1")] - fn _svzip2_b16(op1: svbool8_t, op2: svbool8_t) -> svbool8_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.b16")] + fn _svzip2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svzip2_b16(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svzip2_b16(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave elements from high halves of two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2_b32)"] @@ -44734,10 +44721,10 @@ pub fn svzip2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(zip2))] pub fn svzip2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.nxv4i1")] - fn _svzip2_b32(op1: svbool4_t, op2: svbool4_t) -> svbool4_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.b32")] + fn _svzip2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svzip2_b32(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svzip2_b32(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave elements from high halves of two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2_b64)"] @@ -44747,10 +44734,10 @@ pub fn svzip2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(zip2))] pub fn svzip2_b64(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.nxv2i1")] - fn _svzip2_b64(op1: svbool2_t, op2: svbool2_t) -> svbool2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.b64")] + fn _svzip2_b64(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svzip2_b64(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svzip2_b64(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave elements from high halves of two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2[_f32])"] @@ -44866,6 +44853,19 @@ pub fn svzip2_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t { pub fn svzip2_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t { unsafe { svzip2_s64(op1.as_signed(), op2.as_signed()).as_unsigned() } } +#[doc = "Interleave elements from high halves of two inputs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2[_b8])"] +#[inline] +#[target_feature(enable = "sve")] +#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] +#[cfg_attr(test, assert_instr(zip2))] +pub fn svzip2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.nxv16i1")] + fn _svzip2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; + } + unsafe { _svzip2_b8(op1, op2) } +} #[doc = "Interleave quadwords from high halves of two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2q[_f32])"] #[inline] diff --git a/crates/core_arch/src/aarch64/sve/mod.rs b/crates/core_arch/src/aarch64/sve/mod.rs index c6d5d57aca..f11ca660b1 100644 --- a/crates/core_arch/src/aarch64/sve/mod.rs +++ b/crates/core_arch/src/aarch64/sve/mod.rs @@ -28,6 +28,14 @@ pub(super) trait SveInto: Sized { unsafe fn sve_into(self) -> T; } +impl SveInto for T { + #[inline] + #[target_feature(enable = "sve")] + unsafe fn sve_into(self) -> T { + self + } +} + macro_rules! impl_sve_type { ($(($v:vis, $elem_type:ty, $name:ident, $elt:literal))*) => ($( #[doc = concat!("Scalable vector of type ", stringify!($elem_type))] diff --git a/crates/intrinsic-test/src/arm/config.rs b/crates/intrinsic-test/src/arm/config.rs deleted file mode 100644 index 7c26143622..0000000000 --- a/crates/intrinsic-test/src/arm/config.rs +++ /dev/null @@ -1,25 +0,0 @@ -pub const NOTICE: &str = "\ -// This is a transient test file, not intended for distribution. Some aspects of the -// test are derived from a JSON specification, published under the same license as the -// `intrinsic-test` crate.\n"; - -pub const PLATFORM_RUST_DEFINITIONS: &str = ""; - -pub const PLATFORM_RUST_CFGS: &str = r#" -#![cfg_attr(target_arch = "arm", feature(stdarch_arm_neon_intrinsics))] -#![cfg_attr(target_arch = "arm", feature(stdarch_aarch32_crc32))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fcma))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_i8mm))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_feat_lut))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fp8))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(faminmax))] -#![feature(stdarch_neon_f16)] - -#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] -use core_arch::arch::aarch64::*; - -#[cfg(target_arch = "arm")] -use core_arch::arch::arm::*; -"#; diff --git a/crates/intrinsic-test/src/arm/intrinsic.rs b/crates/intrinsic-test/src/arm/intrinsic.rs index a54e585719..bcbee3503b 100644 --- a/crates/intrinsic-test/src/arm/intrinsic.rs +++ b/crates/intrinsic-test/src/arm/intrinsic.rs @@ -2,9 +2,9 @@ use crate::common::intrinsic_helpers::IntrinsicType; use std::ops::{Deref, DerefMut}; #[derive(Debug, Clone, PartialEq)] -pub struct ArmIntrinsicType(pub IntrinsicType); +pub struct ArmType(pub IntrinsicType); -impl Deref for ArmIntrinsicType { +impl Deref for ArmType { type Target = IntrinsicType; fn deref(&self) -> &Self::Target { @@ -12,7 +12,7 @@ impl Deref for ArmIntrinsicType { } } -impl DerefMut for ArmIntrinsicType { +impl DerefMut for ArmType { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } diff --git a/crates/intrinsic-test/src/arm/json_parser.rs b/crates/intrinsic-test/src/arm/json_parser.rs index 06cf78a422..013c20f2db 100644 --- a/crates/intrinsic-test/src/arm/json_parser.rs +++ b/crates/intrinsic-test/src/arm/json_parser.rs @@ -1,4 +1,5 @@ -use super::intrinsic::ArmIntrinsicType; +use super::intrinsic::ArmType; +use crate::arm::Arm; use crate::arm::types::parse_intrinsic_type; use crate::common::argument::{Argument, ArgumentList}; use crate::common::constraint::Constraint; @@ -57,32 +58,24 @@ struct JsonIntrinsic { _instructions: Option>>, } -pub fn get_neon_intrinsics( - filename: &Path, -) -> Result>, Box> { +pub fn get_intrinsics(filename: &Path) -> Result>, Box> { let file = std::fs::File::open(filename)?; let reader = std::io::BufReader::new(file); let json: Vec = serde_json::from_reader(reader).expect("Couldn't parse JSON"); let parsed = json .into_iter() - .filter_map(|intr| { - if intr.simd_isa == "Neon" { - Some(json_to_intrinsic(intr).expect("Couldn't parse JSON")) - } else { - None - } - }) + .map(|intr| json_to_intrinsic(intr).expect("Couldn't parse JSON")) .collect(); Ok(parsed) } fn json_to_intrinsic( mut intr: JsonIntrinsic, -) -> Result, Box> { +) -> Result, Box> { let name = intr.name.replace(['[', ']'], ""); - let result_ty = ArmIntrinsicType(parse_intrinsic_type(&intr.return_type.value)?); + let result_ty = ArmType(parse_intrinsic_type(&intr.return_type.value)?); let args = intr .arguments @@ -120,11 +113,13 @@ fn json_to_intrinsic( } }); - let mut arg = Argument::::new( + let is_predicate = arg_name == "pg"; + let mut arg = Argument::::new( i, String::from(arg_name), - ArmIntrinsicType(arg_ty), + ArmType(arg_ty), constraint, + is_predicate, ); // The JSON doesn't list immediates as const @@ -138,13 +133,14 @@ fn json_to_intrinsic( }) .collect(); - let arguments = ArgumentList:: { args }; + let arguments = ArgumentList:: { args }; Ok(Intrinsic { name, arguments, results: result_ty, arch_tags: intr.architectures, + extension: intr.simd_isa, }) } diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 378f23ba7c..48fa88185e 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -1,45 +1,65 @@ -mod config; mod intrinsic; mod json_parser; mod types; -use crate::common::SupportedArchitectureTest; +use crate::common::argument::Argument; use crate::common::cli::{CcArgStyle, ProcessedCli}; use crate::common::intrinsic::Intrinsic; -use crate::common::intrinsic_helpers::TypeKind; -use intrinsic::ArmIntrinsicType; -use json_parser::get_neon_intrinsics; +use crate::common::intrinsic_helpers::{SimdLen, TypeDefinition, TypeKind}; +use crate::common::values::test_values_array_name; +use crate::common::{PASSES, PREDICATE_LOCAL, SupportedArchitecture}; +use intrinsic::ArmType; +use json_parser::get_intrinsics; -pub struct ArmArchitectureTest { - intrinsics: Vec>, -} +#[derive(PartialEq)] +pub struct Arm(Vec>); -impl SupportedArchitectureTest for ArmArchitectureTest { - type IntrinsicImpl = ArmIntrinsicType; +impl SupportedArchitecture for Arm { + type Type = ArmType; - fn intrinsics(&self) -> &[Intrinsic] { - &self.intrinsics + fn intrinsics(&self) -> &[Intrinsic] { + &self.0 } - const NOTICE: &str = config::NOTICE; - - const PLATFORM_C_HEADERS: &[&str] = &["arm_neon.h", "arm_acle.h", "arm_fp16.h"]; + const NOTICE: &str = r#" +// This is a transient test file, not intended for distribution. Some aspects of the +// test are derived from a JSON specification, published under the same license as the +// `intrinsic-test` crate. +"#; - const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; - const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; + const C_PRELUDE: &str = r#" +#include +#include +#include +#ifdef __ARM_FEATURE_SVE +#include +#endif +"#; + const RUST_PRELUDE: &str = RUST_PRELUDE; - fn arch_flags(&self, cli_options: &ProcessedCli) -> Vec<&str> { + fn c_compiler_flags(&self, cli_options: &ProcessedCli) -> Vec<&str> { // GCC uses an extra `-` in the arch name + let big_endian = cli_options.target.starts_with("aarch64_be"); + let a32 = cli_options.target.starts_with("armv7"); match cli_options.cc_arg_style { + CcArgStyle::Clang if !a32 && !big_endian => vec![ + "-march=armv8.6a+crypto+crc+dotprod+fp16+sve2-aes+sve2-sm4+sve2-sha3+sve2-bitperm+\ + f32mm+f64mm+sve2p1", + ], CcArgStyle::Clang => vec!["-march=armv8.6a+crypto+crc+dotprod+fp16"], + CcArgStyle::Gcc if !a32 && !big_endian => vec![ + "-march=armv8.6-a+crypto+crc+dotprod+fp16+sha3+sm4+sve2-aes+sve2-sm4+sve2-sha3+\ + sve2-bitperm+f32mm+f64mm+sve2p1", + ], CcArgStyle::Gcc => vec!["-march=armv8.6-a+crypto+crc+dotprod+fp16+sha3+sm4"], } } fn create(cli_options: &ProcessedCli) -> Self { + let big_endian = cli_options.target.starts_with("aarch64_be"); let a32 = cli_options.target.starts_with("armv7"); let mut intrinsics = - get_neon_intrinsics(&cli_options.filename).expect("Error parsing input file"); + get_intrinsics(&cli_options.filename).expect("Error parsing input file"); intrinsics.sort_by(|a, b| a.name.cmp(&b.name)); intrinsics.dedup(); @@ -54,6 +74,53 @@ impl SupportedArchitectureTest for ArmArchitectureTest { // Skip bfloat intrinsics - not currently supported .filter(|i| i.results.kind() != TypeKind::BFloat) .filter(|i| !i.arguments.iter().any(|a| a.ty.kind() == TypeKind::BFloat)) + // Skip SVE intrinsics that have `f16` - not yet implemented! + .filter(|i| { + let has_f16_arg = i + .arguments + .iter() + .any(|a| a.ty.kind() == TypeKind::Float && a.ty.bit_len == Some(16)); + let has_sve_arg = i + .arguments + .iter() + .any(|a| a.ty.num_lanes() == SimdLen::Scalable); + !(has_f16_arg && has_sve_arg) + }) + .filter(|i| { + let has_f16_ret = + i.results.kind() == TypeKind::Float && i.results.bit_len == Some(16); + let has_sve_ret = i.results.num_lanes() == SimdLen::Scalable; + !(has_f16_ret && has_sve_ret) + }) + // Skip `svqcvtn{u,}n*_x2` intrinsics - not yet implemented! + .filter(|i| !(i.name.starts_with("svqcvtn") && i.name.ends_with("_x2"))) + // Skip `svqrshr{u,}n*_x2` intrinsics - not yet implemented! + .filter(|i| !(i.name.starts_with("svqrshrn") && i.name.ends_with("_x2"))) + .filter(|i| !(i.name.starts_with("svqrshrun") && i.name.ends_with("_x2"))) + // Skip `svclamp*` intrinsics - not yet implemented! + .filter(|i| !i.name.starts_with("svclamp")) + // Skip `svdot{_lane,}_{s,u}32_{s,u}16` intrinsics - not yet implemented! + .filter(|i| { + i.name != "svdot_lane_u32_u16" + && i.name != "svdot_lane_s32_s16" + && i.name != "svdot_u32_u16" + && i.name != "svdot_s32_s16" + }) + // Skip `svrevd*` intrinsics - not yet implemented! + .filter(|i| !i.name.starts_with("svrevd")) + // Skip `svpsel_lane_b*` intrinsics - not yet implemented! + .filter(|i| !i.name.starts_with("svpsel_lane_b")) + // Skip `svundef*` intrinsics - to avoid undefined behaviour in Rust, these return + // zeroed vectors in Rust, which are inherently going to be different than the + // undefined vectors returned by the C intrinsics. + .filter(|i| !i.name.starts_with("svundef")) + // Skip `sveorv` intrinsics - the code produced by `intrinsic-test` for these + // miscompiles and the Rust intrinsic call gets replaced by a constant zero (see + // llvm/llvm-project#203921). + .filter(|i| !i.name.starts_with("sveorv")) + // These load intrinsics expect each element in the scalable vector `bases` argument to + // be able to be cast to a pointer, which we don't support generating tests for yet. + .filter(|i| !(i.name.starts_with("svld") && i.name.contains("_gather_"))) // Skip pointers for now, we would probably need to look at the return // type to work out how many elements we need to point to. .filter(|i| !i.arguments.iter().any(|a| a.is_ptr())) @@ -63,9 +130,101 @@ impl SupportedArchitectureTest for ArmArchitectureTest { .filter(|i| !cli_options.skip.contains(&i.name)) // Skip A64-specific intrinsics on A32 .filter(|i| !(a32 && i.arch_tags == vec!["A64".to_string()])) + // Skip SVE intrinsics on big endian + .filter(|i| !(big_endian && (i.extension == "SVE" || i.extension == "SVE2"))) .take(sample_size) .collect::>(); - Self { intrinsics } + Self(intrinsics) + } + + fn predicate_function(size: u32) -> String { + format!("svptrue_b{size}()") + } + + fn load_call(arg: &Argument, idx: usize) -> String { + let name = arg.generate_name(); + let load = arg.ty.load_function(); + let ptr = format!( + "{vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _", + vals_name = test_values_array_name(&arg.ty) + ); + + match arg.ty.num_lanes() { + // If the load is of a `svbool_t`, then we load a `svint8_t` and + SimdLen::Scalable if matches!(arg.ty.kind(), TypeKind::Bool) => { + format!( + r#" +let {name} = {load}({PREDICATE_LOCAL}, {ptr}); +let {name} = svcmpne_n_s8({PREDICATE_LOCAL}, {name}, 0); + "# + ) + } + // If this load is of a scalable vector, then prepend an additional argument + // containing the predicate for the load. + SimdLen::Scalable => format!("let {name} = {load}({PREDICATE_LOCAL}, {ptr});"), + SimdLen::Fixed(..) => format!("let {name} = {load}({ptr});"), + } + } +} + +const RUST_PRELUDE: &str = r#" +#![cfg_attr(target_arch = "arm", feature(stdarch_arm_neon_intrinsics))] +#![cfg_attr(target_arch = "arm", feature(stdarch_aarch32_crc32))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fcma))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_i8mm))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_feat_lut))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fp8))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(faminmax))] +#![feature(stdarch_neon_f16)] +#![feature(stdarch_aarch64_sve)] + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +use core_arch::arch::aarch64::*; + +#[cfg(target_arch = "arm")] +use core_arch::arch::arm::*; + +const fn svpattern_from_i32(value: i32) -> svpattern { + match value { + 0 => svpattern::SV_POW2, + 1 => svpattern::SV_VL1, + 2 => svpattern::SV_VL2, + 3 => svpattern::SV_VL3, + 4 => svpattern::SV_VL4, + 5 => svpattern::SV_VL5, + 6 => svpattern::SV_VL6, + 7 => svpattern::SV_VL7, + 8 => svpattern::SV_VL8, + 9 => svpattern::SV_VL16, + 10 => svpattern::SV_VL32, + 11 => svpattern::SV_VL64, + 12 => svpattern::SV_VL128, + 13 => svpattern::SV_VL256, + 29 => svpattern::SV_MUL4, + 30 => svpattern::SV_MUL3, + 31 => svpattern::SV_ALL, + _ => unreachable!(), + } +} + +const fn svprfop_from_i32(value: i32) -> svprfop { + match value { + 0 => svprfop::SV_PLDL1KEEP, + 1 => svprfop::SV_PLDL1STRM, + 2 => svprfop::SV_PLDL2KEEP, + 3 => svprfop::SV_PLDL2STRM, + 4 => svprfop::SV_PLDL3KEEP, + 5 => svprfop::SV_PLDL3STRM, + 8 => svprfop::SV_PSTL1KEEP, + 9 => svprfop::SV_PSTL1STRM, + 10 => svprfop::SV_PSTL2KEEP, + 11 => svprfop::SV_PSTL2STRM, + 12 => svprfop::SV_PSTL3KEEP, + 13 => svprfop::SV_PSTL3STRM, + _ => unreachable!(), } } +"#; diff --git a/crates/intrinsic-test/src/arm/types.rs b/crates/intrinsic-test/src/arm/types.rs index cd420f1067..2e628aff92 100644 --- a/crates/intrinsic-test/src/arm/types.rs +++ b/crates/intrinsic-test/src/arm/types.rs @@ -1,28 +1,41 @@ -use super::intrinsic::ArmIntrinsicType; +use super::intrinsic::ArmType; +use crate::common::PREDICATE_LOCAL; use crate::common::intrinsic_helpers::{ - IntrinsicType, IntrinsicTypeDefinition, Sign, SimdLen, TypeKind, + IntrinsicType, Sign, SimdLen, TypeDefinition, TypeKind, default_fixed_vector_comparison, }; +use itertools::Itertools; -impl IntrinsicTypeDefinition for ArmIntrinsicType { +impl TypeDefinition for ArmType { /// Gets a string containing the typename for this type in C format. fn c_type(&self) -> String { let prefix = self.kind.c_prefix(); - if let Some(bit_len) = self.bit_len { - match (self.simd_len, self.vec_len) { - (None, None) => format!("{prefix}{bit_len}_t"), - (Some(SimdLen::Fixed(simd)), None) => format!("{prefix}{bit_len}x{simd}_t"), - (Some(SimdLen::Fixed(simd)), Some(vec)) => { - format!("{prefix}{bit_len}x{simd}x{vec}_t") - } - (Some(SimdLen::Scalable), None) => format!("sv{prefix}{bit_len}_t"), - (Some(SimdLen::Scalable), Some(vec)) => { - format!("sv{prefix}{bit_len}x{vec}_t") - } - (None, Some(_)) => todo!("{self:#?}"), // Likely an invalid case + match (self.bit_len, self.simd_len, self.vec_len) { + // e.g. `bool` + (Some(_), None, None) if matches!(self.kind, TypeKind::Bool) => { + format!("{prefix}") } - } else { - todo!("{self:#?}") + // e.g. `float32_t`, `int64_t` + (Some(bit_len), None, None) => format!("{prefix}{bit_len}_t"), + // e.g. `float32x2_t`, `int64x2_t` + (Some(bit_len), Some(SimdLen::Fixed(simd)), None) => { + format!("{prefix}{bit_len}x{simd}_t") + } + // e.g. `float32x2x3_t`, `int64x2x3_t` + (Some(bit_len), Some(SimdLen::Fixed(simd)), Some(vec)) => { + format!("{prefix}{bit_len}x{simd}x{vec}_t") + } + // e.g. `svbool_t` + (Some(_), Some(SimdLen::Scalable), None) if matches!(self.kind, TypeKind::Bool) => { + format!("sv{prefix}_t") + } + // e.g. `svfloat32_t`, `svint64_t` + (Some(bit_len), Some(SimdLen::Scalable), None) => format!("sv{prefix}{bit_len}_t"), + // e.g. `svfloat32x3_t`, `svint64x3_t` + (Some(bit_len), Some(SimdLen::Scalable), Some(vec)) => { + format!("sv{prefix}{bit_len}x{vec}_t") + } + _ => todo!("{self:#?}"), } } @@ -30,50 +43,158 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { let rust_prefix = self.kind.rust_prefix(); let c_prefix = self.kind.c_prefix(); - if let Some(bit_len) = self.bit_len { - match (self.simd_len, self.vec_len) { - (None, None) => format!("{rust_prefix}{bit_len}"), - (Some(SimdLen::Fixed(simd)), None) => format!("{c_prefix}{bit_len}x{simd}_t"), - (Some(SimdLen::Fixed(simd)), Some(vec)) => { - format!("{c_prefix}{bit_len}x{simd}x{vec}_t") + match (self.bit_len, self.simd_len, self.vec_len) { + // e.g. `svpattern` + (None, _, _) => format!("{rust_prefix}"), + // e.g. `bool` + (Some(_), None, None) if matches!(self.kind, TypeKind::Bool) => { + format!("{rust_prefix}") + } + // e.g. `i32` + (Some(bit_len), None, None) => format!("{rust_prefix}{bit_len}"), + // e.g. `int32x2_t` + (Some(bit_len), Some(SimdLen::Fixed(simd)), None) => { + format!("{c_prefix}{bit_len}x{simd}_t") + } + // e.g. `int32x2x3_t` + (Some(bit_len), Some(SimdLen::Fixed(simd)), Some(vec)) => { + format!("{c_prefix}{bit_len}x{simd}x{vec}_t") + } + // e.g. `svbool_t` + (Some(_), Some(SimdLen::Scalable), None) if matches!(self.kind, TypeKind::Bool) => { + format!("sv{c_prefix}_t") + } + // e.g. `svint32_t` + (Some(bit_len), Some(SimdLen::Scalable), None) => format!("sv{c_prefix}{bit_len}_t"), + // e.g. `svint32x3_t` + (Some(bit_len), Some(SimdLen::Scalable), Some(vec)) => { + format!("sv{c_prefix}{bit_len}x{vec}_t") + } + (Some(_), None, Some(_)) => todo!("{self:#?}"), + } + } + + fn rust_scalar_type_for_test_value_array(&self) -> String { + if self.kind() == TypeKind::Bool && self.num_lanes() == SimdLen::Scalable { + let mut ty = self.clone(); + ty.kind = TypeKind::Int(Sign::Signed); + ty.rust_scalar_type() + } else { + self.rust_scalar_type() + } + } + + /// Determines the load function for this type. + fn load_function(&self) -> String { + if let Some(bl) = self.bit_len { + match self.num_lanes() { + SimdLen::Scalable => { + format!( + "svld{len}_{type}{bl}", + len = self.num_vectors(), + type = self.rust_intrinsic_name_prefix(), + ) } - (Some(SimdLen::Scalable), None) => format!("sv{c_prefix}{bit_len}_t"), - (Some(SimdLen::Scalable), Some(vec)) => { - format!("sv{c_prefix}{bit_len}x{vec}_t") + SimdLen::Fixed(num_lanes) => { + format!( + "vld{len}{quad}_{type}{bl}", + quad = if num_lanes * bl > 64 { "q" } else { "" }, + len = self.num_vectors(), + type = self.rust_intrinsic_name_prefix(), + ) } - (None, Some(_)) => todo!("{self:#?}"), // Likely an invalid case } } else { - todo!("{self:#?}") + todo!("load_function IntrinsicType: {self:#?}") } } - /// Determines the load function for this type. - fn get_load_function(&self) -> String { - if let IntrinsicType { - kind: k, - bit_len: Some(bl), - vec_len, - .. - } = **self - { - let quad = if self.num_lanes() * bl > 64 { "q" } else { "" }; + fn comparison_function(&self) -> String { + if let SimdLen::Fixed(num_lanes) = self.num_lanes() { + return default_fixed_vector_comparison(self, num_lanes); + } + + if self.kind() == TypeKind::Bool { + // There isn't a `svcmpeq` for `svbool_t` and there aren't `svboolxN_t` types, so just + // do an XOR and test it is empty. + return format!( + r#" +let __eq = sveor_b_z({PREDICATE_LOCAL}, __rust_return_value, __c_return_value); +assert!(!svptest_any({PREDICATE_LOCAL}, __eq), "{{}}", id); + "# + ); + } + + // Returns `of` when `num_vectors == 1` otherwise returns the appropriate `svget` invocation + // for `of`. + let get = |num_vectors: u32, idx: u32, from: &'static str| -> String { + if num_vectors == 1 { + return from.to_string(); + } format!( - "vld{len}{quad}_{type}{size}", - type = match k { - TypeKind::Int(Sign::Unsigned) => "u", - TypeKind::Int(Sign::Signed) => "s", - TypeKind::Float => "f", - TypeKind::Poly => "p", - x => todo!("get_load_function TypeKind: {x:#?}"), - }, - size = bl, - quad = quad, - len = vec_len.unwrap_or(1), + "svget{num_vectors}_{ty}{bl}::<{idx}>({from})", + ty = self.rust_intrinsic_name_prefix(), + bl = self.inner_size(), ) - } else { - todo!("get_load_function IntrinsicType: {self:#?}") + }; + + let n = self.num_vectors(); + (0..n) + .format_with("\n", |i, fmt| { + match self.kind() { + TypeKind::Float | TypeKind::BFloat => { + // Floats need special handling because `NaN != NaN` normally - this + // effectively does `(rust == c) || (isnan(rust) && isnan(c))` + fmt(&format_args!( + r#" +let __rust_eq_return_value = {rust_return_value}; +let __c_eq_return_value = {c_return_value}; +let __eq_sans_nan = svcmpeq_{ty}{bl}({PREDICATE_LOCAL}, __rust_eq_return_value, __c_eq_return_value); +let __rust_nan = svcmpuo_{ty}{bl}({PREDICATE_LOCAL}, __rust_eq_return_value, __rust_eq_return_value); +let __c_nan = svcmpuo_{ty}{bl}({PREDICATE_LOCAL}, __c_eq_return_value, __c_eq_return_value); +let __both_nan = svand_b_z({PREDICATE_LOCAL}, __rust_nan, __c_nan); +let __eq = svorr_b_z({PREDICATE_LOCAL}, __eq_sans_nan, __both_nan); +assert!(svptest_any(__pred, __eq), "{{}}-{i_plus_one}/{n}", id); +"#, + ty = self.rust_intrinsic_name_prefix(), + bl = self.inner_size(), + rust_return_value = get(n, i, "__rust_return_value"), + c_return_value = get(n, i, "__c_return_value"), + i_plus_one = i + 1, // so that the output is "1/2" and "2/2" + )) + } + _ => { + // Most types can just use `svcmpeq` + fmt(&format_args!( + r#" +let __eq = svcmpeq_{ty}{bl}({PREDICATE_LOCAL}, {rust_return_value}, {c_return_value}); +assert!(svptest_any(__pred, __eq), "{{}}-{i_plus_one}/{n}", id); +"#, + ty = self.rust_intrinsic_name_prefix(), + bl = self.inner_size(), + rust_return_value = get(n, i, "__rust_return_value"), + c_return_value = get(n, i, "__c_return_value"), + i_plus_one = i + 1, // so that the output is "1/2" and "2/2" + )) + } + } + }) + .to_string() + } +} + +impl ArmType { + /// Returns the Rust prefix for the name of an intrinsic with this type kind (i.e. `s` for + /// `i16`, or `u` for `u16`). For type kinds without any bit length at the end (e.g. `bool`), + /// returns the whole type name. + pub fn rust_intrinsic_name_prefix(&self) -> &str { + match self.kind() { + TypeKind::Char(Sign::Signed) => "s", + TypeKind::Int(Sign::Signed) => "s", + TypeKind::Poly => "p", + TypeKind::Bool => "s", + _ => self.kind.rust_prefix(), } } } diff --git a/crates/intrinsic-test/src/common/argument.rs b/crates/intrinsic-test/src/common/argument.rs index eaec5b71c4..10d9224183 100644 --- a/crates/intrinsic-test/src/common/argument.rs +++ b/crates/intrinsic-test/src/common/argument.rs @@ -1,35 +1,45 @@ use itertools::Itertools; +use crate::common::SupportedArchitecture; use crate::common::intrinsic_helpers::TypeKind; use crate::common::values::test_values_array_name; -use super::PASSES; use super::constraint::Constraint; -use super::intrinsic_helpers::IntrinsicTypeDefinition; +use super::intrinsic_helpers::TypeDefinition; +use super::{PASSES, PREDICATE_LOCAL}; /// An argument for the intrinsic. #[derive(Debug, PartialEq, Clone)] -pub struct Argument { +pub struct Argument { /// The argument's index in the intrinsic function call. pub pos: usize, /// The argument name. pub name: String, /// The type of the argument. - pub ty: T, + pub ty: A::Type, /// Any constraints that are on this argument pub constraint: Option, + /// Is the argument a predicate for a scalable intrinsic? + pub is_predicate: bool, } -impl Argument +impl Argument where - T: IntrinsicTypeDefinition, + A: SupportedArchitecture, { - pub fn new(pos: usize, name: String, ty: T, constraint: Option) -> Self { + pub fn new( + pos: usize, + name: String, + ty: A::Type, + constraint: Option, + is_predicate: bool, + ) -> Self { Argument { pos, name, ty, constraint, + is_predicate, } } @@ -37,8 +47,14 @@ where self.ty.c_type() } + /// Generates local variable name for the value passed to this argument pub fn generate_name(&self) -> String { - format!("{}_val", self.name) + // The same predicate is used for scalable intrinsic invocations + if self.is_predicate { + format!("{PREDICATE_LOCAL}") + } else { + format!("{}_val", self.name) + } } pub fn is_simd(&self) -> bool { @@ -63,13 +79,13 @@ where /// Arguments of an intrinsic - including parameters that end up being const generics. #[derive(Debug, PartialEq, Clone)] -pub struct ArgumentList { - pub args: Vec>, +pub struct ArgumentList { + pub args: Vec>, } -impl ArgumentList +impl ArgumentList where - T: IntrinsicTypeDefinition, + A: SupportedArchitecture, { /// Returns a string with the arguments in `self` as a parameter list for a wrapper fn /// definition in C (e.g. `$ty1 $arg1, $ty2 $arg2`). @@ -175,18 +191,14 @@ where pub fn load_values_rust(&self) -> String { self.iter() .filter(|&arg| !arg.has_constraint()) + .filter(|&arg| !arg.is_predicate) .enumerate() .map(|(idx, arg)| { if arg.is_simd() { - format!( - "let {name} = {load}({vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);\n", - name = arg.generate_name(), - vals_name = test_values_array_name(&arg.ty), - load = arg.ty.get_load_function(), - ) + A::load_call(arg, idx) } else { format!( - "let {name} = {vals_name}[(i+{idx}) % {PASSES}];\n", + "let {name} = {vals_name}[(i+{idx}) % {PASSES}];", name = arg.generate_name(), vals_name = test_values_array_name(&arg.ty), ) @@ -196,7 +208,7 @@ where } /// Returns an iterator over the contained arguments - pub fn iter(&self) -> std::slice::Iter<'_, Argument> { + pub fn iter(&self) -> std::slice::Iter<'_, Argument> { self.args.iter() } } diff --git a/crates/intrinsic-test/src/common/constraint.rs b/crates/intrinsic-test/src/common/constraint.rs index ab52d866ab..c7d37da2ad 100644 --- a/crates/intrinsic-test/src/common/constraint.rs +++ b/crates/intrinsic-test/src/common/constraint.rs @@ -1,6 +1,7 @@ -use serde::Deserialize; use std::ops::Range; +use serde::Deserialize; + /// Describes the values to test for a const generic parameter #[derive(Debug, PartialEq, Clone, Deserialize)] pub enum Constraint { @@ -23,21 +24,53 @@ pub enum Constraint { SvImmRotationAdd, } -impl Constraint { - /// Returns an iterator over the values of this constraint - pub fn iter(&self) -> Box + '_> { +/// Workaround to enable the `Constraint::into_iter` to return an iterator that implements `Clone`, +/// so that it can be used with `Itertools::multi_cartesian_product`. +/// +/// With the different iterator types, returning `Box + '_>` would the +/// idiomatic approach, but this can't be made to implement `Clone`. Given the limited number +/// of iterator types used and their relative lack of complexity, wrapping them all in an enum isn't +/// too bad. +#[derive(Clone)] +pub enum ConstraintIterator<'a> { + Once(std::iter::Once), + Range(std::ops::Range), + Copied(std::iter::Copied>), + Chain(std::iter::Chain, std::ops::RangeInclusive>), + StepBy(std::iter::StepBy>), +} + +impl<'a> Iterator for ConstraintIterator<'a> { + type Item = i64; + + fn next(&mut self) -> Option { + match self { + ConstraintIterator::Once(once) => once.next(), + ConstraintIterator::Range(range) => range.next(), + ConstraintIterator::Copied(copied) => copied.next(), + ConstraintIterator::Chain(chain) => chain.next(), + ConstraintIterator::StepBy(step_by) => step_by.next(), + } + } +} + +impl<'a> IntoIterator for &'a Constraint { + type Item = i64; + type IntoIter = ConstraintIterator<'a>; + + fn into_iter(self) -> Self::IntoIter { match self { - Constraint::Equal(i) => Box::new(std::iter::once(*i)), - Constraint::Range(range) => Box::new(range.clone()), - Constraint::Set(items) => Box::new(items.iter().copied().chain(Range::default())), + Constraint::Equal(i) => ConstraintIterator::Once(std::iter::once(*i)), + Constraint::Range(range) => ConstraintIterator::Range(range.clone()), + Constraint::Set(items) => ConstraintIterator::Copied(items.iter().copied()), // These values are discriminants of the `svpattern` enum - Constraint::SvPattern => Box::new((0..=13).chain(29..=31)), + Constraint::SvPattern => ConstraintIterator::Chain((0..=13).chain(29..=31)), // These values are discriminants of the `svprfop` enum - Constraint::SvPrefetchOp => Box::new((0..=5).chain(8..=14)), + Constraint::SvPrefetchOp => ConstraintIterator::Chain((0..=5).chain(8..=14)), // Valid rotations for intrinsics operating on complex pairs: 0, 90, 180, 270 - Constraint::SvImmRotation => Box::new((0..=270).step_by(90)), + Constraint::SvImmRotation => ConstraintIterator::StepBy((0..=270).step_by(90)), // Valid rotations for `svcadd` and `svqcadd`: 0, 270 - Constraint::SvImmRotationAdd => Box::new((90..=270).step_by(180)), + Constraint::SvImmRotationAdd => ConstraintIterator::StepBy((90..=270).step_by(180)), } } } diff --git a/crates/intrinsic-test/src/common/gen_c.rs b/crates/intrinsic-test/src/common/gen_c.rs index 24756324c4..104226c5df 100644 --- a/crates/intrinsic-test/src/common/gen_c.rs +++ b/crates/intrinsic-test/src/common/gen_c.rs @@ -1,8 +1,8 @@ use itertools::Itertools; -use crate::common::intrinsic::Intrinsic; +use crate::common::{SupportedArchitecture, intrinsic::Intrinsic}; -use super::intrinsic_helpers::IntrinsicTypeDefinition; +use super::intrinsic_helpers::TypeDefinition; /// Generates a C source file containing wrapper functions around each specialisation of each /// intrinsic (that is, intrinsics with specific values for the the immediate arguments). Each @@ -14,39 +14,40 @@ use super::intrinsic_helpers::IntrinsicTypeDefinition; /// *__dst = __crc32cd(a, b); /// } /// ``` -pub fn write_wrapper_c( +pub fn write_wrapper_c( w: &mut impl std::io::Write, notice: &str, - platform_headers: &[&str], - intrinsics: &[Intrinsic], + prelude: &str, + intrinsics: &[Intrinsic], ) -> std::io::Result<()> { - write!(w, "{notice}")?; + write!( + w, + r#" +{notice} +#include +#include +{prelude} - writeln!(w, "#include ")?; - writeln!(w, "#include ")?; - - for header in platform_headers { - writeln!(w, "#include <{header}>")?; - } - - for intrinsic in intrinsics { - intrinsic.iter_specializations(|imm_values| { - writeln!( - w, - " +{intrinsics} +"#, + intrinsics = intrinsics.iter().format_with("", |intrinsic, fmt| { + fmt(&intrinsic + .specializations() + .format_with("\n", |imm_values, fmt| { + fmt(&format_args!( + " void {name}_wrapper{imm_arglist}({return_ty}* __dst{arglist}) {{ *__dst = {name}({params}); }}", - return_ty = intrinsic.results.c_type(), - name = intrinsic.name, - imm_arglist = imm_values - .iter() - .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), - arglist = intrinsic.arguments.as_non_imm_arglist_c(), - params = intrinsic.arguments.as_call_params_c(&imm_values) - ) - })?; - } - - Ok(()) + return_ty = intrinsic.results.c_type(), + name = intrinsic.name, + imm_arglist = imm_values + .iter() + .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), + arglist = intrinsic.arguments.as_non_imm_arglist_c(), + params = intrinsic.arguments.as_call_params_c(&imm_values) + )) + })) + }), + ) } diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index 02f6e40dc0..44128d43b9 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -2,12 +2,12 @@ use std::process::Command; use itertools::Itertools; -use super::intrinsic_helpers::IntrinsicTypeDefinition; -use crate::common::PASSES; +use super::intrinsic_helpers::TypeDefinition; use crate::common::cli::{CcArgStyle, ProcessedCli}; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; use crate::common::values::{test_values_array_name, test_values_array_static}; +use crate::common::{PASSES, PREDICATE_LOCAL, SupportedArchitecture}; /// Rust definitions that are included verbatim in the generated source. In particular, defines /// a wrapper around float types that defines `NaN`s to be equal reflexively to enable @@ -32,12 +32,6 @@ macro_rules! wrap_partialeq { wrap_partialeq!(NanEqF16(f16), NanEqF32(f32), NanEqF64(f64)); "#; -macro_rules! concatln { - ($($lines:expr),* $(,)?) => { - concat!($( $lines, "\n" ),*) - }; -} - /// Run rustfmt on the generated source code pub fn run_rustfmt(source_path: &str) { let output = Command::new("rustfmt") @@ -65,11 +59,14 @@ pub fn write_bin_cargo_toml( w: &mut impl std::io::Write, module_count: usize, ) -> std::io::Result<()> { - write!(w, concatln!("[workspace]", "members = ["))?; - for i in 0..module_count { - writeln!(w, " \"mod_{i}\",")?; - } - writeln!(w, "]") + write!( + w, + r#" +[workspace] +members = [{members}] +"#, + members = (0..module_count).format_with(",", |i, fmt| fmt(&format_args!("\"mod_{i}\""))) + ) } /// Writes a `Cargo.toml` for a crate with name `name` to `w` that will contain a single Rust source @@ -77,21 +74,20 @@ pub fn write_bin_cargo_toml( pub fn write_lib_cargo_toml(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> { write!( w, - concatln!( - "[package]", - "name = \"{name}\"", - "version = \"{version}\"", - "authors = [{authors}]", - "license = \"{license}\"", - "edition = \"2018\"", - "", - "[dependencies]", - "core_arch = {{ path = \"../../crates/core_arch\" }}", - "", - "[build-dependencies]", - "cc = \"1\"" - ), - name = name, + r#" +[package] +name = "{name}" +version = "{version}" +authors = [{authors}] +license = "{license}" +edition = "2018" + +[dependencies] +core_arch = {{ path = "../../crates/core_arch" }} + +[build-dependencies] +cc = "1" +"#, version = env!("CARGO_PKG_VERSION"), authors = env!("CARGO_PKG_AUTHORS") .split(":") @@ -102,36 +98,39 @@ pub fn write_lib_cargo_toml(w: &mut impl std::io::Write, name: &str) -> std::io: /// Writes a Rust source file into `w` with common definitions, static arrays with test values, /// declarations of C wrapper functions for FFI and Rust test functions. -pub fn write_lib_rs( +pub fn write_lib_rs( w: &mut impl std::io::Write, - notice: &str, - cfg: &str, - definitions: &str, i: usize, - intrinsics: &[Intrinsic], + intrinsics: &[Intrinsic], ) -> std::io::Result<()> { - write!(w, "{notice}")?; - - writeln!(w, "#![feature(simd_ffi)]")?; - writeln!(w, "#![feature(f16)]")?; - writeln!(w, "#![allow(unused)]")?; - - // Cargo will spam the logs if these warnings are not silenced. - writeln!(w, "#![allow(non_upper_case_globals)]")?; - writeln!(w, "#![allow(non_camel_case_types)]")?; - writeln!(w, "#![allow(non_snake_case)]")?; - - writeln!(w, "{cfg}")?; - - writeln!(w, "{}", COMMON_RUST_DEFINITIONS)?; - - writeln!(w, "{definitions}")?; + writeln!( + w, + r#" +{notice} +#![feature(simd_ffi)] +#![feature(f16)] +#![allow(unused)] + +// Cargo will spam the logs if these warnings are not silenced. +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +{prelude} +{COMMON_RUST_DEFINITIONS} +"#, + notice = A::NOTICE, + prelude = A::RUST_PRELUDE, + )?; let mut seen = std::collections::HashSet::new(); for intrinsic in intrinsics { for arg in &intrinsic.arguments.args { - if !arg.has_constraint() { + // Skip arguments with constraints as these correspond to generic instantiatons, and + // predicates for scalable intrinsics as the same predicate is used for all intrinsics + // under test. + if !arg.has_constraint() && !arg.is_predicate { let name = test_values_array_name(&arg.ty); if seen.insert(name) { @@ -157,9 +156,9 @@ pub fn write_lib_rs( /// (first loop) `PASSES` number of times (second loop). For a given iteration of a given /// specialisation, test values are loaded for each argument and passed to the Rust intrinsic /// and the C wrapper function, and the results are compared. -fn generate_rust_test_loop( +fn generate_rust_test_loop( w: &mut impl std::io::Write, - intrinsic: &Intrinsic, + intrinsic: &Intrinsic, ) -> std::io::Result<()> { let intrinsic_name = &intrinsic.name; @@ -173,93 +172,88 @@ fn generate_rust_test_loop( coerce += ") -> _"; c_coerce += ")"; - if intrinsic - .arguments - .iter() - .filter(|arg| arg.has_constraint()) - .count() - == 0 - { - writeln!( - w, - " let specializations = [(\"\", {intrinsic_name}, {intrinsic_name}_wrapper)];" - )?; - } else { - writeln!(w, " let specializations = [")?; - - intrinsic.iter_specializations(|imm_values| { - writeln!( - w, - " (\"{const_args}\", {intrinsic_name}::<{const_args}> as unsafe {coerce}, {intrinsic_name}_wrapper_{c_const_args} as unsafe extern \"C\" {c_coerce}),", - const_args = imm_values.iter().join(","), - c_const_args = imm_values.iter().join("_"), - ) - })?; - - writeln!(w, " ];")?; - } - - let (cast_prefix, cast_suffix) = if intrinsic.results.is_simd() { - ( - format!( - "std::mem::transmute::<_, [{}; {}]>(", - intrinsic.results.rust_scalar_type().replace("f", "NanEqF"), - intrinsic.results.num_lanes() * intrinsic.results.num_vectors() - ), - ")", - ) - } else if intrinsic.results.kind == TypeKind::Float { - ( - match intrinsic.results.inner_size() { - 16 => format!("NanEqF16("), - 32 => format!("NanEqF32("), - 64 => format!("NanEqF64("), - _ => unimplemented!(), - }, - ")", - ) - } else { - ("".to_string(), "") - }; - write!( w, - concatln!( - " for (id, rust, c) in specializations {{", - " for i in 0..{passes} {{", - " unsafe {{", - "{loaded_args}", - " let __rust_return_value = rust({rust_args});", - "", - " let mut __c_return_value = std::mem::MaybeUninit::uninit();", - " c(__c_return_value.as_mut_ptr(){c_args});", - " let __c_return_value = __c_return_value.assume_init();", - "", - " assert_eq!({cast_prefix}__rust_return_value{cast_suffix}, {cast_prefix}__c_return_value{cast_suffix}, \"{{id}}\");", - " }}", - " }}", - " }}", - ), + r#" +let specializations = [{specializations}]; +for (id, rust, c) in specializations {{ + for i in 0..{PASSES} {{ + unsafe {{ + {predicate} + {loaded_args} + let __rust_return_value = rust({rust_args}); + + let mut __c_return_value = std::mem::MaybeUninit::uninit(); + c(__c_return_value.as_mut_ptr(){c_args}); + let __c_return_value = __c_return_value.assume_init(); + + {comparison} + }} + }} +}} +"#, + specializations = intrinsic + .specializations() + .format_with(",", |imm_values, fmt| { + if imm_values.is_empty() { + fmt(&format_args!( + "(\"\", {intrinsic_name}, {intrinsic_name}_wrapper)" + )) + } else { + let constraint_args = intrinsic.arguments.iter().filter(|a| a.has_constraint()); + fmt(&format_args!( + r#" + ( + "{const_args}", + {intrinsic_name}::<{const_args}> as unsafe {coerce}, + {intrinsic_name}_wrapper_{c_const_args} as unsafe extern "C" {c_coerce} + ) + "#, + const_args = imm_values + .iter() + .zip(constraint_args) + .map(|(imm_val, arg)| { + match arg.ty.kind() { + TypeKind::SvPattern | TypeKind::SvPrefetchOp => { + format!("{{ {}_from_i32({imm_val}) }}", arg.ty.kind()) + } + _ => imm_val.to_string(), + } + }) + .join(","), + c_const_args = imm_values.iter().join("_"), + )) + } + }), loaded_args = intrinsic.arguments.load_values_rust(), rust_args = intrinsic.arguments.as_call_param_rust(), c_args = intrinsic.arguments.as_c_call_param_rust(), - passes = PASSES, - cast_prefix = cast_prefix, - cast_suffix = cast_suffix, + predicate = if intrinsic.has_scalable_argument_or_result() { + format!( + "let {PREDICATE_LOCAL} = {pred};", + pred = A::predicate_function(intrinsic.results.inner_size()), + ) + } else { + "".to_string() + }, + comparison = intrinsic.results.comparison_function(), ) } /// Writes a test function for an given intrinsic to `w`, with a body generated by /// `generate_rust_test_loop`. -fn create_rust_test( +fn create_rust_test( w: &mut impl std::io::Write, - intrinsic: &Intrinsic, + intrinsic: &Intrinsic, ) -> std::io::Result<()> { trace!("generating `{}`", intrinsic.name); write!( w, - concatln!("#[test]", "fn test_{intrinsic_name}() {{"), + r#" +#[test] +fn test_{intrinsic_name}() {{ +"#, intrinsic_name = intrinsic.name, )?; @@ -272,37 +266,36 @@ fn create_rust_test( /// Writes an `extern "C"` block with function declarations for each of the C wrapper functions into /// `w`. -pub fn write_bindings_rust( +pub fn write_bindings_rust( w: &mut impl std::io::Write, i: usize, - intrinsics: &[Intrinsic], + intrinsics: &[Intrinsic], ) -> std::io::Result<()> { write!( w, - concatln!( - "#[allow(improper_ctypes)]", - "#[link(name = \"wrapper_{i}\")]", - "unsafe extern \"C\" {{" - ), - i = i - )?; - - for intrinsic in intrinsics { - intrinsic.iter_specializations(|imm_values| { - writeln!( - w, - " fn {name}_wrapper{imm_arglist}(__dst: *mut {return_ty}{arglist});", - return_ty = intrinsic.results.rust_type(), - name = intrinsic.name, - imm_arglist = imm_values - .iter() - .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), - arglist = intrinsic.arguments.as_non_imm_arglist_rust(), - ) - })?; - } - - writeln!(w, "}}") + r#" +#[allow(improper_ctypes)] +#[link(name = "wrapper_{i}")] +unsafe extern "C" {{ + {definitions} +}} +"#, + definitions = intrinsics.iter().format_with("", |intrinsic, fmt| { + fmt(&intrinsic + .specializations() + .format_with("\n", |imm_values, fmt| { + fmt(&format_args!( + "fn {name}_wrapper{imm_arglist}(__dst: *mut {return_ty}{arglist});", + return_ty = intrinsic.results.rust_type(), + name = intrinsic.name, + imm_arglist = imm_values + .iter() + .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), + arglist = intrinsic.arguments.as_non_imm_arglist_rust(), + )) + })) + }) + ) } /// Writes a `build.rs` into `w` for each test crate that compiles the corresponding C source code @@ -326,32 +319,22 @@ pub fn write_build_rs( write!( w, - concatln!( - "fn main() {{", - " cc::Build::new()", - " .file(\"../../c_programs/wrapper_{i}.c\")", - " .opt_level(2)", - " .flags(&[", - ), - i = i - )?; - - let compiler_specific_flags = match cli_options.cc_arg_style { - CcArgStyle::Gcc => GCC_FLAGS, - CcArgStyle::Clang => CLANG_FLAGS, - }; - - for flag in COMMON_FLAGS - .iter() - .chain(compiler_specific_flags) - .chain(arch_flags) - { - writeln!(w, "\"{flag}\",")?; - } - - write!( - w, - concatln!(" ])", " .compile(\"wrapper_{i}\");", "}}"), - i = i + r#" +fn main() {{ + cc::Build::new() + .file("../../c_programs/wrapper_{i}.c") + .opt_level(2) + .flags(&[{flags}]) + .compile("wrapper_{i}"); +}} +"#, + flags = COMMON_FLAGS + .iter() + .chain(match cli_options.cc_arg_style { + CcArgStyle::Gcc => GCC_FLAGS, + CcArgStyle::Clang => CLANG_FLAGS, + }) + .chain(arch_flags) + .format_with(",", |flag, fmt| fmt(&format_args!("\"{flag}\""))), ) } diff --git a/crates/intrinsic-test/src/common/intrinsic.rs b/crates/intrinsic-test/src/common/intrinsic.rs index d69644388a..d5d903d941 100644 --- a/crates/intrinsic-test/src/common/intrinsic.rs +++ b/crates/intrinsic-test/src/common/intrinsic.rs @@ -1,46 +1,27 @@ -use crate::common::constraint::Constraint; - use super::argument::ArgumentList; -use super::intrinsic_helpers::IntrinsicTypeDefinition; +use crate::common::{SupportedArchitecture, intrinsic_helpers::SimdLen}; +use itertools::Itertools; /// An intrinsic #[derive(Debug, PartialEq, Clone)] -pub struct Intrinsic { +pub struct Intrinsic { /// The function name of this intrinsic. pub name: String, /// Any arguments for this intrinsic. - pub arguments: ArgumentList, + pub arguments: ArgumentList, /// The return type of this intrinsic. - pub results: T, + pub results: A::Type, /// Any architecture-specific tags. pub arch_tags: Vec, -} -/// Invokes `f` for each combination of the values in the constraint ranges. -/// -/// For example, given `constraints=[Equal(0), Range(1..2), Set([3, 4])]` and `imm_values=[]`, this -/// produces the four calls to `f`: `f([0, 1, 3])`, `f([0, 1, 4])`, `f([0, 2, 3])`, `f([0, 2, 4])`. -fn recurse_specializations<'a, E>( - constraints: &mut (impl Iterator + Clone), - imm_values: &mut Vec, - f: &mut impl FnMut(&[i64]) -> Result<(), E>, -) -> Result<(), E> { - if let Some(current) = constraints.next() { - for i in current.iter() { - imm_values.push(i); - recurse_specializations(&mut constraints.clone(), imm_values, f)?; - imm_values.pop(); - } - Ok(()) - } else { - f(&imm_values) - } + /// Specific extension that the intrinsic is from + pub extension: String, } -impl Intrinsic { +impl Intrinsic { /// Invokes `f` for "specialisation" of the intrinsic - a specific instantiation of the /// constant generics of the intrinsic. `f` takes a slice where the `i`th element corresponds /// to the value of the `i`th const generic argument of the intrinsic. @@ -48,17 +29,20 @@ impl Intrinsic { /// For an intrinsic with three arguments with constraints `Equal(0)`, `Range(1..2)`, /// `Set([3, 4])` respectively, this would produce four calls to `f`: `f(0, 1, 3)`, /// `f(0, 1, 4)`, `f(0, 2, 3)`, `f(0, 2, 4)`. - pub fn iter_specializations( - &self, - mut f: impl FnMut(&[i64]) -> Result<(), E>, - ) -> Result<(), E> { - recurse_specializations( - &mut self + pub fn specializations(&self) -> impl Iterator> { + self.arguments + .iter() + .filter_map(|arg| arg.constraint.as_ref()) + .map(|constraint| constraint.into_iter()) + .multi_cartesian_product() + } + + /// Returns `true` if this intrinsic has any argument or result types that are scalable vectors + pub fn has_scalable_argument_or_result(&self) -> bool { + self.results.num_lanes() == SimdLen::Scalable + || self .arguments .iter() - .filter_map(|arg| arg.constraint.as_ref()), - &mut Vec::new(), - &mut f, - ) + .any(|a| a.ty.num_lanes() == SimdLen::Scalable) } } diff --git a/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/crates/intrinsic-test/src/common/intrinsic_helpers.rs index ca5aeba86d..4c229eeba7 100644 --- a/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -1,6 +1,6 @@ use std::cmp; use std::fmt; -use std::ops::Deref; +use std::ops::DerefMut; use std::str::FromStr; #[derive(Debug, PartialEq, Copy, Clone)] @@ -90,9 +90,13 @@ impl TypeKind { } } - /// Returns the Rust prefix for this type kind i.e. `i`, `u`, or `f`. + /// Returns the Rust prefix for this type kind (i.e. `i` for `i16`, or `u` for `u16`). For type + /// kinds without any bit length at the end (e.g. `bool`), returns the whole type name. pub fn rust_prefix(&self) -> &str { match self { + Self::Bool => "bool", + Self::SvPattern => "svpattern", + Self::SvPrefetchOp => "svprfop", Self::BFloat => "bf", Self::Float => "f", Self::Int(Sign::Signed) => "i", @@ -101,7 +105,7 @@ impl TypeKind { Self::Char(Sign::Unsigned) => "u", Self::Char(Sign::Signed) => "i", Self::Mask => "u", - _ => unreachable!("Unused type kind: {self:#?}"), + _ => unreachable!("type kind without Rust prefix: {self:#?}"), } } } @@ -169,14 +173,8 @@ impl IntrinsicType { } /// Returns the number of lanes of the type - pub fn num_lanes(&self) -> u32 { - self.simd_len - .as_ref() - .map(|len| match len { - SimdLen::Scalable => unimplemented!(), - SimdLen::Fixed(len) => *len, - }) - .unwrap_or(1) + pub fn num_lanes(&self) -> SimdLen { + self.simd_len.unwrap_or(SimdLen::Fixed(1)) } /// Returns the number of vectors of the type @@ -195,9 +193,17 @@ impl IntrinsicType { } } -pub trait IntrinsicTypeDefinition: Deref { +pub trait TypeDefinition: Clone + DerefMut { /// Determines the load function for this type. - fn get_load_function(&self) -> String; + fn load_function(&self) -> String; + + /// Determines the comparison function for this type. + fn comparison_function(&self) -> String { + match self.num_lanes() { + SimdLen::Scalable => unimplemented!("architecture-specific"), + SimdLen::Fixed(num_lanes) => default_fixed_vector_comparison(self, num_lanes), + } + } /// Gets a string containing the typename for this type in C. fn c_type(&self) -> String; @@ -208,14 +214,55 @@ pub trait IntrinsicTypeDefinition: Deref { /// Gets a string containing the name of the scalar type corresponding to this type if it is a /// vector. fn rust_scalar_type(&self) -> String { - if self.is_simd() { - format!( - "{prefix}{bits}", - prefix = self.kind().rust_prefix(), - bits = self.inner_size() - ) - } else { - self.rust_type() - } + let mut ty = self.clone(); + ty.simd_len = None; + ty.vec_len = None; + ty.rust_type() } + + /// Gets a string containing the name of the scalar type corresponding to this type that should + /// be used as the element type for the test value array. + fn rust_scalar_type_for_test_value_array(&self) -> String { + self.rust_scalar_type() + } +} + +/// Returns the default comparison between results of an intrinsic - casting the vectors to arrays +/// and using `assert_eq` - using `NanEqF*` where required for floats. +pub(crate) fn default_fixed_vector_comparison( + ty: &Ty, + num_lanes: u32, +) -> String { + let (cast_prefix, cast_suffix) = if ty.is_simd() { + ( + format!( + "std::mem::transmute::<_, [{}; {}]>(", + ty.rust_scalar_type().replace("f", "NanEqF"), + num_lanes * ty.num_vectors() + ), + ")", + ) + } else if ty.kind == TypeKind::Float { + ( + match ty.inner_size() { + 16 => format!("NanEqF16("), + 32 => format!("NanEqF32("), + 64 => format!("NanEqF64("), + _ => unimplemented!(), + }, + ")", + ) + } else { + ("".to_string(), "") + }; + + format!( + r#" +assert_eq!( + {cast_prefix}__rust_return_value{cast_suffix}, + {cast_prefix}__c_return_value{cast_suffix}, + "{{id}}" +); +"#, + ) } diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index b577491454..bd1bc85fbf 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -5,12 +5,14 @@ use rayon::prelude::*; use cli::ProcessedCli; use crate::common::{ + argument::Argument, gen_c::write_wrapper_c, gen_rust::{ run_rustfmt, write_bin_cargo_toml, write_build_rs, write_lib_cargo_toml, write_lib_rs, }, intrinsic::Intrinsic, - intrinsic_helpers::IntrinsicTypeDefinition, + intrinsic_helpers::TypeDefinition, + values::test_values_array_name, }; pub mod argument; @@ -18,10 +20,16 @@ pub mod cli; pub mod constraint; pub mod intrinsic; pub mod intrinsic_helpers; +pub mod values; mod gen_c; mod gen_rust; -mod values; + +/// Many scalable intrinsics take a predicate argument and for the purposes of intrinsic testing, +/// a predicate that enables all lanes is used for all of these intrinsic calls (i.e. loading inputs, +/// result comparison, and the intrinsic under test). This constant defines the name of the local +/// variable that contains that predicate. +pub const PREDICATE_LOCAL: &'static str = "__pred"; // The number of times each intrinsic will be called - influences the generation of the // test arrays to minimise repeated testing of the same test values. @@ -29,21 +37,19 @@ pub(crate) const PASSES: u32 = 20; /// Architectures must support this trait /// to be successfully tested. -pub trait SupportedArchitectureTest { - type IntrinsicImpl: IntrinsicTypeDefinition + Sync; +pub trait SupportedArchitecture: Sized { + type Type: TypeDefinition + std::fmt::Debug + PartialEq + Sync; - fn intrinsics(&self) -> &[Intrinsic]; + fn intrinsics(&self) -> &[Intrinsic]; fn create(cli_options: &ProcessedCli) -> Self; const NOTICE: &str; - const PLATFORM_C_HEADERS: &[&str]; - - const PLATFORM_RUST_CFGS: &str; - const PLATFORM_RUST_DEFINITIONS: &str; + const C_PRELUDE: &str; + const RUST_PRELUDE: &str; - fn arch_flags(&self, cli_options: &ProcessedCli) -> Vec<&str>; + fn c_compiler_flags(&self, cli_options: &ProcessedCli) -> Vec<&str>; fn generate_c_file(&self) { let (max_chunk_size, _chunk_count) = manual_chunk(self.intrinsics().len()); @@ -55,14 +61,14 @@ pub trait SupportedArchitectureTest { .map(|(i, chunk)| { let c_filename = format!("c_programs/wrapper_{i}.c"); let mut file = File::create(&c_filename).unwrap(); - write_wrapper_c(&mut file, Self::NOTICE, Self::PLATFORM_C_HEADERS, chunk) + write_wrapper_c(&mut file, Self::NOTICE, Self::C_PRELUDE, chunk) }) .collect::>() .unwrap(); } fn generate_rust_file(&self, cli_options: &ProcessedCli) { - let arch_flags = self.arch_flags(cli_options); + let arch_flags = self.c_compiler_flags(cli_options); std::fs::create_dir_all("rust_programs").unwrap(); @@ -81,14 +87,7 @@ pub trait SupportedArchitectureTest { trace!("generating `{rust_filename}`"); let mut file = File::create(&rust_filename)?; - write_lib_rs( - &mut file, - Self::NOTICE, - Self::PLATFORM_RUST_CFGS, - Self::PLATFORM_RUST_DEFINITIONS, - i, - chunk, - )?; + write_lib_rs(&mut file, i, chunk)?; run_rustfmt(&rust_filename); let toml_filename = format!("rust_programs/mod_{i}/Cargo.toml"); @@ -109,6 +108,19 @@ pub trait SupportedArchitectureTest { .collect::>() .unwrap(); } + + /// Return a call to a intrinsic to generate a predicate, if reqd. + fn predicate_function(_: u32) -> String; + + /// Return a call loading `arg`. Can assume that `arg.is_simd()` holds. + fn load_call(arg: &Argument, idx: usize) -> String { + format!( + "let {name} = {load}({vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);", + name = arg.generate_name(), + vals_name = test_values_array_name(&arg.ty), + load = arg.ty.load_function(), + ) + } } pub fn manual_chunk(intrinsic_count: usize) -> (usize, usize) { diff --git a/crates/intrinsic-test/src/common/values.rs b/crates/intrinsic-test/src/common/values.rs index 4c3dd078e0..d1f333fd19 100644 --- a/crates/intrinsic-test/src/common/values.rs +++ b/crates/intrinsic-test/src/common/values.rs @@ -2,7 +2,7 @@ use itertools::Itertools as _; use crate::common::{ PASSES, - intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, SimdLen, TypeKind}, + intrinsic_helpers::{IntrinsicType, Sign, SimdLen, TypeDefinition, TypeKind}, }; /// Maximum size of a SVE vector @@ -18,15 +18,15 @@ pub const MAX_SVE_BITS: u32 = 2048; /// 0x80, 0x3b, 0xff, /// ]; /// ``` -pub fn test_values_array_static( +pub fn test_values_array_static( w: &mut impl std::io::Write, ty: &T, ) -> std::io::Result<()> { writeln!( w, - "static {name}: [{ty}; {load_size}] = {values};\n", + "static {name}: [{ty}; {load_size}] = {values};", name = test_values_array_name(ty), - ty = ty.rust_scalar_type(), + ty = ty.rust_scalar_type_for_test_value_array(), load_size = test_values_array_length(&ty), values = test_values_array(&ty) ) @@ -34,7 +34,7 @@ pub fn test_values_array_static( /// Returns a string with the name of the static variable containing test values for intrinsic /// arguments of this type. -pub fn test_values_array_name(ty: &T) -> String { +pub fn test_values_array_name(ty: &T) -> String { format!( "{ty}_{load_size}", ty = ty.rust_scalar_type().to_uppercase(), @@ -50,8 +50,16 @@ pub fn test_values_array_name(ty: &T) -> String { /// which is then printed as a hex value in the generated code (and if identified as a negative /// value, with the appropriate minus and corrected hex pattern). Calls to `fN::from_bits` are /// generated for floats. +/// +/// An exception to the above is when `ty` is a boolean, where this function returns +/// `[true, false]` - as there are only ever two values for a boolean. This only works because the +/// generated accesses to the test value array is always modulo the length of the test value array. pub fn test_values_array(ty: &IntrinsicType) -> String { let (bit_len, kind) = match ty { + IntrinsicType { + kind: TypeKind::Bool, + .. + } => (1, TypeKind::Bool), IntrinsicType { kind: TypeKind::Float, bit_len: Some(bit_len), @@ -75,6 +83,9 @@ pub fn test_values_array(ty: &IntrinsicType) -> String { let src = bit_pattern_for_test_values_array(bit_len, i); assert!(src == 0 || src.ilog2() < bit_len); match kind { + TypeKind::Bool if ty.num_lanes() != SimdLen::Scalable => { + fmt(&format_args!("{}", if src == 1 { true } else { false })) + } TypeKind::Float => fmt(&format_args!("f{bit_len}::from_bits({src:#x})")), TypeKind::Vector | TypeKind::Int(Sign::Signed) if (src >> (bit_len - 1)) != 0 => { // `src` is a two's complement representation of a negative value. @@ -134,7 +145,8 @@ pub fn test_values_array_length(ty: &IntrinsicType) -> u32 { pub fn bit_pattern_for_test_values_array(bits: u32, index: u32) -> u64 { let index = index as usize; match bits { - bits @ (1 | 2 | 3 | 4 | 5 | 6 | 7 | 8) => BIT_PATTERNS_8[index % (1 << bits)].into(), + 1 => BIT_PATTERNS_1[index % BIT_PATTERNS_1.len()].into(), + bits @ (2 | 3 | 4 | 5 | 6 | 7 | 8) => BIT_PATTERNS_8[index % (1 << bits)].into(), 16 => BIT_PATTERNS_16[index % BIT_PATTERNS_16.len()].into(), 32 => BIT_PATTERNS_32[index % BIT_PATTERNS_32.len()].into(), 64 => BIT_PATTERNS_64[index % BIT_PATTERNS_64.len()], @@ -142,6 +154,9 @@ pub fn bit_pattern_for_test_values_array(bits: u32, index: u32) -> u64 { } } +// Contains every possible 1-bit value in order +pub const BIT_PATTERNS_1: &[u8] = &[0x0, 0x1]; + // Contains every possible 8-bit value in order pub const BIT_PATTERNS_8: &[u8] = &[ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, diff --git a/crates/intrinsic-test/src/main.rs b/crates/intrinsic-test/src/main.rs index 4c0136041f..e25eb48a45 100644 --- a/crates/intrinsic-test/src/main.rs +++ b/crates/intrinsic-test/src/main.rs @@ -5,10 +5,10 @@ mod arm; mod common; mod x86; -use arm::ArmArchitectureTest; -use common::SupportedArchitectureTest; +use arm::Arm; +use common::SupportedArchitecture; use common::cli::{Cli, ProcessedCli}; -use x86::X86ArchitectureTest; +use x86::X86; fn main() { pretty_env_logger::init(); @@ -18,21 +18,15 @@ fn main() { if processed_cli_options.target.starts_with("arm") | processed_cli_options.target.starts_with("aarch64") { - run( - ArmArchitectureTest::create(&processed_cli_options), - processed_cli_options, - ) + run(Arm::create(&processed_cli_options), processed_cli_options) } else if processed_cli_options.target.starts_with("x86") { - run( - X86ArchitectureTest::create(&processed_cli_options), - processed_cli_options, - ) + run(X86::create(&processed_cli_options), processed_cli_options) } else { unimplemented!("Unsupported target {}", processed_cli_options.target) } } -fn run(test_environment: impl SupportedArchitectureTest, processed_cli_options: ProcessedCli) { +fn run(test_environment: impl SupportedArchitecture, processed_cli_options: ProcessedCli) { info!("building C binaries"); test_environment.generate_c_file(); diff --git a/crates/intrinsic-test/src/x86/config.rs b/crates/intrinsic-test/src/x86/config.rs deleted file mode 100644 index 68737ab5ac..0000000000 --- a/crates/intrinsic-test/src/x86/config.rs +++ /dev/null @@ -1,138 +0,0 @@ -pub const NOTICE: &str = "\ -// This is a transient test file, not intended for distribution. Some aspects of the -// test are derived from an XML specification, published under the same license as the -// `intrinsic-test` crate.\n"; - -pub const PLATFORM_RUST_DEFINITIONS: &str = r#" -use core_arch::arch::x86_64::*; - -#[inline] -unsafe fn _mm_loadu_ph_to___m128i(mem_addr: *const f16) -> __m128i { - _mm_castph_si128(_mm_loadu_ph(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_ph_to___m256i(mem_addr: *const f16) -> __m256i { - _mm256_castph_si256(_mm256_loadu_ph(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_ph_to___mm512i(mem_addr: *const f16) -> __m512i { - _mm512_castph_si512(_mm512_loadu_ph(mem_addr)) -} - - -#[inline] -unsafe fn _mm_loadu_ps_to___m128h(mem_addr: *const f32) -> __m128h { - _mm_castps_ph(_mm_loadu_ps(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_ps_to___m256h(mem_addr: *const f32) -> __m256h { - _mm256_castps_ph(_mm256_loadu_ps(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_ps_to___m512h(mem_addr: *const f32) -> __m512h { - _mm512_castps_ph(_mm512_loadu_ps(mem_addr)) -} - -#[inline] -unsafe fn _mm_loadu_epi16_to___m128d(mem_addr: *const i16) -> __m128d { - _mm_castsi128_pd(_mm_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi16_to___m256d(mem_addr: *const i16) -> __m256d { - _mm256_castsi256_pd(_mm256_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi16_to___m512d(mem_addr: *const i16) -> __m512d { - _mm512_castsi512_pd(_mm512_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm_loadu_epi32_to___m128d(mem_addr: *const i32) -> __m128d { - _mm_castsi128_pd(_mm_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi32_to___m256d(mem_addr: *const i32) -> __m256d { - _mm256_castsi256_pd(_mm256_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi32_to___m512d(mem_addr: *const i32) -> __m512d { - _mm512_castsi512_pd(_mm512_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm_loadu_epi64_to___m128d(mem_addr: *const i64) -> __m128d { - _mm_castsi128_pd(_mm_loadu_epi64(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi64_to___m256d(mem_addr: *const i64) -> __m256d { - _mm256_castsi256_pd(_mm256_loadu_epi64(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi64_to___m512d(mem_addr: *const i64) -> __m512d { - _mm512_castsi512_pd(_mm512_loadu_epi64(mem_addr)) -} - -// === -#[inline] -unsafe fn _mm_loadu_epi16_to___m128(mem_addr: *const i16) -> __m128 { - _mm_castsi128_ps(_mm_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi16_to___m256(mem_addr: *const i16) -> __m256 { - _mm256_castsi256_ps(_mm256_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi16_to___m512(mem_addr: *const i16) -> __m512 { - _mm512_castsi512_ps(_mm512_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm_loadu_epi32_to___m128(mem_addr: *const i32) -> __m128 { - _mm_castsi128_ps(_mm_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi32_to___m256(mem_addr: *const i32) -> __m256 { - _mm256_castsi256_ps(_mm256_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi32_to___m512(mem_addr: *const i32) -> __m512 { - _mm512_castsi512_ps(_mm512_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm_loadu_epi64_to___m128(mem_addr: *const i64) -> __m128 { - _mm_castsi128_ps(_mm_loadu_epi64(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi64_to___m256(mem_addr: *const i64) -> __m256 { - _mm256_castsi256_ps(_mm256_loadu_epi64(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi64_to___m512(mem_addr: *const i64) -> __m512 { - _mm512_castsi512_ps(_mm512_loadu_epi64(mem_addr)) -} - -"#; - -pub const PLATFORM_RUST_CFGS: &str = r#" -#![feature(stdarch_x86_avx512_bf16)] -#![feature(stdarch_x86_avx512_f16)] -#![feature(stdarch_x86_rtm)] -#![feature(x86_amx_intrinsics)] -"#; diff --git a/crates/intrinsic-test/src/x86/mod.rs b/crates/intrinsic-test/src/x86/mod.rs index 288bd8bdf8..36f4fee437 100644 --- a/crates/intrinsic-test/src/x86/mod.rs +++ b/crates/intrinsic-test/src/x86/mod.rs @@ -1,35 +1,38 @@ -mod config; mod constraint; mod intrinsic; mod types; mod xml_parser; -use crate::common::SupportedArchitectureTest; +use crate::common::SupportedArchitecture; use crate::common::cli::ProcessedCli; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; use intrinsic::X86IntrinsicType; use xml_parser::get_xml_intrinsics; -pub struct X86ArchitectureTest { - intrinsics: Vec>, +pub struct X86 { + intrinsics: Vec>, } -impl SupportedArchitectureTest for X86ArchitectureTest { - type IntrinsicImpl = X86IntrinsicType; +impl SupportedArchitecture for X86 { + type Type = X86IntrinsicType; - fn intrinsics(&self) -> &[Intrinsic] { + fn intrinsics(&self) -> &[Intrinsic] { &self.intrinsics } - const NOTICE: &str = config::NOTICE; + const NOTICE: &str = r#" +// This is a transient test file, not intended for distribution. Some aspects of the +// test are derived from an XML specification, published under the same license as the +// `intrinsic-test` crate. +"#; - const PLATFORM_C_HEADERS: &[&str] = &["immintrin.h"]; + const C_PRELUDE: &str = r#" +#include +"#; + const RUST_PRELUDE: &str = RUST_PRELUDE; - const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; - const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; - - fn arch_flags(&self, _cli_options: &ProcessedCli) -> Vec<&str> { + fn c_compiler_flags(&self, _cli_options: &ProcessedCli) -> Vec<&str> { vec![ "-maes", "-mf16c", @@ -96,4 +99,139 @@ impl SupportedArchitectureTest for X86ArchitectureTest { Self { intrinsics } } + + fn predicate_function(_: u32) -> String { + unimplemented!("no scalable vectors on x86") + } +} + +const RUST_PRELUDE: &str = r#" +#![feature(stdarch_x86_avx512_bf16)] +#![feature(stdarch_x86_avx512_f16)] +#![feature(stdarch_x86_rtm)] +#![feature(x86_amx_intrinsics)] + +use core_arch::arch::x86_64::*; + +#[inline] +unsafe fn _mm_loadu_ph_to___m128i(mem_addr: *const f16) -> __m128i { + _mm_castph_si128(_mm_loadu_ph(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_ph_to___m256i(mem_addr: *const f16) -> __m256i { + _mm256_castph_si256(_mm256_loadu_ph(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_ph_to___mm512i(mem_addr: *const f16) -> __m512i { + _mm512_castph_si512(_mm512_loadu_ph(mem_addr)) +} + + +#[inline] +unsafe fn _mm_loadu_ps_to___m128h(mem_addr: *const f32) -> __m128h { + _mm_castps_ph(_mm_loadu_ps(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_ps_to___m256h(mem_addr: *const f32) -> __m256h { + _mm256_castps_ph(_mm256_loadu_ps(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_ps_to___m512h(mem_addr: *const f32) -> __m512h { + _mm512_castps_ph(_mm512_loadu_ps(mem_addr)) +} + +#[inline] +unsafe fn _mm_loadu_epi16_to___m128d(mem_addr: *const i16) -> __m128d { + _mm_castsi128_pd(_mm_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi16_to___m256d(mem_addr: *const i16) -> __m256d { + _mm256_castsi256_pd(_mm256_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi16_to___m512d(mem_addr: *const i16) -> __m512d { + _mm512_castsi512_pd(_mm512_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm_loadu_epi32_to___m128d(mem_addr: *const i32) -> __m128d { + _mm_castsi128_pd(_mm_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi32_to___m256d(mem_addr: *const i32) -> __m256d { + _mm256_castsi256_pd(_mm256_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi32_to___m512d(mem_addr: *const i32) -> __m512d { + _mm512_castsi512_pd(_mm512_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm_loadu_epi64_to___m128d(mem_addr: *const i64) -> __m128d { + _mm_castsi128_pd(_mm_loadu_epi64(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi64_to___m256d(mem_addr: *const i64) -> __m256d { + _mm256_castsi256_pd(_mm256_loadu_epi64(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi64_to___m512d(mem_addr: *const i64) -> __m512d { + _mm512_castsi512_pd(_mm512_loadu_epi64(mem_addr)) +} + +// === +#[inline] +unsafe fn _mm_loadu_epi16_to___m128(mem_addr: *const i16) -> __m128 { + _mm_castsi128_ps(_mm_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi16_to___m256(mem_addr: *const i16) -> __m256 { + _mm256_castsi256_ps(_mm256_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi16_to___m512(mem_addr: *const i16) -> __m512 { + _mm512_castsi512_ps(_mm512_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm_loadu_epi32_to___m128(mem_addr: *const i32) -> __m128 { + _mm_castsi128_ps(_mm_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi32_to___m256(mem_addr: *const i32) -> __m256 { + _mm256_castsi256_ps(_mm256_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi32_to___m512(mem_addr: *const i32) -> __m512 { + _mm512_castsi512_ps(_mm512_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm_loadu_epi64_to___m128(mem_addr: *const i64) -> __m128 { + _mm_castsi128_ps(_mm_loadu_epi64(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi64_to___m256(mem_addr: *const i64) -> __m256 { + _mm256_castsi256_ps(_mm256_loadu_epi64(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi64_to___m512(mem_addr: *const i64) -> __m512 { + _mm512_castsi512_ps(_mm512_loadu_epi64(mem_addr)) } +"#; diff --git a/crates/intrinsic-test/src/x86/types.rs b/crates/intrinsic-test/src/x86/types.rs index a0e14c77d6..2cba54a73f 100644 --- a/crates/intrinsic-test/src/x86/types.rs +++ b/crates/intrinsic-test/src/x86/types.rs @@ -3,12 +3,10 @@ use std::str::FromStr; use itertools::Itertools; use super::intrinsic::X86IntrinsicType; -use crate::common::intrinsic_helpers::{ - IntrinsicType, IntrinsicTypeDefinition, Sign, SimdLen, TypeKind, -}; +use crate::common::intrinsic_helpers::{IntrinsicType, Sign, SimdLen, TypeDefinition, TypeKind}; use crate::x86::xml_parser::Parameter; -impl IntrinsicTypeDefinition for X86IntrinsicType { +impl TypeDefinition for X86IntrinsicType { /// Gets a string containing the type in C format. /// This function assumes that this value is present in the metadata hashmap. fn c_type(&self) -> String { @@ -50,7 +48,7 @@ impl IntrinsicTypeDefinition for X86IntrinsicType { } /// Determines the load function for this type. - fn get_load_function(&self) -> String { + fn load_function(&self) -> String { let type_value = self.param.type_data.clone(); if type_value.len() == 0 { unimplemented!("the value for key 'type' is not present!"); diff --git a/crates/intrinsic-test/src/x86/xml_parser.rs b/crates/intrinsic-test/src/x86/xml_parser.rs index 6006d7919f..2296dffb64 100644 --- a/crates/intrinsic-test/src/x86/xml_parser.rs +++ b/crates/intrinsic-test/src/x86/xml_parser.rs @@ -1,6 +1,7 @@ use crate::common::argument::{Argument, ArgumentList}; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; +use crate::x86::X86; use crate::x86::constraint::map_constraints; use regex::Regex; @@ -56,13 +57,13 @@ pub struct Parameter { pub fn get_xml_intrinsics( filename: &Path, -) -> Result>, Box> { +) -> Result>, Box> { let file = std::fs::File::open(filename)?; let reader = std::io::BufReader::new(file); let data: Data = quick_xml::de::from_reader(reader).expect("failed to deserialize the source XML file"); - let parsed_intrinsics: Vec> = data + let parsed_intrinsics: Vec> = data .intrinsics .into_iter() .filter(|intrinsic| { @@ -84,9 +85,7 @@ pub fn get_xml_intrinsics( Ok(parsed_intrinsics) } -fn xml_to_intrinsic( - intr: XMLIntrinsic, -) -> Result, Box> { +fn xml_to_intrinsic(intr: XMLIntrinsic) -> Result, Box> { let name = intr.name; let result = X86IntrinsicType::from_param(&intr.return_data); let args_check = intr.parameters.into_iter().enumerate().map(|(i, param)| { @@ -100,13 +99,13 @@ fn xml_to_intrinsic( param.imm_width }; let constraint = map_constraints(&name, ¶m.imm_type, effective_imm_width); - let arg = Argument::::new( + Some(Argument::::new( i, param.var_name.clone(), ty.unwrap(), constraint, - ); - Some(arg) + false, + )) } }); @@ -125,8 +124,8 @@ fn xml_to_intrinsic( // if one of the args has etype="MASK" and type="__md", // then set the bit_len and simd_len accordingly let re = Regex::new(r"__m\d+").unwrap(); - let is_mask = |arg: &Argument| arg.ty.param.etype.as_str() == "MASK"; - let is_vector = |arg: &Argument| re.is_match(arg.ty.param.type_data.as_str()); + let is_mask = |arg: &Argument| arg.ty.param.etype.as_str() == "MASK"; + let is_vector = |arg: &Argument| re.is_match(arg.ty.param.type_data.as_str()); let pos = args_test.position(|arg| is_mask(arg) && is_vector(arg)); if let Some(index) = pos { args[index].ty.bit_len = args[0].ty.bit_len; @@ -134,7 +133,7 @@ fn xml_to_intrinsic( args.iter_mut().for_each(|arg| arg.ty.update_simd_len()); - let arguments = ArgumentList:: { args }; + let arguments = ArgumentList:: { args }; if let Err(message) = result { return Err(Box::from(message)); @@ -145,5 +144,6 @@ fn xml_to_intrinsic( arguments, results: result.unwrap(), arch_tags: intr.cpuid, + extension: intr.tech, }) } diff --git a/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml index 1f65732412..138d5ba311 100644 --- a/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml +++ b/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml @@ -1021,7 +1021,7 @@ intrinsics: doc: Interleave elements from low halves of two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] + types: [b8, f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] assert_instr: [zip1] compose: - LLVMLink: { name: "zip1.{sve_type}" } @@ -1031,10 +1031,13 @@ intrinsics: doc: Interleave elements from low halves of two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [b8, b16, b32, b64] + types: [b16, b32, b64] assert_instr: [zip1] compose: - - LLVMLink: { name: "zip1.{sve_type}" } + - LLVMLink: + name: "llvm.aarch64.sve.zip1.b{size}" + arguments: ["op1: svbool_t", "op2: svbool_t"] + return_type: "svbool_t" - name: svzip1q[_{type}] attr: [*sve-unstable] @@ -1052,7 +1055,7 @@ intrinsics: doc: Interleave elements from high halves of two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] + types: [b8, f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] assert_instr: [zip2] compose: - LLVMLink: { name: "zip2.{sve_type}" } @@ -1062,10 +1065,13 @@ intrinsics: doc: Interleave elements from high halves of two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [b8, b16, b32, b64] + types: [b16, b32, b64] assert_instr: [zip2] compose: - - LLVMLink: { name: "zip2.{sve_type}" } + - LLVMLink: + name: "llvm.aarch64.sve.zip2.b{size}" + arguments: ["op1: svbool_t", "op2: svbool_t"] + return_type: "svbool_t" - name: svzip2q[_{type}] attr: [*sve-unstable] @@ -1083,7 +1089,7 @@ intrinsics: doc: Concatenate even elements from two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] + types: [b8, f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] assert_instr: [uzp1] compose: - LLVMLink: { name: "uzp1.{sve_type}" } @@ -1093,10 +1099,13 @@ intrinsics: doc: Concatenate even elements from two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [b8, b16, b32, b64] + types: [b16, b32, b64] assert_instr: [uzp1] compose: - - LLVMLink: { name: "uzp1.{sve_type}" } + - LLVMLink: + name: "llvm.aarch64.sve.uzp1.b{size}" + arguments: ["op1: svbool_t", "op2: svbool_t"] + return_type: "svbool_t" - name: svuzp1q[_{type}] attr: [*sve-unstable] @@ -1114,7 +1123,7 @@ intrinsics: doc: Concatenate odd elements from two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] + types: [b8, f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] assert_instr: [uzp2] compose: - LLVMLink: { name: "uzp2.{sve_type}" } @@ -1124,10 +1133,13 @@ intrinsics: doc: Concatenate odd elements from two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [b8, b16, b32, b64] + types: [b16, b32, b64] assert_instr: [uzp2] compose: - - LLVMLink: { name: "uzp2.{sve_type}" } + - LLVMLink: + name: "llvm.aarch64.sve.uzp2.b{size}" + arguments: ["op1: svbool_t", "op2: svbool_t"] + return_type: "svbool_t" - name: svuzp2q[_{type}] attr: [*sve-unstable] @@ -1207,7 +1219,7 @@ intrinsics: doc: Reverse all elements arguments: ["op: {sve_type}"] return_type: "{sve_type}" - types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] + types: [b8, f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] assert_instr: [rev] compose: - LLVMLink: { name: "llvm.vector.reverse.{sve_type}" } @@ -1217,10 +1229,13 @@ intrinsics: doc: Reverse all elements arguments: ["op: {sve_type}"] return_type: "{sve_type}" - types: [b8, b16, b32, b64] + types: [b16, b32, b64] assert_instr: [rev] compose: - - LLVMLink: { name: "llvm.vector.reverse.{sve_type}" } + - LLVMLink: + name: "llvm.aarch64.sve.rev.b{size}" + arguments: ["op: svbool_t"] + return_type: "svbool_t" - name: svrevb[_{type}]{_mxz} attr: [*sve-unstable] diff --git a/crates/stdarch-gen-arm/src/intrinsic.rs b/crates/stdarch-gen-arm/src/intrinsic.rs index 72fb97fee1..f96f05dfec 100644 --- a/crates/stdarch-gen-arm/src/intrinsic.rs +++ b/crates/stdarch-gen-arm/src/intrinsic.rs @@ -1604,6 +1604,7 @@ impl Intrinsic { (Some(BaseTypeKind::Float), Some(BaseTypeKind::Float)) => ex, (Some(BaseTypeKind::UInt), Some(BaseTypeKind::UInt)) => ex, (Some(BaseTypeKind::Poly), Some(BaseTypeKind::Poly)) => ex, + (Some(BaseTypeKind::Bool), Some(BaseTypeKind::Bool)) => ex, (None, None) => ex, _ => unreachable!( diff --git a/intrinsics_data/arm_intrinsics.json b/intrinsics_data/arm_intrinsics.json index fab6da7f2c..7f749fe4d8 100644 --- a/intrinsics_data/arm_intrinsics.json +++ b/intrinsics_data/arm_intrinsics.json @@ -51174,6 +51174,12 @@ "svfloat16x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svfloat16_t" }, @@ -51188,6 +51194,12 @@ "svfloat32x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svfloat32_t" }, @@ -51202,6 +51214,12 @@ "svfloat64x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svfloat64_t" }, @@ -51216,6 +51234,12 @@ "svint16x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svint16_t" }, @@ -51230,6 +51254,12 @@ "svint32x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svint32_t" }, @@ -51244,6 +51274,12 @@ "svint64x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svint64_t" }, @@ -51258,6 +51294,12 @@ "svint8x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svint8_t" }, @@ -51272,6 +51314,12 @@ "svuint16x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svuint16_t" }, @@ -51286,6 +51334,12 @@ "svuint32x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svuint32_t" }, @@ -51300,6 +51354,12 @@ "svuint64x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svuint64_t" }, @@ -51314,6 +51374,12 @@ "svuint8x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svuint8_t" }, @@ -51328,6 +51394,12 @@ "svfloat16x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svfloat16_t" }, @@ -51342,6 +51414,12 @@ "svfloat32x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svfloat32_t" }, @@ -51356,6 +51434,12 @@ "svfloat64x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svfloat64_t" }, @@ -51370,6 +51454,12 @@ "svint16x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svint16_t" }, @@ -51384,6 +51474,12 @@ "svint32x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svint32_t" }, @@ -51398,6 +51494,12 @@ "svint64x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svint64_t" }, @@ -51412,6 +51514,12 @@ "svint8x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svint8_t" }, @@ -51426,6 +51534,12 @@ "svuint16x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svuint16_t" }, @@ -51440,6 +51554,12 @@ "svuint32x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svuint32_t" }, @@ -51454,6 +51574,12 @@ "svuint64x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svuint64_t" }, @@ -51468,6 +51594,12 @@ "svuint8x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svuint8_t" }, @@ -51482,6 +51614,12 @@ "svfloat16x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svfloat16_t" }, @@ -51496,6 +51634,12 @@ "svfloat32x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svfloat32_t" }, @@ -51510,6 +51654,12 @@ "svfloat64x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svfloat64_t" }, @@ -51524,6 +51674,12 @@ "svint16x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svint16_t" }, @@ -51538,6 +51694,12 @@ "svint32x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svint32_t" }, @@ -51552,6 +51714,12 @@ "svint64x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svint64_t" }, @@ -51566,6 +51734,12 @@ "svint8x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svint8_t" }, @@ -51580,6 +51754,12 @@ "svuint16x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svuint16_t" }, @@ -51594,6 +51774,12 @@ "svuint32x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svuint32_t" }, @@ -51608,6 +51794,12 @@ "svuint64x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svuint64_t" }, @@ -51622,6 +51814,12 @@ "svuint8x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svuint8_t" }, @@ -162999,6 +163197,12 @@ "uint64_t imm_index", "svfloat16_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svfloat16x2_t" }, @@ -163014,6 +163218,12 @@ "uint64_t imm_index", "svfloat32_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svfloat32x2_t" }, @@ -163029,6 +163239,12 @@ "uint64_t imm_index", "svfloat64_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svfloat64x2_t" }, @@ -163044,6 +163260,12 @@ "uint64_t imm_index", "svint16_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svint16x2_t" }, @@ -163059,6 +163281,12 @@ "uint64_t imm_index", "svint32_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svint32x2_t" }, @@ -163074,6 +163302,12 @@ "uint64_t imm_index", "svint64_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svint64x2_t" }, @@ -163089,6 +163323,12 @@ "uint64_t imm_index", "svint8_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svint8x2_t" }, @@ -163104,6 +163344,12 @@ "uint64_t imm_index", "svuint16_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svuint16x2_t" }, @@ -163119,6 +163365,12 @@ "uint64_t imm_index", "svuint32_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svuint32x2_t" }, @@ -163134,6 +163386,12 @@ "uint64_t imm_index", "svuint64_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svuint64x2_t" }, @@ -163149,6 +163407,12 @@ "uint64_t imm_index", "svuint8_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svuint8x2_t" }, @@ -163164,6 +163428,12 @@ "uint64_t imm_index", "svfloat16_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svfloat16x3_t" }, @@ -163179,6 +163449,12 @@ "uint64_t imm_index", "svfloat32_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svfloat32x3_t" }, @@ -163194,6 +163470,12 @@ "uint64_t imm_index", "svfloat64_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svfloat64x3_t" }, @@ -163209,6 +163491,12 @@ "uint64_t imm_index", "svint16_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svint16x3_t" }, @@ -163224,6 +163512,12 @@ "uint64_t imm_index", "svint32_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svint32x3_t" }, @@ -163239,6 +163533,12 @@ "uint64_t imm_index", "svint64_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svint64x3_t" }, @@ -163254,6 +163554,12 @@ "uint64_t imm_index", "svint8_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svint8x3_t" }, @@ -163269,6 +163575,12 @@ "uint64_t imm_index", "svuint16_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svuint16x3_t" }, @@ -163284,6 +163596,12 @@ "uint64_t imm_index", "svuint32_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svuint32x3_t" }, @@ -163299,6 +163617,12 @@ "uint64_t imm_index", "svuint64_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svuint64x3_t" }, @@ -163314,6 +163638,12 @@ "uint64_t imm_index", "svuint8_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svuint8x3_t" }, @@ -163329,6 +163659,12 @@ "uint64_t imm_index", "svfloat16_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svfloat16x4_t" }, @@ -163344,6 +163680,12 @@ "uint64_t imm_index", "svfloat32_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svfloat32x4_t" }, @@ -163359,6 +163701,12 @@ "uint64_t imm_index", "svfloat64_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svfloat64x4_t" }, @@ -163374,6 +163722,12 @@ "uint64_t imm_index", "svint16_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svint16x4_t" }, @@ -163389,6 +163743,12 @@ "uint64_t imm_index", "svint32_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svint32x4_t" }, @@ -163404,6 +163764,12 @@ "uint64_t imm_index", "svint64_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svint64x4_t" }, @@ -163419,6 +163785,12 @@ "uint64_t imm_index", "svint8_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svint8x4_t" }, @@ -163434,6 +163806,12 @@ "uint64_t imm_index", "svuint16_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svuint16x4_t" }, @@ -163449,6 +163827,12 @@ "uint64_t imm_index", "svuint32_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svuint32x4_t" }, @@ -163464,6 +163848,12 @@ "uint64_t imm_index", "svuint64_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svuint64x4_t" }, @@ -163479,6 +163869,12 @@ "uint64_t imm_index", "svuint8_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svuint8x4_t" },