From 1dd1cccb6762e5e874cbaf49feb16118c7d0a75c Mon Sep 17 00:00:00 2001 From: David Wood Date: Wed, 20 May 2026 12:12:58 +0000 Subject: [PATCH 01/32] intrinsic-test: remove `concatln!` This macro isn't necessary and just makes the generated code being written harder to read compared to multi-line strings. --- crates/intrinsic-test/src/common/gen_rust.rs | 180 +++++++++---------- 1 file changed, 84 insertions(+), 96 deletions(-) diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index 02f6e40dc0..132ede8d9b 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -32,12 +32,6 @@ macro_rules! wrap_partialeq { wrap_partialeq!(NanEqF16(f16), NanEqF32(f32), NanEqF64(f64)); "#; -macro_rules! concatln { - ($($lines:expr),* $(,)?) => { - concat!($( $lines, "\n" ),*) - }; -} - /// Run rustfmt on the generated source code pub fn run_rustfmt(source_path: &str) { let output = Command::new("rustfmt") @@ -65,11 +59,14 @@ pub fn write_bin_cargo_toml( w: &mut impl std::io::Write, module_count: usize, ) -> std::io::Result<()> { - write!(w, concatln!("[workspace]", "members = ["))?; - for i in 0..module_count { - writeln!(w, " \"mod_{i}\",")?; - } - writeln!(w, "]") + write!( + w, + r#" +[workspace] +members = [{members}] +"#, + members = (0..module_count).format_with(",", |i, fmt| fmt(&format_args!("\"mod_{i}\""))) + ) } /// Writes a `Cargo.toml` for a crate with name `name` to `w` that will contain a single Rust source @@ -77,21 +74,20 @@ pub fn write_bin_cargo_toml( pub fn write_lib_cargo_toml(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> { write!( w, - concatln!( - "[package]", - "name = \"{name}\"", - "version = \"{version}\"", - "authors = [{authors}]", - "license = \"{license}\"", - "edition = \"2018\"", - "", - "[dependencies]", - "core_arch = {{ path = \"../../crates/core_arch\" }}", - "", - "[build-dependencies]", - "cc = \"1\"" - ), - name = name, + r#" +[package] +name = "{name}" +version = "{version}" +authors = [{authors}] +license = "{license}" +edition = "2018" + +[dependencies] +core_arch = {{ path = "../../crates/core_arch" }} + +[build-dependencies] +cc = "1" +"#, version = env!("CARGO_PKG_VERSION"), authors = env!("CARGO_PKG_AUTHORS") .split(":") @@ -110,22 +106,25 @@ pub fn write_lib_rs( i: usize, intrinsics: &[Intrinsic], ) -> std::io::Result<()> { - write!(w, "{notice}")?; - - writeln!(w, "#![feature(simd_ffi)]")?; - writeln!(w, "#![feature(f16)]")?; - writeln!(w, "#![allow(unused)]")?; - - // Cargo will spam the logs if these warnings are not silenced. - writeln!(w, "#![allow(non_upper_case_globals)]")?; - writeln!(w, "#![allow(non_camel_case_types)]")?; - writeln!(w, "#![allow(non_snake_case)]")?; - - writeln!(w, "{cfg}")?; - - writeln!(w, "{}", COMMON_RUST_DEFINITIONS)?; - - writeln!(w, "{definitions}")?; + writeln!( + w, + r#" +{notice} +#![feature(simd_ffi)] +#![feature(f16)] +#![allow(unused)] + +// Cargo will spam the logs if these warnings are not silenced. +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +{cfg} +{COMMON_RUST_DEFINITIONS} + +{definitions} +"# + )?; let mut seen = std::collections::HashSet::new(); @@ -224,28 +223,25 @@ fn generate_rust_test_loop( write!( w, - concatln!( - " for (id, rust, c) in specializations {{", - " for i in 0..{passes} {{", - " unsafe {{", - "{loaded_args}", - " let __rust_return_value = rust({rust_args});", - "", - " let mut __c_return_value = std::mem::MaybeUninit::uninit();", - " c(__c_return_value.as_mut_ptr(){c_args});", - " let __c_return_value = __c_return_value.assume_init();", - "", - " assert_eq!({cast_prefix}__rust_return_value{cast_suffix}, {cast_prefix}__c_return_value{cast_suffix}, \"{{id}}\");", - " }}", - " }}", - " }}", - ), + r#" +for (id, rust, c) in specializations {{ + for i in 0..{PASSES} {{ + unsafe {{ + {loaded_args} + let __rust_return_value = rust({rust_args}); + + let mut __c_return_value = std::mem::MaybeUninit::uninit(); + c(__c_return_value.as_mut_ptr(){c_args}); + let __c_return_value = __c_return_value.assume_init(); + + assert_eq!({cast_prefix}__rust_return_value{cast_suffix}, {cast_prefix}__c_return_value{cast_suffix}, "{{id}}"); + }} + }} +}} +"#, loaded_args = intrinsic.arguments.load_values_rust(), rust_args = intrinsic.arguments.as_call_param_rust(), c_args = intrinsic.arguments.as_c_call_param_rust(), - passes = PASSES, - cast_prefix = cast_prefix, - cast_suffix = cast_suffix, ) } @@ -259,7 +255,10 @@ fn create_rust_test( write!( w, - concatln!("#[test]", "fn test_{intrinsic_name}() {{"), + r#" +#[test] +fn test_{intrinsic_name}() {{ +"#, intrinsic_name = intrinsic.name, )?; @@ -279,19 +278,18 @@ pub fn write_bindings_rust( ) -> std::io::Result<()> { write!( w, - concatln!( - "#[allow(improper_ctypes)]", - "#[link(name = \"wrapper_{i}\")]", - "unsafe extern \"C\" {{" - ), - i = i + r#" +#[allow(improper_ctypes)] +#[link(name = "wrapper_{i}")] +unsafe extern "C" {{ +"#, )?; for intrinsic in intrinsics { intrinsic.iter_specializations(|imm_values| { writeln!( w, - " fn {name}_wrapper{imm_arglist}(__dst: *mut {return_ty}{arglist});", + "fn {name}_wrapper{imm_arglist}(__dst: *mut {return_ty}{arglist});", return_ty = intrinsic.results.rust_type(), name = intrinsic.name, imm_arglist = imm_values @@ -326,32 +324,22 @@ pub fn write_build_rs( write!( w, - concatln!( - "fn main() {{", - " cc::Build::new()", - " .file(\"../../c_programs/wrapper_{i}.c\")", - " .opt_level(2)", - " .flags(&[", - ), - i = i - )?; - - let compiler_specific_flags = match cli_options.cc_arg_style { - CcArgStyle::Gcc => GCC_FLAGS, - CcArgStyle::Clang => CLANG_FLAGS, - }; - - for flag in COMMON_FLAGS - .iter() - .chain(compiler_specific_flags) - .chain(arch_flags) - { - writeln!(w, "\"{flag}\",")?; - } - - write!( - w, - concatln!(" ])", " .compile(\"wrapper_{i}\");", "}}"), - i = i + r#" +fn main() {{ + cc::Build::new() + .file("../../c_programs/wrapper_{i}.c") + .opt_level(2) + .flags(&[{flags}]) + .compile("wrapper_{i}"); +}} +"#, + flags = COMMON_FLAGS + .iter() + .chain(match cli_options.cc_arg_style { + CcArgStyle::Gcc => GCC_FLAGS, + CcArgStyle::Clang => CLANG_FLAGS, + }) + .chain(arch_flags) + .format_with(",", |flag, fmt| fmt(&format_args!("\"{flag}\""))), ) } From b89077b7cf287bbff980cae2074140ca39912256 Mon Sep 17 00:00:00 2001 From: David Wood Date: Wed, 20 May 2026 12:31:56 +0000 Subject: [PATCH 02/32] intrinsic-test: specializations as iterator Replacing `iter_specializations` (which repeatedly invokes a callback) with an iterator implementation allows `Itertools::format_with` to be used more broadly, which in turn allows disparate string interpolation to be combined and hopefully provide greater context to the reader. --- .../intrinsic-test/src/common/constraint.rs | 55 +++++++++--- crates/intrinsic-test/src/common/gen_c.rs | 57 +++++++------ crates/intrinsic-test/src/common/gen_rust.rs | 84 +++++++++---------- crates/intrinsic-test/src/common/intrinsic.rs | 41 ++------- 4 files changed, 123 insertions(+), 114 deletions(-) diff --git a/crates/intrinsic-test/src/common/constraint.rs b/crates/intrinsic-test/src/common/constraint.rs index ab52d866ab..c7d37da2ad 100644 --- a/crates/intrinsic-test/src/common/constraint.rs +++ b/crates/intrinsic-test/src/common/constraint.rs @@ -1,6 +1,7 @@ -use serde::Deserialize; use std::ops::Range; +use serde::Deserialize; + /// Describes the values to test for a const generic parameter #[derive(Debug, PartialEq, Clone, Deserialize)] pub enum Constraint { @@ -23,21 +24,53 @@ pub enum Constraint { SvImmRotationAdd, } -impl Constraint { - /// Returns an iterator over the values of this constraint - pub fn iter(&self) -> Box + '_> { +/// Workaround to enable the `Constraint::into_iter` to return an iterator that implements `Clone`, +/// so that it can be used with `Itertools::multi_cartesian_product`. +/// +/// With the different iterator types, returning `Box + '_>` would the +/// idiomatic approach, but this can't be made to implement `Clone`. Given the limited number +/// of iterator types used and their relative lack of complexity, wrapping them all in an enum isn't +/// too bad. +#[derive(Clone)] +pub enum ConstraintIterator<'a> { + Once(std::iter::Once), + Range(std::ops::Range), + Copied(std::iter::Copied>), + Chain(std::iter::Chain, std::ops::RangeInclusive>), + StepBy(std::iter::StepBy>), +} + +impl<'a> Iterator for ConstraintIterator<'a> { + type Item = i64; + + fn next(&mut self) -> Option { + match self { + ConstraintIterator::Once(once) => once.next(), + ConstraintIterator::Range(range) => range.next(), + ConstraintIterator::Copied(copied) => copied.next(), + ConstraintIterator::Chain(chain) => chain.next(), + ConstraintIterator::StepBy(step_by) => step_by.next(), + } + } +} + +impl<'a> IntoIterator for &'a Constraint { + type Item = i64; + type IntoIter = ConstraintIterator<'a>; + + fn into_iter(self) -> Self::IntoIter { match self { - Constraint::Equal(i) => Box::new(std::iter::once(*i)), - Constraint::Range(range) => Box::new(range.clone()), - Constraint::Set(items) => Box::new(items.iter().copied().chain(Range::default())), + Constraint::Equal(i) => ConstraintIterator::Once(std::iter::once(*i)), + Constraint::Range(range) => ConstraintIterator::Range(range.clone()), + Constraint::Set(items) => ConstraintIterator::Copied(items.iter().copied()), // These values are discriminants of the `svpattern` enum - Constraint::SvPattern => Box::new((0..=13).chain(29..=31)), + Constraint::SvPattern => ConstraintIterator::Chain((0..=13).chain(29..=31)), // These values are discriminants of the `svprfop` enum - Constraint::SvPrefetchOp => Box::new((0..=5).chain(8..=14)), + Constraint::SvPrefetchOp => ConstraintIterator::Chain((0..=5).chain(8..=14)), // Valid rotations for intrinsics operating on complex pairs: 0, 90, 180, 270 - Constraint::SvImmRotation => Box::new((0..=270).step_by(90)), + Constraint::SvImmRotation => ConstraintIterator::StepBy((0..=270).step_by(90)), // Valid rotations for `svcadd` and `svqcadd`: 0, 270 - Constraint::SvImmRotationAdd => Box::new((90..=270).step_by(180)), + Constraint::SvImmRotationAdd => ConstraintIterator::StepBy((90..=270).step_by(180)), } } } diff --git a/crates/intrinsic-test/src/common/gen_c.rs b/crates/intrinsic-test/src/common/gen_c.rs index 24756324c4..6cd7c6f03e 100644 --- a/crates/intrinsic-test/src/common/gen_c.rs +++ b/crates/intrinsic-test/src/common/gen_c.rs @@ -20,33 +20,40 @@ pub fn write_wrapper_c( platform_headers: &[&str], intrinsics: &[Intrinsic], ) -> std::io::Result<()> { - write!(w, "{notice}")?; + write!( + w, + r#" +{notice} +#include +#include +{platform_headers} - writeln!(w, "#include ")?; - writeln!(w, "#include ")?; - - for header in platform_headers { - writeln!(w, "#include <{header}>")?; - } - - for intrinsic in intrinsics { - intrinsic.iter_specializations(|imm_values| { - writeln!( - w, - " +{intrinsics} +"#, + platform_headers = + platform_headers + .iter() + .format_with("\n", |header, fmt| fmt(&format_args!( + "#include <{header}>" + ))), + intrinsics = intrinsics.iter().format_with("", |intrinsic, fmt| { + fmt(&intrinsic + .specializations() + .format_with("\n", |imm_values, fmt| { + fmt(&format_args!( + " void {name}_wrapper{imm_arglist}({return_ty}* __dst{arglist}) {{ *__dst = {name}({params}); }}", - return_ty = intrinsic.results.c_type(), - name = intrinsic.name, - imm_arglist = imm_values - .iter() - .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), - arglist = intrinsic.arguments.as_non_imm_arglist_c(), - params = intrinsic.arguments.as_call_params_c(&imm_values) - ) - })?; - } - - Ok(()) + return_ty = intrinsic.results.c_type(), + name = intrinsic.name, + imm_arglist = imm_values + .iter() + .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), + arglist = intrinsic.arguments.as_non_imm_arglist_c(), + params = intrinsic.arguments.as_call_params_c(&imm_values) + )) + })) + }), + ) } diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index 132ede8d9b..f2ab2b69ed 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -172,32 +172,6 @@ fn generate_rust_test_loop( coerce += ") -> _"; c_coerce += ")"; - if intrinsic - .arguments - .iter() - .filter(|arg| arg.has_constraint()) - .count() - == 0 - { - writeln!( - w, - " let specializations = [(\"\", {intrinsic_name}, {intrinsic_name}_wrapper)];" - )?; - } else { - writeln!(w, " let specializations = [")?; - - intrinsic.iter_specializations(|imm_values| { - writeln!( - w, - " (\"{const_args}\", {intrinsic_name}::<{const_args}> as unsafe {coerce}, {intrinsic_name}_wrapper_{c_const_args} as unsafe extern \"C\" {c_coerce}),", - const_args = imm_values.iter().join(","), - c_const_args = imm_values.iter().join("_"), - ) - })?; - - writeln!(w, " ];")?; - } - let (cast_prefix, cast_suffix) = if intrinsic.results.is_simd() { ( format!( @@ -224,6 +198,7 @@ fn generate_rust_test_loop( write!( w, r#" +let specializations = [{specializations}]; for (id, rust, c) in specializations {{ for i in 0..{PASSES} {{ unsafe {{ @@ -239,6 +214,27 @@ for (id, rust, c) in specializations {{ }} }} "#, + specializations = intrinsic + .specializations() + .format_with(",", |imm_values, fmt| { + if imm_values.is_empty() { + fmt(&format_args!( + "(\"\", {intrinsic_name}, {intrinsic_name}_wrapper)" + )) + } else { + fmt(&format_args!( + r#" + ( + "{const_args}", + {intrinsic_name}::<{const_args}> as unsafe {coerce}, + {intrinsic_name}_wrapper_{c_const_args} as unsafe extern "C" {c_coerce} + ) + "#, + const_args = imm_values.iter().join(","), + c_const_args = imm_values.iter().join("_"), + )) + } + }), loaded_args = intrinsic.arguments.load_values_rust(), rust_args = intrinsic.arguments.as_call_param_rust(), c_args = intrinsic.arguments.as_c_call_param_rust(), @@ -282,25 +278,25 @@ pub fn write_bindings_rust( #[allow(improper_ctypes)] #[link(name = "wrapper_{i}")] unsafe extern "C" {{ + {definitions} +}} "#, - )?; - - for intrinsic in intrinsics { - intrinsic.iter_specializations(|imm_values| { - writeln!( - w, - "fn {name}_wrapper{imm_arglist}(__dst: *mut {return_ty}{arglist});", - return_ty = intrinsic.results.rust_type(), - name = intrinsic.name, - imm_arglist = imm_values - .iter() - .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), - arglist = intrinsic.arguments.as_non_imm_arglist_rust(), - ) - })?; - } - - writeln!(w, "}}") + definitions = intrinsics.iter().format_with("", |intrinsic, fmt| { + fmt(&intrinsic + .specializations() + .format_with("\n", |imm_values, fmt| { + fmt(&format_args!( + "fn {name}_wrapper{imm_arglist}(__dst: *mut {return_ty}{arglist});", + return_ty = intrinsic.results.rust_type(), + name = intrinsic.name, + imm_arglist = imm_values + .iter() + .format_with("", |i, fmt| fmt(&format_args!("_{i}"))), + arglist = intrinsic.arguments.as_non_imm_arglist_rust(), + )) + })) + }) + ) } /// Writes a `build.rs` into `w` for each test crate that compiles the corresponding C source code diff --git a/crates/intrinsic-test/src/common/intrinsic.rs b/crates/intrinsic-test/src/common/intrinsic.rs index d69644388a..f7eb462068 100644 --- a/crates/intrinsic-test/src/common/intrinsic.rs +++ b/crates/intrinsic-test/src/common/intrinsic.rs @@ -1,4 +1,4 @@ -use crate::common::constraint::Constraint; +use itertools::Itertools; use super::argument::ArgumentList; use super::intrinsic_helpers::IntrinsicTypeDefinition; @@ -19,27 +19,6 @@ pub struct Intrinsic { pub arch_tags: Vec, } -/// Invokes `f` for each combination of the values in the constraint ranges. -/// -/// For example, given `constraints=[Equal(0), Range(1..2), Set([3, 4])]` and `imm_values=[]`, this -/// produces the four calls to `f`: `f([0, 1, 3])`, `f([0, 1, 4])`, `f([0, 2, 3])`, `f([0, 2, 4])`. -fn recurse_specializations<'a, E>( - constraints: &mut (impl Iterator + Clone), - imm_values: &mut Vec, - f: &mut impl FnMut(&[i64]) -> Result<(), E>, -) -> Result<(), E> { - if let Some(current) = constraints.next() { - for i in current.iter() { - imm_values.push(i); - recurse_specializations(&mut constraints.clone(), imm_values, f)?; - imm_values.pop(); - } - Ok(()) - } else { - f(&imm_values) - } -} - impl Intrinsic { /// Invokes `f` for "specialisation" of the intrinsic - a specific instantiation of the /// constant generics of the intrinsic. `f` takes a slice where the `i`th element corresponds @@ -48,17 +27,11 @@ impl Intrinsic { /// For an intrinsic with three arguments with constraints `Equal(0)`, `Range(1..2)`, /// `Set([3, 4])` respectively, this would produce four calls to `f`: `f(0, 1, 3)`, /// `f(0, 1, 4)`, `f(0, 2, 3)`, `f(0, 2, 4)`. - pub fn iter_specializations( - &self, - mut f: impl FnMut(&[i64]) -> Result<(), E>, - ) -> Result<(), E> { - recurse_specializations( - &mut self - .arguments - .iter() - .filter_map(|arg| arg.constraint.as_ref()), - &mut Vec::new(), - &mut f, - ) + pub fn specializations(&self) -> impl Iterator> { + self.arguments + .iter() + .filter_map(|arg| arg.constraint.as_ref()) + .map(|constraint| constraint.into_iter()) + .multi_cartesian_product() } } From ad492c9db38a621f353682e02a01433ff2adbd1c Mon Sep 17 00:00:00 2001 From: David Wood Date: Tue, 2 Jun 2026 12:39:02 +0000 Subject: [PATCH 03/32] intrinsic-test: shorten various type names This isn't strictly necessary but these type names were longer than they needed to be. --- crates/intrinsic-test/src/arm/intrinsic.rs | 6 +++--- crates/intrinsic-test/src/arm/json_parser.rs | 18 +++++++----------- crates/intrinsic-test/src/arm/mod.rs | 18 ++++++++---------- crates/intrinsic-test/src/arm/types.rs | 8 +++----- crates/intrinsic-test/src/common/argument.rs | 10 +++++----- crates/intrinsic-test/src/common/gen_c.rs | 4 ++-- crates/intrinsic-test/src/common/gen_rust.rs | 10 +++++----- crates/intrinsic-test/src/common/intrinsic.rs | 6 +++--- .../src/common/intrinsic_helpers.rs | 2 +- crates/intrinsic-test/src/common/mod.rs | 8 ++++---- crates/intrinsic-test/src/common/values.rs | 6 +++--- crates/intrinsic-test/src/main.rs | 18 ++++++------------ crates/intrinsic-test/src/x86/mod.rs | 8 ++++---- crates/intrinsic-test/src/x86/types.rs | 6 ++---- 14 files changed, 56 insertions(+), 72 deletions(-) diff --git a/crates/intrinsic-test/src/arm/intrinsic.rs b/crates/intrinsic-test/src/arm/intrinsic.rs index a54e585719..bcbee3503b 100644 --- a/crates/intrinsic-test/src/arm/intrinsic.rs +++ b/crates/intrinsic-test/src/arm/intrinsic.rs @@ -2,9 +2,9 @@ use crate::common::intrinsic_helpers::IntrinsicType; use std::ops::{Deref, DerefMut}; #[derive(Debug, Clone, PartialEq)] -pub struct ArmIntrinsicType(pub IntrinsicType); +pub struct ArmType(pub IntrinsicType); -impl Deref for ArmIntrinsicType { +impl Deref for ArmType { type Target = IntrinsicType; fn deref(&self) -> &Self::Target { @@ -12,7 +12,7 @@ impl Deref for ArmIntrinsicType { } } -impl DerefMut for ArmIntrinsicType { +impl DerefMut for ArmType { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } diff --git a/crates/intrinsic-test/src/arm/json_parser.rs b/crates/intrinsic-test/src/arm/json_parser.rs index 06cf78a422..9ca604a884 100644 --- a/crates/intrinsic-test/src/arm/json_parser.rs +++ b/crates/intrinsic-test/src/arm/json_parser.rs @@ -1,4 +1,4 @@ -use super::intrinsic::ArmIntrinsicType; +use super::intrinsic::ArmType; use crate::arm::types::parse_intrinsic_type; use crate::common::argument::{Argument, ArgumentList}; use crate::common::constraint::Constraint; @@ -59,7 +59,7 @@ struct JsonIntrinsic { pub fn get_neon_intrinsics( filename: &Path, -) -> Result>, Box> { +) -> Result>, Box> { let file = std::fs::File::open(filename)?; let reader = std::io::BufReader::new(file); let json: Vec = serde_json::from_reader(reader).expect("Couldn't parse JSON"); @@ -79,10 +79,10 @@ pub fn get_neon_intrinsics( fn json_to_intrinsic( mut intr: JsonIntrinsic, -) -> Result, Box> { +) -> Result, Box> { let name = intr.name.replace(['[', ']'], ""); - let result_ty = ArmIntrinsicType(parse_intrinsic_type(&intr.return_type.value)?); + let result_ty = ArmType(parse_intrinsic_type(&intr.return_type.value)?); let args = intr .arguments @@ -120,12 +120,8 @@ fn json_to_intrinsic( } }); - let mut arg = Argument::::new( - i, - String::from(arg_name), - ArmIntrinsicType(arg_ty), - constraint, - ); + let mut arg = + Argument::::new(i, String::from(arg_name), ArmType(arg_ty), constraint); // The JSON doesn't list immediates as const let IntrinsicType { @@ -138,7 +134,7 @@ fn json_to_intrinsic( }) .collect(); - let arguments = ArgumentList:: { args }; + let arguments = ArgumentList:: { args }; Ok(Intrinsic { name, diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 378f23ba7c..cce33a3921 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -3,22 +3,20 @@ mod intrinsic; mod json_parser; mod types; -use crate::common::SupportedArchitectureTest; +use crate::common::SupportedArchitecture; use crate::common::cli::{CcArgStyle, ProcessedCli}; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; -use intrinsic::ArmIntrinsicType; +use intrinsic::ArmType; use json_parser::get_neon_intrinsics; -pub struct ArmArchitectureTest { - intrinsics: Vec>, -} +pub struct Arm(Vec>); -impl SupportedArchitectureTest for ArmArchitectureTest { - type IntrinsicImpl = ArmIntrinsicType; +impl SupportedArchitecture for Arm { + type Type = ArmType; - fn intrinsics(&self) -> &[Intrinsic] { - &self.intrinsics + fn intrinsics(&self) -> &[Intrinsic] { + &self.0 } const NOTICE: &str = config::NOTICE; @@ -66,6 +64,6 @@ impl SupportedArchitectureTest for ArmArchitectureTest { .take(sample_size) .collect::>(); - Self { intrinsics } + Self(intrinsics) } } diff --git a/crates/intrinsic-test/src/arm/types.rs b/crates/intrinsic-test/src/arm/types.rs index cd420f1067..44af107eb9 100644 --- a/crates/intrinsic-test/src/arm/types.rs +++ b/crates/intrinsic-test/src/arm/types.rs @@ -1,9 +1,7 @@ -use super::intrinsic::ArmIntrinsicType; -use crate::common::intrinsic_helpers::{ - IntrinsicType, IntrinsicTypeDefinition, Sign, SimdLen, TypeKind, -}; +use super::intrinsic::ArmType; +use crate::common::intrinsic_helpers::{IntrinsicType, Sign, SimdLen, TypeDefinition, TypeKind}; -impl IntrinsicTypeDefinition for ArmIntrinsicType { +impl TypeDefinition for ArmType { /// Gets a string containing the typename for this type in C format. fn c_type(&self) -> String { let prefix = self.kind.c_prefix(); diff --git a/crates/intrinsic-test/src/common/argument.rs b/crates/intrinsic-test/src/common/argument.rs index eaec5b71c4..ea58d47615 100644 --- a/crates/intrinsic-test/src/common/argument.rs +++ b/crates/intrinsic-test/src/common/argument.rs @@ -5,11 +5,11 @@ use crate::common::values::test_values_array_name; use super::PASSES; use super::constraint::Constraint; -use super::intrinsic_helpers::IntrinsicTypeDefinition; +use super::intrinsic_helpers::TypeDefinition; /// An argument for the intrinsic. #[derive(Debug, PartialEq, Clone)] -pub struct Argument { +pub struct Argument { /// The argument's index in the intrinsic function call. pub pos: usize, /// The argument name. @@ -22,7 +22,7 @@ pub struct Argument { impl Argument where - T: IntrinsicTypeDefinition, + T: TypeDefinition, { pub fn new(pos: usize, name: String, ty: T, constraint: Option) -> Self { Argument { @@ -63,13 +63,13 @@ where /// Arguments of an intrinsic - including parameters that end up being const generics. #[derive(Debug, PartialEq, Clone)] -pub struct ArgumentList { +pub struct ArgumentList { pub args: Vec>, } impl ArgumentList where - T: IntrinsicTypeDefinition, + T: TypeDefinition, { /// Returns a string with the arguments in `self` as a parameter list for a wrapper fn /// definition in C (e.g. `$ty1 $arg1, $ty2 $arg2`). diff --git a/crates/intrinsic-test/src/common/gen_c.rs b/crates/intrinsic-test/src/common/gen_c.rs index 6cd7c6f03e..b88a5af9eb 100644 --- a/crates/intrinsic-test/src/common/gen_c.rs +++ b/crates/intrinsic-test/src/common/gen_c.rs @@ -2,7 +2,7 @@ use itertools::Itertools; use crate::common::intrinsic::Intrinsic; -use super::intrinsic_helpers::IntrinsicTypeDefinition; +use super::intrinsic_helpers::TypeDefinition; /// Generates a C source file containing wrapper functions around each specialisation of each /// intrinsic (that is, intrinsics with specific values for the the immediate arguments). Each @@ -14,7 +14,7 @@ use super::intrinsic_helpers::IntrinsicTypeDefinition; /// *__dst = __crc32cd(a, b); /// } /// ``` -pub fn write_wrapper_c( +pub fn write_wrapper_c( w: &mut impl std::io::Write, notice: &str, platform_headers: &[&str], diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index f2ab2b69ed..d85b8425c2 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -2,7 +2,7 @@ use std::process::Command; use itertools::Itertools; -use super::intrinsic_helpers::IntrinsicTypeDefinition; +use super::intrinsic_helpers::TypeDefinition; use crate::common::PASSES; use crate::common::cli::{CcArgStyle, ProcessedCli}; use crate::common::intrinsic::Intrinsic; @@ -98,7 +98,7 @@ cc = "1" /// Writes a Rust source file into `w` with common definitions, static arrays with test values, /// declarations of C wrapper functions for FFI and Rust test functions. -pub fn write_lib_rs( +pub fn write_lib_rs( w: &mut impl std::io::Write, notice: &str, cfg: &str, @@ -156,7 +156,7 @@ pub fn write_lib_rs( /// (first loop) `PASSES` number of times (second loop). For a given iteration of a given /// specialisation, test values are loaded for each argument and passed to the Rust intrinsic /// and the C wrapper function, and the results are compared. -fn generate_rust_test_loop( +fn generate_rust_test_loop( w: &mut impl std::io::Write, intrinsic: &Intrinsic, ) -> std::io::Result<()> { @@ -243,7 +243,7 @@ for (id, rust, c) in specializations {{ /// Writes a test function for an given intrinsic to `w`, with a body generated by /// `generate_rust_test_loop`. -fn create_rust_test( +fn create_rust_test( w: &mut impl std::io::Write, intrinsic: &Intrinsic, ) -> std::io::Result<()> { @@ -267,7 +267,7 @@ fn test_{intrinsic_name}() {{ /// Writes an `extern "C"` block with function declarations for each of the C wrapper functions into /// `w`. -pub fn write_bindings_rust( +pub fn write_bindings_rust( w: &mut impl std::io::Write, i: usize, intrinsics: &[Intrinsic], diff --git a/crates/intrinsic-test/src/common/intrinsic.rs b/crates/intrinsic-test/src/common/intrinsic.rs index f7eb462068..034e2fbdec 100644 --- a/crates/intrinsic-test/src/common/intrinsic.rs +++ b/crates/intrinsic-test/src/common/intrinsic.rs @@ -1,11 +1,11 @@ use itertools::Itertools; use super::argument::ArgumentList; -use super::intrinsic_helpers::IntrinsicTypeDefinition; +use super::intrinsic_helpers::TypeDefinition; /// An intrinsic #[derive(Debug, PartialEq, Clone)] -pub struct Intrinsic { +pub struct Intrinsic { /// The function name of this intrinsic. pub name: String, @@ -19,7 +19,7 @@ pub struct Intrinsic { pub arch_tags: Vec, } -impl Intrinsic { +impl Intrinsic { /// Invokes `f` for "specialisation" of the intrinsic - a specific instantiation of the /// constant generics of the intrinsic. `f` takes a slice where the `i`th element corresponds /// to the value of the `i`th const generic argument of the intrinsic. diff --git a/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/crates/intrinsic-test/src/common/intrinsic_helpers.rs index ca5aeba86d..8e5d55ff3b 100644 --- a/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -195,7 +195,7 @@ impl IntrinsicType { } } -pub trait IntrinsicTypeDefinition: Deref { +pub trait TypeDefinition: Deref { /// Determines the load function for this type. fn get_load_function(&self) -> String; diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index b577491454..78720d6bc5 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -10,7 +10,7 @@ use crate::common::{ run_rustfmt, write_bin_cargo_toml, write_build_rs, write_lib_cargo_toml, write_lib_rs, }, intrinsic::Intrinsic, - intrinsic_helpers::IntrinsicTypeDefinition, + intrinsic_helpers::TypeDefinition, }; pub mod argument; @@ -29,10 +29,10 @@ pub(crate) const PASSES: u32 = 20; /// Architectures must support this trait /// to be successfully tested. -pub trait SupportedArchitectureTest { - type IntrinsicImpl: IntrinsicTypeDefinition + Sync; +pub trait SupportedArchitecture { + type Type: TypeDefinition + Sync; - fn intrinsics(&self) -> &[Intrinsic]; + fn intrinsics(&self) -> &[Intrinsic]; fn create(cli_options: &ProcessedCli) -> Self; diff --git a/crates/intrinsic-test/src/common/values.rs b/crates/intrinsic-test/src/common/values.rs index 4c3dd078e0..a3c0e09797 100644 --- a/crates/intrinsic-test/src/common/values.rs +++ b/crates/intrinsic-test/src/common/values.rs @@ -2,7 +2,7 @@ use itertools::Itertools as _; use crate::common::{ PASSES, - intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, SimdLen, TypeKind}, + intrinsic_helpers::{IntrinsicType, Sign, SimdLen, TypeDefinition, TypeKind}, }; /// Maximum size of a SVE vector @@ -18,7 +18,7 @@ pub const MAX_SVE_BITS: u32 = 2048; /// 0x80, 0x3b, 0xff, /// ]; /// ``` -pub fn test_values_array_static( +pub fn test_values_array_static( w: &mut impl std::io::Write, ty: &T, ) -> std::io::Result<()> { @@ -34,7 +34,7 @@ pub fn test_values_array_static( /// Returns a string with the name of the static variable containing test values for intrinsic /// arguments of this type. -pub fn test_values_array_name(ty: &T) -> String { +pub fn test_values_array_name(ty: &T) -> String { format!( "{ty}_{load_size}", ty = ty.rust_scalar_type().to_uppercase(), diff --git a/crates/intrinsic-test/src/main.rs b/crates/intrinsic-test/src/main.rs index 4c0136041f..e25eb48a45 100644 --- a/crates/intrinsic-test/src/main.rs +++ b/crates/intrinsic-test/src/main.rs @@ -5,10 +5,10 @@ mod arm; mod common; mod x86; -use arm::ArmArchitectureTest; -use common::SupportedArchitectureTest; +use arm::Arm; +use common::SupportedArchitecture; use common::cli::{Cli, ProcessedCli}; -use x86::X86ArchitectureTest; +use x86::X86; fn main() { pretty_env_logger::init(); @@ -18,21 +18,15 @@ fn main() { if processed_cli_options.target.starts_with("arm") | processed_cli_options.target.starts_with("aarch64") { - run( - ArmArchitectureTest::create(&processed_cli_options), - processed_cli_options, - ) + run(Arm::create(&processed_cli_options), processed_cli_options) } else if processed_cli_options.target.starts_with("x86") { - run( - X86ArchitectureTest::create(&processed_cli_options), - processed_cli_options, - ) + run(X86::create(&processed_cli_options), processed_cli_options) } else { unimplemented!("Unsupported target {}", processed_cli_options.target) } } -fn run(test_environment: impl SupportedArchitectureTest, processed_cli_options: ProcessedCli) { +fn run(test_environment: impl SupportedArchitecture, processed_cli_options: ProcessedCli) { info!("building C binaries"); test_environment.generate_c_file(); diff --git a/crates/intrinsic-test/src/x86/mod.rs b/crates/intrinsic-test/src/x86/mod.rs index 288bd8bdf8..ae6d460809 100644 --- a/crates/intrinsic-test/src/x86/mod.rs +++ b/crates/intrinsic-test/src/x86/mod.rs @@ -4,19 +4,19 @@ mod intrinsic; mod types; mod xml_parser; -use crate::common::SupportedArchitectureTest; +use crate::common::SupportedArchitecture; use crate::common::cli::ProcessedCli; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; use intrinsic::X86IntrinsicType; use xml_parser::get_xml_intrinsics; -pub struct X86ArchitectureTest { +pub struct X86 { intrinsics: Vec>, } -impl SupportedArchitectureTest for X86ArchitectureTest { - type IntrinsicImpl = X86IntrinsicType; +impl SupportedArchitecture for X86 { + type Type = X86IntrinsicType; fn intrinsics(&self) -> &[Intrinsic] { &self.intrinsics diff --git a/crates/intrinsic-test/src/x86/types.rs b/crates/intrinsic-test/src/x86/types.rs index a0e14c77d6..b7e063e228 100644 --- a/crates/intrinsic-test/src/x86/types.rs +++ b/crates/intrinsic-test/src/x86/types.rs @@ -3,12 +3,10 @@ use std::str::FromStr; use itertools::Itertools; use super::intrinsic::X86IntrinsicType; -use crate::common::intrinsic_helpers::{ - IntrinsicType, IntrinsicTypeDefinition, Sign, SimdLen, TypeKind, -}; +use crate::common::intrinsic_helpers::{IntrinsicType, Sign, SimdLen, TypeDefinition, TypeKind}; use crate::x86::xml_parser::Parameter; -impl IntrinsicTypeDefinition for X86IntrinsicType { +impl TypeDefinition for X86IntrinsicType { /// Gets a string containing the type in C format. /// This function assumes that this value is present in the metadata hashmap. fn c_type(&self) -> String { From 90f4c152a75b9ecfcfef7342a348d59e64129a92 Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 04/32] intrinsic-test: impl `get_load_function` for SVE Updates `get_load_function` to return `svld{n}_{ty}` when loading a scalable vector type. Caller of `get_load_function` will still need updated to handle passing the predicate arguments to these load functions. --- crates/intrinsic-test/src/arm/types.rs | 55 +++++++++++-------- crates/intrinsic-test/src/common/gen_rust.rs | 2 +- .../src/common/intrinsic_helpers.rs | 19 ++++--- 3 files changed, 45 insertions(+), 31 deletions(-) diff --git a/crates/intrinsic-test/src/arm/types.rs b/crates/intrinsic-test/src/arm/types.rs index 44af107eb9..43fdaf40fd 100644 --- a/crates/intrinsic-test/src/arm/types.rs +++ b/crates/intrinsic-test/src/arm/types.rs @@ -48,34 +48,45 @@ impl TypeDefinition for ArmType { /// Determines the load function for this type. fn get_load_function(&self) -> String { - if let IntrinsicType { - kind: k, - bit_len: Some(bl), - vec_len, - .. - } = **self - { - let quad = if self.num_lanes() * bl > 64 { "q" } else { "" }; - - format!( - "vld{len}{quad}_{type}{size}", - type = match k { - TypeKind::Int(Sign::Unsigned) => "u", - TypeKind::Int(Sign::Signed) => "s", - TypeKind::Float => "f", - TypeKind::Poly => "p", - x => todo!("get_load_function TypeKind: {x:#?}"), - }, - size = bl, - quad = quad, - len = vec_len.unwrap_or(1), - ) + if let Some(bl) = self.bit_len { + match self.num_lanes() { + SimdLen::Scalable => { + format!( + "svld{len}_{type}{bl}", + len = self.num_vectors(), + type = self.rust_intrinsic_name_prefix(), + ) + } + SimdLen::Fixed(num_lanes) => { + format!( + "vld{len}{quad}_{type}{bl}", + quad = if num_lanes * bl > 64 { "q" } else { "" }, + len = self.num_vectors(), + type = self.rust_intrinsic_name_prefix(), + ) + } + } } else { todo!("get_load_function IntrinsicType: {self:#?}") } } } +impl ArmType { + /// Returns the Rust prefix for the name of an intrinsic with this type kind (i.e. `s` for + /// `i16`, or `u` for `u16`). For type kinds without any bit length at the end (e.g. `bool`), + /// returns the whole type name. + pub fn rust_intrinsic_name_prefix(&self) -> &str { + match self.kind() { + TypeKind::Char(Sign::Signed) => "s", + TypeKind::Int(Sign::Signed) => "s", + TypeKind::Poly => "p", + TypeKind::Bool => "s", + _ => self.kind.rust_prefix(), + } + } +} + pub fn parse_intrinsic_type(s: &str) -> Result { const CONST_STR: &str = "const"; const ENUM_STR: &str = "enum "; diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index d85b8425c2..ea3664a059 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -177,7 +177,7 @@ fn generate_rust_test_loop( format!( "std::mem::transmute::<_, [{}; {}]>(", intrinsic.results.rust_scalar_type().replace("f", "NanEqF"), - intrinsic.results.num_lanes() * intrinsic.results.num_vectors() + intrinsic.results.num_lanes().expect_fixed() * intrinsic.results.num_vectors() ), ")", ) diff --git a/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/crates/intrinsic-test/src/common/intrinsic_helpers.rs index 8e5d55ff3b..231c63fc9e 100644 --- a/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -112,6 +112,15 @@ pub enum SimdLen { Fixed(u32), } +impl SimdLen { + pub fn expect_fixed(&self) -> u32 { + match self { + SimdLen::Fixed(lanes) => *lanes, + SimdLen::Scalable => panic!("`expect_fixed` with scalable length"), + } + } +} + impl std::fmt::Display for SimdLen { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { @@ -169,14 +178,8 @@ impl IntrinsicType { } /// Returns the number of lanes of the type - pub fn num_lanes(&self) -> u32 { - self.simd_len - .as_ref() - .map(|len| match len { - SimdLen::Scalable => unimplemented!(), - SimdLen::Fixed(len) => *len, - }) - .unwrap_or(1) + pub fn num_lanes(&self) -> SimdLen { + self.simd_len.unwrap_or(SimdLen::Fixed(1)) } /// Returns the number of vectors of the type From 816205578fca178c9fe29232ecacec4bf30842ab Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 05/32] intrinsic-test: skip unimplemented SVE intrinsics Various SVE intrinsics are not yet implemented in stdarch, but are present in the `arm_intrinsics.json` and so should be skipped. --- crates/intrinsic-test/src/arm/mod.rs | 38 +++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index cce33a3921..5e8f966103 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -6,7 +6,7 @@ mod types; use crate::common::SupportedArchitecture; use crate::common::cli::{CcArgStyle, ProcessedCli}; use crate::common::intrinsic::Intrinsic; -use crate::common::intrinsic_helpers::TypeKind; +use crate::common::intrinsic_helpers::{SimdLen, TypeKind}; use intrinsic::ArmType; use json_parser::get_neon_intrinsics; @@ -52,6 +52,42 @@ impl SupportedArchitecture for Arm { // Skip bfloat intrinsics - not currently supported .filter(|i| i.results.kind() != TypeKind::BFloat) .filter(|i| !i.arguments.iter().any(|a| a.ty.kind() == TypeKind::BFloat)) + // Skip SVE intrinsics that have `f16` - not yet implemented! + .filter(|i| { + let has_f16_arg = i + .arguments + .iter() + .any(|a| a.ty.kind() == TypeKind::Float && a.ty.bit_len == Some(16)); + let has_sve_arg = i + .arguments + .iter() + .any(|a| a.ty.num_lanes() == SimdLen::Scalable); + !(has_f16_arg && has_sve_arg) + }) + .filter(|i| { + let has_f16_ret = + i.results.kind() == TypeKind::Float && i.results.bit_len == Some(16); + let has_sve_ret = i.results.num_lanes() == SimdLen::Scalable; + !(has_f16_ret && has_sve_ret) + }) + // Skip `svqcvtn{u,}n*_x2` intrinsics - not yet implemented! + .filter(|i| !(i.name.starts_with("svqcvtn") && i.name.ends_with("_x2"))) + // Skip `svqrshr{u,}n*_x2` intrinsics - not yet implemented! + .filter(|i| !(i.name.starts_with("svqrshrn") && i.name.ends_with("_x2"))) + .filter(|i| !(i.name.starts_with("svqrshrun") && i.name.ends_with("_x2"))) + // Skip `svclamp*` intrinsics - not yet implemented! + .filter(|i| !i.name.starts_with("svclamp")) + // Skip `svdot{_lane,}_{s,u}32_{s,u}16` intrinsics - not yet implemented! + .filter(|i| { + i.name != "svdot_lane_u32_u16" + && i.name != "svdot_lane_s32_s16" + && i.name != "svdot_u32_u16" + && i.name != "svdot_s32_s16" + }) + // Skip `svrevd*` intrinsics - not yet implemented! + .filter(|i| !i.name.starts_with("svrevd")) + // Skip `svpsel_lane_b*` intrinsics - not yet implemented! + .filter(|i| !i.name.starts_with("svpsel_lane_b")) // Skip pointers for now, we would probably need to look at the return // type to work out how many elements we need to point to. .filter(|i| !i.arguments.iter().any(|a| a.is_ptr())) From 6ac8095657db29cd4bc789d9b4be66fc8651e893 Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 06/32] intrinsic-test: use `arm_sve.h` and target feats Updates the headers used by generated C code and the target feature flags passed to the C compiler to enable SVE. --- crates/intrinsic-test/src/arm/config.rs | 1 + crates/intrinsic-test/src/arm/mod.rs | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/crates/intrinsic-test/src/arm/config.rs b/crates/intrinsic-test/src/arm/config.rs index 7c26143622..15bd238d7a 100644 --- a/crates/intrinsic-test/src/arm/config.rs +++ b/crates/intrinsic-test/src/arm/config.rs @@ -16,6 +16,7 @@ pub const PLATFORM_RUST_CFGS: &str = r#" #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fp8))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(faminmax))] #![feature(stdarch_neon_f16)] +#![feature(stdarch_aarch64_sve)] #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] use core_arch::arch::aarch64::*; diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 5e8f966103..2a0ba05072 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -21,15 +21,25 @@ impl SupportedArchitecture for Arm { const NOTICE: &str = config::NOTICE; - const PLATFORM_C_HEADERS: &[&str] = &["arm_neon.h", "arm_acle.h", "arm_fp16.h"]; + const PLATFORM_C_HEADERS: &[&str] = &["arm_sve.h", "arm_neon.h", "arm_acle.h", "arm_fp16.h"]; const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; fn arch_flags(&self, cli_options: &ProcessedCli) -> Vec<&str> { // GCC uses an extra `-` in the arch name + let big_endian = cli_options.target.starts_with("aarch64_be"); + let a32 = cli_options.target.starts_with("armv7"); match cli_options.cc_arg_style { + CcArgStyle::Clang if !a32 && !big_endian => vec![ + "-march=armv8.6a+crypto+crc+dotprod+fp16+sve2-aes+sve2-sm4+sve2-sha3+sve2-bitperm+\ + f32mm+f64mm+sve2p1", + ], CcArgStyle::Clang => vec!["-march=armv8.6a+crypto+crc+dotprod+fp16"], + CcArgStyle::Gcc if !a32 && !big_endian => vec![ + "-march=armv8.6-a+crypto+crc+dotprod+fp16+sha3+sm4+sve2-aes+sve2-sm4+sve2-sha3+\ + sve2-bitperm+f32mm+f64mm+sve2p1", + ], CcArgStyle::Gcc => vec!["-march=armv8.6-a+crypto+crc+dotprod+fp16+sha3+sm4"], } } From 067a7aad6f4f4c0a3d6346802d0b54079788a762 Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 07/32] intrinsic-test: `bool` test values Some SVE intrinsics take booleans as arguments, so there is a need to support generating a test value array for booleans. --- crates/intrinsic-test/src/common/values.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/crates/intrinsic-test/src/common/values.rs b/crates/intrinsic-test/src/common/values.rs index a3c0e09797..ad3cb58fb9 100644 --- a/crates/intrinsic-test/src/common/values.rs +++ b/crates/intrinsic-test/src/common/values.rs @@ -50,7 +50,15 @@ pub fn test_values_array_name(ty: &T) -> String { /// which is then printed as a hex value in the generated code (and if identified as a negative /// value, with the appropriate minus and corrected hex pattern). Calls to `fN::from_bits` are /// generated for floats. +/// +/// An exception to the above is when `ty` is a boolean, where this function returns +/// `[true, false]` - as there are only ever two values for a boolean. This only works because the +/// generated accesses to the test value array is always modulo the length of the test value array. pub fn test_values_array(ty: &IntrinsicType) -> String { + if ty.kind() == TypeKind::Bool { + return "[true, false]".to_string(); + } + let (bit_len, kind) = match ty { IntrinsicType { kind: TypeKind::Float, @@ -105,7 +113,15 @@ pub fn test_values_array(ty: &IntrinsicType) -> String { /// /// For scalable vectors (only SVE is currently supported), assume that the length of the vector is /// the maximum supported by the architecture. +/// +/// An exception to the above is when `ty` is a boolean, where this function returns two - as +/// there are only ever two values for a boolean. This only works because the generated accesses to +/// the test value array is always modulo this length. pub fn test_values_array_length(ty: &IntrinsicType) -> u32 { + if ty.kind() == TypeKind::Bool { + return 2; + } + let IntrinsicType { simd_len, vec_len, .. } = ty; From 4f98769246bc067f28115b0d550e30b1730eaf81 Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 08/32] intrinsic-test: values for enum-typed constraints Constraints that correspond to enum types - such as `svpattern` and `svprfop` - need to be converted to the enum type in order to be used in a generic instantiation - so introduce a const function for both types that provides this mapping. --- crates/intrinsic-test/src/arm/config.rs | 41 ++++++++++++++++++++ crates/intrinsic-test/src/common/gen_rust.rs | 14 ++++++- 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/crates/intrinsic-test/src/arm/config.rs b/crates/intrinsic-test/src/arm/config.rs index 15bd238d7a..da36a5f915 100644 --- a/crates/intrinsic-test/src/arm/config.rs +++ b/crates/intrinsic-test/src/arm/config.rs @@ -23,4 +23,45 @@ use core_arch::arch::aarch64::*; #[cfg(target_arch = "arm")] use core_arch::arch::arm::*; + +const fn svpattern_from_i32(value: i32) -> svpattern { + match value { + 0 => svpattern::SV_POW2, + 1 => svpattern::SV_VL1, + 2 => svpattern::SV_VL2, + 3 => svpattern::SV_VL3, + 4 => svpattern::SV_VL4, + 5 => svpattern::SV_VL5, + 6 => svpattern::SV_VL6, + 7 => svpattern::SV_VL7, + 8 => svpattern::SV_VL8, + 9 => svpattern::SV_VL16, + 10 => svpattern::SV_VL32, + 11 => svpattern::SV_VL64, + 12 => svpattern::SV_VL128, + 13 => svpattern::SV_VL256, + 29 => svpattern::SV_MUL4, + 30 => svpattern::SV_MUL3, + 31 => svpattern::SV_ALL, + _ => unreachable!(), + } +} + +const fn svprfop_from_i32(value: i32) -> svprfop { + match value { + 0 => svprfop::SV_PLDL1KEEP, + 1 => svprfop::SV_PLDL1STRM, + 2 => svprfop::SV_PLDL2KEEP, + 3 => svprfop::SV_PLDL2STRM, + 4 => svprfop::SV_PLDL3KEEP, + 5 => svprfop::SV_PLDL3STRM, + 8 => svprfop::SV_PSTL1KEEP, + 9 => svprfop::SV_PSTL1STRM, + 10 => svprfop::SV_PSTL2KEEP, + 11 => svprfop::SV_PSTL2STRM, + 12 => svprfop::SV_PSTL3KEEP, + 13 => svprfop::SV_PSTL3STRM, + _ => unreachable!(), + } +} "#; diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index ea3664a059..782440dca6 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -222,6 +222,7 @@ for (id, rust, c) in specializations {{ "(\"\", {intrinsic_name}, {intrinsic_name}_wrapper)" )) } else { + let constraint_args = intrinsic.arguments.iter().filter(|a| a.has_constraint()); fmt(&format_args!( r#" ( @@ -230,7 +231,18 @@ for (id, rust, c) in specializations {{ {intrinsic_name}_wrapper_{c_const_args} as unsafe extern "C" {c_coerce} ) "#, - const_args = imm_values.iter().join(","), + const_args = imm_values + .iter() + .zip(constraint_args) + .map(|(imm_val, arg)| { + match arg.ty.kind() { + TypeKind::SvPattern | TypeKind::SvPrefetchOp => { + format!("{{ {}_from_i32({imm_val}) }}", arg.ty.kind()) + } + _ => imm_val.to_string(), + } + }) + .join(","), c_const_args = imm_values.iter().join("_"), )) } From 9f84831d7cb6d4bf3e3187ee7667d71d527794cf Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 09/32] intrinsic-test: simplify type printing A small refactoring to make the type printing logic slightly cleaner and with greater code re-use. --- crates/intrinsic-test/src/arm/types.rs | 80 ++++++++++++------- .../src/common/intrinsic_helpers.rs | 25 +++--- 2 files changed, 64 insertions(+), 41 deletions(-) diff --git a/crates/intrinsic-test/src/arm/types.rs b/crates/intrinsic-test/src/arm/types.rs index 43fdaf40fd..25dc70fde7 100644 --- a/crates/intrinsic-test/src/arm/types.rs +++ b/crates/intrinsic-test/src/arm/types.rs @@ -6,21 +6,32 @@ impl TypeDefinition for ArmType { fn c_type(&self) -> String { let prefix = self.kind.c_prefix(); - if let Some(bit_len) = self.bit_len { - match (self.simd_len, self.vec_len) { - (None, None) => format!("{prefix}{bit_len}_t"), - (Some(SimdLen::Fixed(simd)), None) => format!("{prefix}{bit_len}x{simd}_t"), - (Some(SimdLen::Fixed(simd)), Some(vec)) => { - format!("{prefix}{bit_len}x{simd}x{vec}_t") - } - (Some(SimdLen::Scalable), None) => format!("sv{prefix}{bit_len}_t"), - (Some(SimdLen::Scalable), Some(vec)) => { - format!("sv{prefix}{bit_len}x{vec}_t") - } - (None, Some(_)) => todo!("{self:#?}"), // Likely an invalid case + match (self.bit_len, self.simd_len, self.vec_len) { + // e.g. `bool` + (Some(_), None, None) if matches!(self.kind, TypeKind::Bool) => { + format!("{prefix}") } - } else { - todo!("{self:#?}") + // e.g. `float32_t`, `int64_t` + (Some(bit_len), None, None) => format!("{prefix}{bit_len}_t"), + // e.g. `float32x2_t`, `int64x2_t` + (Some(bit_len), Some(SimdLen::Fixed(simd)), None) => { + format!("{prefix}{bit_len}x{simd}_t") + } + // e.g. `float32x2x3_t`, `int64x2x3_t` + (Some(bit_len), Some(SimdLen::Fixed(simd)), Some(vec)) => { + format!("{prefix}{bit_len}x{simd}x{vec}_t") + } + // e.g. `svbool_t` + (Some(_), Some(SimdLen::Scalable), None) if matches!(self.kind, TypeKind::Bool) => { + format!("sv{prefix}_t") + } + // e.g. `svfloat32_t`, `svint64_t` + (Some(bit_len), Some(SimdLen::Scalable), None) => format!("sv{prefix}{bit_len}_t"), + // e.g. `svfloat32x3_t`, `svint64x3_t` + (Some(bit_len), Some(SimdLen::Scalable), Some(vec)) => { + format!("sv{prefix}{bit_len}x{vec}_t") + } + _ => todo!("{self:#?}"), } } @@ -28,21 +39,34 @@ impl TypeDefinition for ArmType { let rust_prefix = self.kind.rust_prefix(); let c_prefix = self.kind.c_prefix(); - if let Some(bit_len) = self.bit_len { - match (self.simd_len, self.vec_len) { - (None, None) => format!("{rust_prefix}{bit_len}"), - (Some(SimdLen::Fixed(simd)), None) => format!("{c_prefix}{bit_len}x{simd}_t"), - (Some(SimdLen::Fixed(simd)), Some(vec)) => { - format!("{c_prefix}{bit_len}x{simd}x{vec}_t") - } - (Some(SimdLen::Scalable), None) => format!("sv{c_prefix}{bit_len}_t"), - (Some(SimdLen::Scalable), Some(vec)) => { - format!("sv{c_prefix}{bit_len}x{vec}_t") - } - (None, Some(_)) => todo!("{self:#?}"), // Likely an invalid case + match (self.bit_len, self.simd_len, self.vec_len) { + // e.g. `svpattern` + (None, _, _) => format!("{rust_prefix}"), + // e.g. `bool` + (Some(_), None, None) if matches!(self.kind, TypeKind::Bool) => { + format!("{rust_prefix}") } - } else { - todo!("{self:#?}") + // e.g. `i32` + (Some(bit_len), None, None) => format!("{rust_prefix}{bit_len}"), + // e.g. `int32x2_t` + (Some(bit_len), Some(SimdLen::Fixed(simd)), None) => { + format!("{c_prefix}{bit_len}x{simd}_t") + } + // e.g. `int32x2x3_t` + (Some(bit_len), Some(SimdLen::Fixed(simd)), Some(vec)) => { + format!("{c_prefix}{bit_len}x{simd}x{vec}_t") + } + // e.g. `svbool_t` + (Some(_), Some(SimdLen::Scalable), None) if matches!(self.kind, TypeKind::Bool) => { + format!("sv{c_prefix}_t") + } + // e.g. `svint32_t` + (Some(bit_len), Some(SimdLen::Scalable), None) => format!("sv{c_prefix}{bit_len}_t"), + // e.g. `svint32x3_t` + (Some(bit_len), Some(SimdLen::Scalable), Some(vec)) => { + format!("sv{c_prefix}{bit_len}x{vec}_t") + } + (Some(_), None, Some(_)) => todo!("{self:#?}"), } } diff --git a/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/crates/intrinsic-test/src/common/intrinsic_helpers.rs index 231c63fc9e..e2e2bbfb23 100644 --- a/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -1,6 +1,6 @@ use std::cmp; use std::fmt; -use std::ops::Deref; +use std::ops::DerefMut; use std::str::FromStr; #[derive(Debug, PartialEq, Copy, Clone)] @@ -90,9 +90,13 @@ impl TypeKind { } } - /// Returns the Rust prefix for this type kind i.e. `i`, `u`, or `f`. + /// Returns the Rust prefix for this type kind (i.e. `i` for `i16`, or `u` for `u16`). For type + /// kinds without any bit length at the end (e.g. `bool`), returns the whole type name. pub fn rust_prefix(&self) -> &str { match self { + Self::Bool => "bool", + Self::SvPattern => "svpattern", + Self::SvPrefetchOp => "svprfop", Self::BFloat => "bf", Self::Float => "f", Self::Int(Sign::Signed) => "i", @@ -101,7 +105,7 @@ impl TypeKind { Self::Char(Sign::Unsigned) => "u", Self::Char(Sign::Signed) => "i", Self::Mask => "u", - _ => unreachable!("Unused type kind: {self:#?}"), + _ => unreachable!("type kind without Rust prefix: {self:#?}"), } } } @@ -198,7 +202,7 @@ impl IntrinsicType { } } -pub trait TypeDefinition: Deref { +pub trait TypeDefinition: Clone + DerefMut { /// Determines the load function for this type. fn get_load_function(&self) -> String; @@ -211,14 +215,9 @@ pub trait TypeDefinition: Deref { /// Gets a string containing the name of the scalar type corresponding to this type if it is a /// vector. fn rust_scalar_type(&self) -> String { - if self.is_simd() { - format!( - "{prefix}{bits}", - prefix = self.kind().rust_prefix(), - bits = self.inner_size() - ) - } else { - self.rust_type() - } + let mut ty = self.clone(); + ty.simd_len = None; + ty.vec_len = None; + ty.rust_type() } } From 793a0836e8beda73c3e0084d95cb3932acccaf61 Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 10/32] intrinsic-test: no test values for `svbool_t` Predicate arguments of type `svbool_t` do not need test value arrays to be generated as the same enable-all-lanes predicate will be passed to all invocations of the intrinsic under test. There is no `svld1` equivalent for `svbool_t` that could be used even if there were test values to use. --- crates/intrinsic-test/src/common/argument.rs | 10 +++++++++- crates/intrinsic-test/src/common/gen_rust.rs | 7 ++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/crates/intrinsic-test/src/common/argument.rs b/crates/intrinsic-test/src/common/argument.rs index ea58d47615..344376da61 100644 --- a/crates/intrinsic-test/src/common/argument.rs +++ b/crates/intrinsic-test/src/common/argument.rs @@ -1,6 +1,6 @@ use itertools::Itertools; -use crate::common::intrinsic_helpers::TypeKind; +use crate::common::intrinsic_helpers::{SimdLen, TypeKind}; use crate::common::values::test_values_array_name; use super::PASSES; @@ -53,6 +53,11 @@ where self.constraint.is_some() } + /// Is this argument of type `svbool_t` (or otherwise a scalable bool)? + pub fn is_scalable_bool(&self) -> bool { + self.ty.kind == TypeKind::Bool && self.ty.num_lanes() == SimdLen::Scalable + } + /// Should this argument be passed by reference in C wrapper function declarations? /// /// SIMD types and `f16` are currently passed by reference. @@ -175,6 +180,9 @@ where pub fn load_values_rust(&self) -> String { self.iter() .filter(|&arg| !arg.has_constraint()) + // FIXME(davidtwco): Need test values for `svbool_t` when the argument is *not* a + // predicate. + .filter(|&arg| !arg.is_scalable_bool()) .enumerate() .map(|(idx, arg)| { if arg.is_simd() { diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index 782440dca6..db8de03e5f 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -130,7 +130,12 @@ pub fn write_lib_rs( for intrinsic in intrinsics { for arg in &intrinsic.arguments.args { - if !arg.has_constraint() { + // Skip arguments with constraints as these correspond to generic instantiatons, and + // arguments of scalable bool types as the same predicate is used for all intrinsics + // under test. + // FIXME(davidtwco): Need test values for `svbool_t` when the argument is *not* a + // predicate. + if !arg.has_constraint() && !arg.is_scalable_bool() { let name = test_values_array_name(&arg.ty); if seen.insert(name) { From 38e5756843e8008f86d123d29ce4b68d95ca1baf Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 11/32] intrinsic-test: introduce `get_comparison_function` Introduces a per-architecture abstraction over how intrinsic results are compared, so that later commits can implement Arm-specific comparison logic for SVE. --- crates/intrinsic-test/src/common/gen_rust.rs | 26 +-------- .../src/common/intrinsic_helpers.rs | 57 ++++++++++++++++--- 2 files changed, 50 insertions(+), 33 deletions(-) diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index db8de03e5f..444637b48f 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -177,29 +177,6 @@ fn generate_rust_test_loop( coerce += ") -> _"; c_coerce += ")"; - let (cast_prefix, cast_suffix) = if intrinsic.results.is_simd() { - ( - format!( - "std::mem::transmute::<_, [{}; {}]>(", - intrinsic.results.rust_scalar_type().replace("f", "NanEqF"), - intrinsic.results.num_lanes().expect_fixed() * intrinsic.results.num_vectors() - ), - ")", - ) - } else if intrinsic.results.kind == TypeKind::Float { - ( - match intrinsic.results.inner_size() { - 16 => format!("NanEqF16("), - 32 => format!("NanEqF32("), - 64 => format!("NanEqF64("), - _ => unimplemented!(), - }, - ")", - ) - } else { - ("".to_string(), "") - }; - write!( w, r#" @@ -214,7 +191,7 @@ for (id, rust, c) in specializations {{ c(__c_return_value.as_mut_ptr(){c_args}); let __c_return_value = __c_return_value.assume_init(); - assert_eq!({cast_prefix}__rust_return_value{cast_suffix}, {cast_prefix}__c_return_value{cast_suffix}, "{{id}}"); + {comparison} }} }} }} @@ -255,6 +232,7 @@ for (id, rust, c) in specializations {{ loaded_args = intrinsic.arguments.load_values_rust(), rust_args = intrinsic.arguments.as_call_param_rust(), c_args = intrinsic.arguments.as_c_call_param_rust(), + comparison = intrinsic.results.get_comparison_function(), ) } diff --git a/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/crates/intrinsic-test/src/common/intrinsic_helpers.rs index e2e2bbfb23..895d66914f 100644 --- a/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -116,15 +116,6 @@ pub enum SimdLen { Fixed(u32), } -impl SimdLen { - pub fn expect_fixed(&self) -> u32 { - match self { - SimdLen::Fixed(lanes) => *lanes, - SimdLen::Scalable => panic!("`expect_fixed` with scalable length"), - } - } -} - impl std::fmt::Display for SimdLen { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { @@ -206,6 +197,14 @@ pub trait TypeDefinition: Clone + DerefMut { /// Determines the load function for this type. fn get_load_function(&self) -> String; + /// Determines the comparison function for this type. + fn get_comparison_function(&self) -> String { + match self.num_lanes() { + SimdLen::Scalable => unimplemented!("architecture-specific"), + SimdLen::Fixed(num_lanes) => default_fixed_vector_comparison(self, num_lanes), + } + } + /// Gets a string containing the typename for this type in C. fn c_type(&self) -> String; @@ -221,3 +220,43 @@ pub trait TypeDefinition: Clone + DerefMut { ty.rust_type() } } + +/// Returns the default comparison between results of an intrinsic - casting the vectors to arrays +/// and using `assert_eq` - using `NanEqF*` where required for floats. +pub(crate) fn default_fixed_vector_comparison( + ty: &Ty, + num_lanes: u32, +) -> String { + let (cast_prefix, cast_suffix) = if ty.is_simd() { + ( + format!( + "std::mem::transmute::<_, [{}; {}]>(", + ty.rust_scalar_type().replace("f", "NanEqF"), + num_lanes * ty.num_vectors() + ), + ")", + ) + } else if ty.kind == TypeKind::Float { + ( + match ty.inner_size() { + 16 => format!("NanEqF16("), + 32 => format!("NanEqF32("), + 64 => format!("NanEqF64("), + _ => unimplemented!(), + }, + ")", + ) + } else { + ("".to_string(), "") + }; + + format!( + r#" +assert_eq!( + {cast_prefix}__rust_return_value{cast_suffix}, + {cast_prefix}__c_return_value{cast_suffix}, + "{{id}}" +); +"#, + ) +} From 72abb1d024d0e0628dad697becc4bd51dc1e75aa Mon Sep 17 00:00:00 2001 From: David Wood Date: Tue, 2 Jun 2026 13:36:57 +0000 Subject: [PATCH 12/32] intrinsic-test: intrisic generic over arch not type Refactoring enabling accessing architecture-specific behaviour that isn't associated with either of the return or argument types. --- crates/intrinsic-test/src/arm/json_parser.rs | 5 ++-- crates/intrinsic-test/src/arm/mod.rs | 5 ++-- crates/intrinsic-test/src/common/gen_c.rs | 6 ++--- crates/intrinsic-test/src/common/gen_rust.rs | 26 +++++++++---------- crates/intrinsic-test/src/common/intrinsic.rs | 13 +++++----- crates/intrinsic-test/src/common/mod.rs | 13 +++------- crates/intrinsic-test/src/x86/mod.rs | 4 +-- crates/intrinsic-test/src/x86/xml_parser.rs | 9 +++---- 8 files changed, 37 insertions(+), 44 deletions(-) diff --git a/crates/intrinsic-test/src/arm/json_parser.rs b/crates/intrinsic-test/src/arm/json_parser.rs index 9ca604a884..58e25b46ea 100644 --- a/crates/intrinsic-test/src/arm/json_parser.rs +++ b/crates/intrinsic-test/src/arm/json_parser.rs @@ -1,4 +1,5 @@ use super::intrinsic::ArmType; +use crate::arm::Arm; use crate::arm::types::parse_intrinsic_type; use crate::common::argument::{Argument, ArgumentList}; use crate::common::constraint::Constraint; @@ -59,7 +60,7 @@ struct JsonIntrinsic { pub fn get_neon_intrinsics( filename: &Path, -) -> Result>, Box> { +) -> Result>, Box> { let file = std::fs::File::open(filename)?; let reader = std::io::BufReader::new(file); let json: Vec = serde_json::from_reader(reader).expect("Couldn't parse JSON"); @@ -79,7 +80,7 @@ pub fn get_neon_intrinsics( fn json_to_intrinsic( mut intr: JsonIntrinsic, -) -> Result, Box> { +) -> Result, Box> { let name = intr.name.replace(['[', ']'], ""); let result_ty = ArmType(parse_intrinsic_type(&intr.return_type.value)?); diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 2a0ba05072..871a8713b3 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -10,12 +10,13 @@ use crate::common::intrinsic_helpers::{SimdLen, TypeKind}; use intrinsic::ArmType; use json_parser::get_neon_intrinsics; -pub struct Arm(Vec>); +#[derive(PartialEq)] +pub struct Arm(Vec>); impl SupportedArchitecture for Arm { type Type = ArmType; - fn intrinsics(&self) -> &[Intrinsic] { + fn intrinsics(&self) -> &[Intrinsic] { &self.0 } diff --git a/crates/intrinsic-test/src/common/gen_c.rs b/crates/intrinsic-test/src/common/gen_c.rs index b88a5af9eb..2023bf9953 100644 --- a/crates/intrinsic-test/src/common/gen_c.rs +++ b/crates/intrinsic-test/src/common/gen_c.rs @@ -1,6 +1,6 @@ use itertools::Itertools; -use crate::common::intrinsic::Intrinsic; +use crate::common::{SupportedArchitecture, intrinsic::Intrinsic}; use super::intrinsic_helpers::TypeDefinition; @@ -14,11 +14,11 @@ use super::intrinsic_helpers::TypeDefinition; /// *__dst = __crc32cd(a, b); /// } /// ``` -pub fn write_wrapper_c( +pub fn write_wrapper_c( w: &mut impl std::io::Write, notice: &str, platform_headers: &[&str], - intrinsics: &[Intrinsic], + intrinsics: &[Intrinsic], ) -> std::io::Result<()> { write!( w, diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index 444637b48f..29cb7bb4ce 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -3,11 +3,11 @@ use std::process::Command; use itertools::Itertools; use super::intrinsic_helpers::TypeDefinition; -use crate::common::PASSES; use crate::common::cli::{CcArgStyle, ProcessedCli}; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; use crate::common::values::{test_values_array_name, test_values_array_static}; +use crate::common::{PASSES, SupportedArchitecture}; /// Rust definitions that are included verbatim in the generated source. In particular, defines /// a wrapper around float types that defines `NaN`s to be equal reflexively to enable @@ -98,13 +98,10 @@ cc = "1" /// Writes a Rust source file into `w` with common definitions, static arrays with test values, /// declarations of C wrapper functions for FFI and Rust test functions. -pub fn write_lib_rs( +pub fn write_lib_rs( w: &mut impl std::io::Write, - notice: &str, - cfg: &str, - definitions: &str, i: usize, - intrinsics: &[Intrinsic], + intrinsics: &[Intrinsic], ) -> std::io::Result<()> { writeln!( w, @@ -123,7 +120,10 @@ pub fn write_lib_rs( {COMMON_RUST_DEFINITIONS} {definitions} -"# +"#, + notice = A::NOTICE, + cfg = A::PLATFORM_RUST_CFGS, + definitions = A::PLATFORM_RUST_DEFINITIONS, )?; let mut seen = std::collections::HashSet::new(); @@ -161,9 +161,9 @@ pub fn write_lib_rs( /// (first loop) `PASSES` number of times (second loop). For a given iteration of a given /// specialisation, test values are loaded for each argument and passed to the Rust intrinsic /// and the C wrapper function, and the results are compared. -fn generate_rust_test_loop( +fn generate_rust_test_loop( w: &mut impl std::io::Write, - intrinsic: &Intrinsic, + intrinsic: &Intrinsic, ) -> std::io::Result<()> { let intrinsic_name = &intrinsic.name; @@ -238,9 +238,9 @@ for (id, rust, c) in specializations {{ /// Writes a test function for an given intrinsic to `w`, with a body generated by /// `generate_rust_test_loop`. -fn create_rust_test( +fn create_rust_test( w: &mut impl std::io::Write, - intrinsic: &Intrinsic, + intrinsic: &Intrinsic, ) -> std::io::Result<()> { trace!("generating `{}`", intrinsic.name); @@ -262,10 +262,10 @@ fn test_{intrinsic_name}() {{ /// Writes an `extern "C"` block with function declarations for each of the C wrapper functions into /// `w`. -pub fn write_bindings_rust( +pub fn write_bindings_rust( w: &mut impl std::io::Write, i: usize, - intrinsics: &[Intrinsic], + intrinsics: &[Intrinsic], ) -> std::io::Result<()> { write!( w, diff --git a/crates/intrinsic-test/src/common/intrinsic.rs b/crates/intrinsic-test/src/common/intrinsic.rs index 034e2fbdec..10557871d0 100644 --- a/crates/intrinsic-test/src/common/intrinsic.rs +++ b/crates/intrinsic-test/src/common/intrinsic.rs @@ -1,25 +1,24 @@ -use itertools::Itertools; - use super::argument::ArgumentList; -use super::intrinsic_helpers::TypeDefinition; +use crate::common::SupportedArchitecture; +use itertools::Itertools; /// An intrinsic #[derive(Debug, PartialEq, Clone)] -pub struct Intrinsic { +pub struct Intrinsic { /// The function name of this intrinsic. pub name: String, /// Any arguments for this intrinsic. - pub arguments: ArgumentList, + pub arguments: ArgumentList, /// The return type of this intrinsic. - pub results: T, + pub results: A::Type, /// Any architecture-specific tags. pub arch_tags: Vec, } -impl Intrinsic { +impl Intrinsic { /// Invokes `f` for "specialisation" of the intrinsic - a specific instantiation of the /// constant generics of the intrinsic. `f` takes a slice where the `i`th element corresponds /// to the value of the `i`th const generic argument of the intrinsic. diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index 78720d6bc5..aee2da630a 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -29,10 +29,10 @@ pub(crate) const PASSES: u32 = 20; /// Architectures must support this trait /// to be successfully tested. -pub trait SupportedArchitecture { +pub trait SupportedArchitecture: Sized { type Type: TypeDefinition + Sync; - fn intrinsics(&self) -> &[Intrinsic]; + fn intrinsics(&self) -> &[Intrinsic]; fn create(cli_options: &ProcessedCli) -> Self; @@ -81,14 +81,7 @@ pub trait SupportedArchitecture { trace!("generating `{rust_filename}`"); let mut file = File::create(&rust_filename)?; - write_lib_rs( - &mut file, - Self::NOTICE, - Self::PLATFORM_RUST_CFGS, - Self::PLATFORM_RUST_DEFINITIONS, - i, - chunk, - )?; + write_lib_rs(&mut file, i, chunk)?; run_rustfmt(&rust_filename); let toml_filename = format!("rust_programs/mod_{i}/Cargo.toml"); diff --git a/crates/intrinsic-test/src/x86/mod.rs b/crates/intrinsic-test/src/x86/mod.rs index ae6d460809..ec198d9218 100644 --- a/crates/intrinsic-test/src/x86/mod.rs +++ b/crates/intrinsic-test/src/x86/mod.rs @@ -12,13 +12,13 @@ use intrinsic::X86IntrinsicType; use xml_parser::get_xml_intrinsics; pub struct X86 { - intrinsics: Vec>, + intrinsics: Vec>, } impl SupportedArchitecture for X86 { type Type = X86IntrinsicType; - fn intrinsics(&self) -> &[Intrinsic] { + fn intrinsics(&self) -> &[Intrinsic] { &self.intrinsics } diff --git a/crates/intrinsic-test/src/x86/xml_parser.rs b/crates/intrinsic-test/src/x86/xml_parser.rs index 6006d7919f..dd4870712b 100644 --- a/crates/intrinsic-test/src/x86/xml_parser.rs +++ b/crates/intrinsic-test/src/x86/xml_parser.rs @@ -1,6 +1,7 @@ use crate::common::argument::{Argument, ArgumentList}; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; +use crate::x86::X86; use crate::x86::constraint::map_constraints; use regex::Regex; @@ -56,13 +57,13 @@ pub struct Parameter { pub fn get_xml_intrinsics( filename: &Path, -) -> Result>, Box> { +) -> Result>, Box> { let file = std::fs::File::open(filename)?; let reader = std::io::BufReader::new(file); let data: Data = quick_xml::de::from_reader(reader).expect("failed to deserialize the source XML file"); - let parsed_intrinsics: Vec> = data + let parsed_intrinsics: Vec> = data .intrinsics .into_iter() .filter(|intrinsic| { @@ -84,9 +85,7 @@ pub fn get_xml_intrinsics( Ok(parsed_intrinsics) } -fn xml_to_intrinsic( - intr: XMLIntrinsic, -) -> Result, Box> { +fn xml_to_intrinsic(intr: XMLIntrinsic) -> Result, Box> { let name = intr.name; let result = X86IntrinsicType::from_param(&intr.return_data); let args_check = intr.parameters.into_iter().enumerate().map(|(i, param)| { From c1aaac9ffa904ab833533cb90897e724638a1627 Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 13/32] intrinsic-test: introduce `get_predicate_function` Support defining a local variable containing the predicate that will be used with all subsequent scalable vector intrinsics. --- crates/intrinsic-test/src/arm/mod.rs | 4 ++++ crates/intrinsic-test/src/common/gen_rust.rs | 11 ++++++++++- crates/intrinsic-test/src/common/intrinsic.rs | 11 ++++++++++- crates/intrinsic-test/src/common/mod.rs | 9 +++++++++ crates/intrinsic-test/src/x86/mod.rs | 4 ++++ 5 files changed, 37 insertions(+), 2 deletions(-) diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 871a8713b3..df78efaa41 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -113,4 +113,8 @@ impl SupportedArchitecture for Arm { Self(intrinsics) } + + fn get_predicate_function(_: u32) -> String { + todo!("implemented in a later commit") + } } diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index 29cb7bb4ce..63edf9bd7d 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -7,7 +7,7 @@ use crate::common::cli::{CcArgStyle, ProcessedCli}; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; use crate::common::values::{test_values_array_name, test_values_array_static}; -use crate::common::{PASSES, SupportedArchitecture}; +use crate::common::{PASSES, PREDICATE_LOCAL, SupportedArchitecture}; /// Rust definitions that are included verbatim in the generated source. In particular, defines /// a wrapper around float types that defines `NaN`s to be equal reflexively to enable @@ -184,6 +184,7 @@ let specializations = [{specializations}]; for (id, rust, c) in specializations {{ for i in 0..{PASSES} {{ unsafe {{ + {predicate} {loaded_args} let __rust_return_value = rust({rust_args}); @@ -232,6 +233,14 @@ for (id, rust, c) in specializations {{ loaded_args = intrinsic.arguments.load_values_rust(), rust_args = intrinsic.arguments.as_call_param_rust(), c_args = intrinsic.arguments.as_c_call_param_rust(), + predicate = if intrinsic.has_scalable_argument_or_result() { + format!( + "let {PREDICATE_LOCAL} = {pred};", + pred = A::get_predicate_function(intrinsic.results.inner_size()), + ) + } else { + "".to_string() + }, comparison = intrinsic.results.get_comparison_function(), ) } diff --git a/crates/intrinsic-test/src/common/intrinsic.rs b/crates/intrinsic-test/src/common/intrinsic.rs index 10557871d0..ac15d23166 100644 --- a/crates/intrinsic-test/src/common/intrinsic.rs +++ b/crates/intrinsic-test/src/common/intrinsic.rs @@ -1,5 +1,5 @@ use super::argument::ArgumentList; -use crate::common::SupportedArchitecture; +use crate::common::{SupportedArchitecture, intrinsic_helpers::SimdLen}; use itertools::Itertools; /// An intrinsic @@ -33,4 +33,13 @@ impl Intrinsic { .map(|constraint| constraint.into_iter()) .multi_cartesian_product() } + + /// Returns `true` if this intrinsic has any argument or result types that are scalable vectors + pub fn has_scalable_argument_or_result(&self) -> bool { + self.results.num_lanes() == SimdLen::Scalable + || self + .arguments + .iter() + .any(|a| a.ty.num_lanes() == SimdLen::Scalable) + } } diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index aee2da630a..391f3c666a 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -23,6 +23,12 @@ mod gen_c; mod gen_rust; mod values; +/// Many scalable intrinsics take a predicate argument and for the purposes of intrinsic testing, +/// a predicate that enables all lanes is used for all of these intrinsic calls (i.e. loading inputs, +/// result comparison, and the intrinsic under test). This constant defines the name of the local +/// variable that contains that predicate. +pub const PREDICATE_LOCAL: &'static str = "__pred"; + // The number of times each intrinsic will be called - influences the generation of the // test arrays to minimise repeated testing of the same test values. pub(crate) const PASSES: u32 = 20; @@ -102,6 +108,9 @@ pub trait SupportedArchitecture: Sized { .collect::>() .unwrap(); } + + /// Return a call to a intrinsic to generate a predicate, if reqd. + fn get_predicate_function(_: u32) -> String; } pub fn manual_chunk(intrinsic_count: usize) -> (usize, usize) { diff --git a/crates/intrinsic-test/src/x86/mod.rs b/crates/intrinsic-test/src/x86/mod.rs index ec198d9218..85f92aaeed 100644 --- a/crates/intrinsic-test/src/x86/mod.rs +++ b/crates/intrinsic-test/src/x86/mod.rs @@ -96,4 +96,8 @@ impl SupportedArchitecture for X86 { Self { intrinsics } } + + fn get_predicate_function(_: u32) -> String { + unimplemented!("no scalable vectors on x86") + } } From 9901dfede3ad616be936986ef691e681ff106b8b Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 14/32] intrinsic-test: sve comparison and predicates Implementation of `get_comparison_function` and `get_predicate_function` for SVE which uses the relevant SVE intrinsics. --- crates/intrinsic-test/src/arm/mod.rs | 4 +- crates/intrinsic-test/src/arm/types.rs | 58 +++++++++++++++++++- crates/intrinsic-test/src/common/argument.rs | 19 ++++++- 3 files changed, 75 insertions(+), 6 deletions(-) diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index df78efaa41..44c8d5369e 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -114,7 +114,7 @@ impl SupportedArchitecture for Arm { Self(intrinsics) } - fn get_predicate_function(_: u32) -> String { - todo!("implemented in a later commit") + fn get_predicate_function(size: u32) -> String { + format!("svptrue_b{size}()") } } diff --git a/crates/intrinsic-test/src/arm/types.rs b/crates/intrinsic-test/src/arm/types.rs index 25dc70fde7..0e7a083da1 100644 --- a/crates/intrinsic-test/src/arm/types.rs +++ b/crates/intrinsic-test/src/arm/types.rs @@ -1,5 +1,9 @@ use super::intrinsic::ArmType; -use crate::common::intrinsic_helpers::{IntrinsicType, Sign, SimdLen, TypeDefinition, TypeKind}; +use crate::common::PREDICATE_LOCAL; +use crate::common::intrinsic_helpers::{ + IntrinsicType, Sign, SimdLen, TypeDefinition, TypeKind, default_fixed_vector_comparison, +}; +use itertools::Itertools; impl TypeDefinition for ArmType { /// Gets a string containing the typename for this type in C format. @@ -94,6 +98,58 @@ impl TypeDefinition for ArmType { todo!("get_load_function IntrinsicType: {self:#?}") } } + + fn get_comparison_function(&self) -> String { + match self.num_lanes() { + SimdLen::Scalable => { + // There isn't a `svcmpeq` for `svbool_t`, so do an XOR instead and test it is + // empty.. + if self.kind() == TypeKind::Bool { + return format!( + r#" +let eq = sveor_b_z({PREDICATE_LOCAL}, __rust_return_value, __c_return_value); +assert!(!svptest_any({PREDICATE_LOCAL}, eq), "{{}}", id); + "#, + ); + } + + // Use `svcmpeq` to compare the return values of Rust and C invocations + match self.num_vectors() { + 1 => { + format!( + r#" +let eq = svcmpeq_{ty}{bl}({PREDICATE_LOCAL}, __rust_return_value, __c_return_value); +assert!(svptest_any(__pred, eq), "{{}}", id); + "#, + ty = self.rust_intrinsic_name_prefix(), + bl = self.inner_size(), + ) + } + // For tuples of vectors, do multiple comparisons, each with a `svget` to + // extract the Nth vector. + n @ (2 | 3 | 4) => (0..n) + .format_with("\n", |i, fmt| { + fmt(&format_args!( + r#" +let eq = svcmpeq_{ty}{bl}( + {PREDICATE_LOCAL}, + svget{n}_{ty}{bl}::<{i}>(__rust_return_value), + svget{n}_{ty}{bl}::<{i}>(__c_return_value) +); +assert!(svptest_any(__pred, eq), "{{}}-{i_plus_one}/{n}", id); + "#, + ty = self.rust_intrinsic_name_prefix(), + bl = self.inner_size(), + i_plus_one = i + 1, // so that the output is "1/2" and "2/2" + )) + }) + .to_string(), + _ => unreachable!(), + } + } + SimdLen::Fixed(num_lanes) => default_fixed_vector_comparison(self, num_lanes), + } + } } impl ArmType { diff --git a/crates/intrinsic-test/src/common/argument.rs b/crates/intrinsic-test/src/common/argument.rs index 344376da61..a8aa70e658 100644 --- a/crates/intrinsic-test/src/common/argument.rs +++ b/crates/intrinsic-test/src/common/argument.rs @@ -3,9 +3,9 @@ use itertools::Itertools; use crate::common::intrinsic_helpers::{SimdLen, TypeKind}; use crate::common::values::test_values_array_name; -use super::PASSES; use super::constraint::Constraint; use super::intrinsic_helpers::TypeDefinition; +use super::{PASSES, PREDICATE_LOCAL}; /// An argument for the intrinsic. #[derive(Debug, PartialEq, Clone)] @@ -37,8 +37,15 @@ where self.ty.c_type() } + /// Generates local variable name for the value passed to this argument pub fn generate_name(&self) -> String { - format!("{}_val", self.name) + // The same predicate is used for scalable intrinsic invocations + // FIXME(davidtwco): Only for predicate arguments, not all boolean arguments + if self.is_scalable_bool() { + format!("{PREDICATE_LOCAL}") + } else { + format!("{}_val", self.name) + } } pub fn is_simd(&self) -> bool { @@ -186,8 +193,14 @@ where .enumerate() .map(|(idx, arg)| { if arg.is_simd() { + // If this load is of a scalable vector, then prepend an additional argument + // containing the predicate for the load. + let pred_arg = match arg.ty.num_lanes() { + SimdLen::Scalable => format!("{PREDICATE_LOCAL},"), + SimdLen::Fixed(..) => "".to_string(), + }; format!( - "let {name} = {load}({vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);\n", + "let {name} = {load}({pred_arg}{vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);\n", name = arg.generate_name(), vals_name = test_values_array_name(&arg.ty), load = arg.ty.get_load_function(), From cff3e8438f342de1d7569a0c0c85cdd768ad4d96 Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 4 Jun 2026 12:48:11 +0000 Subject: [PATCH 15/32] intrinsic-test: rename `get_*_function` fns --- crates/intrinsic-test/src/arm/mod.rs | 2 +- crates/intrinsic-test/src/arm/types.rs | 6 +++--- crates/intrinsic-test/src/common/argument.rs | 2 +- crates/intrinsic-test/src/common/gen_rust.rs | 4 ++-- crates/intrinsic-test/src/common/intrinsic_helpers.rs | 4 ++-- crates/intrinsic-test/src/common/mod.rs | 2 +- crates/intrinsic-test/src/x86/mod.rs | 2 +- crates/intrinsic-test/src/x86/types.rs | 2 +- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 44c8d5369e..2f98bb3a3e 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -114,7 +114,7 @@ impl SupportedArchitecture for Arm { Self(intrinsics) } - fn get_predicate_function(size: u32) -> String { + fn predicate_function(size: u32) -> String { format!("svptrue_b{size}()") } } diff --git a/crates/intrinsic-test/src/arm/types.rs b/crates/intrinsic-test/src/arm/types.rs index 0e7a083da1..d19560231b 100644 --- a/crates/intrinsic-test/src/arm/types.rs +++ b/crates/intrinsic-test/src/arm/types.rs @@ -75,7 +75,7 @@ impl TypeDefinition for ArmType { } /// Determines the load function for this type. - fn get_load_function(&self) -> String { + fn load_function(&self) -> String { if let Some(bl) = self.bit_len { match self.num_lanes() { SimdLen::Scalable => { @@ -95,11 +95,11 @@ impl TypeDefinition for ArmType { } } } else { - todo!("get_load_function IntrinsicType: {self:#?}") + todo!("load_function IntrinsicType: {self:#?}") } } - fn get_comparison_function(&self) -> String { + fn comparison_function(&self) -> String { match self.num_lanes() { SimdLen::Scalable => { // There isn't a `svcmpeq` for `svbool_t`, so do an XOR instead and test it is diff --git a/crates/intrinsic-test/src/common/argument.rs b/crates/intrinsic-test/src/common/argument.rs index a8aa70e658..664aa945f1 100644 --- a/crates/intrinsic-test/src/common/argument.rs +++ b/crates/intrinsic-test/src/common/argument.rs @@ -203,7 +203,7 @@ where "let {name} = {load}({pred_arg}{vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);\n", name = arg.generate_name(), vals_name = test_values_array_name(&arg.ty), - load = arg.ty.get_load_function(), + load = arg.ty.load_function(), ) } else { format!( diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index 63edf9bd7d..101ce016c8 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -236,12 +236,12 @@ for (id, rust, c) in specializations {{ predicate = if intrinsic.has_scalable_argument_or_result() { format!( "let {PREDICATE_LOCAL} = {pred};", - pred = A::get_predicate_function(intrinsic.results.inner_size()), + pred = A::predicate_function(intrinsic.results.inner_size()), ) } else { "".to_string() }, - comparison = intrinsic.results.get_comparison_function(), + comparison = intrinsic.results.comparison_function(), ) } diff --git a/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/crates/intrinsic-test/src/common/intrinsic_helpers.rs index 895d66914f..37cb242d77 100644 --- a/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -195,10 +195,10 @@ impl IntrinsicType { pub trait TypeDefinition: Clone + DerefMut { /// Determines the load function for this type. - fn get_load_function(&self) -> String; + fn load_function(&self) -> String; /// Determines the comparison function for this type. - fn get_comparison_function(&self) -> String { + fn comparison_function(&self) -> String { match self.num_lanes() { SimdLen::Scalable => unimplemented!("architecture-specific"), SimdLen::Fixed(num_lanes) => default_fixed_vector_comparison(self, num_lanes), diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index 391f3c666a..2e3db8187d 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -110,7 +110,7 @@ pub trait SupportedArchitecture: Sized { } /// Return a call to a intrinsic to generate a predicate, if reqd. - fn get_predicate_function(_: u32) -> String; + fn predicate_function(_: u32) -> String; } pub fn manual_chunk(intrinsic_count: usize) -> (usize, usize) { diff --git a/crates/intrinsic-test/src/x86/mod.rs b/crates/intrinsic-test/src/x86/mod.rs index 85f92aaeed..0ccb911bf2 100644 --- a/crates/intrinsic-test/src/x86/mod.rs +++ b/crates/intrinsic-test/src/x86/mod.rs @@ -97,7 +97,7 @@ impl SupportedArchitecture for X86 { Self { intrinsics } } - fn get_predicate_function(_: u32) -> String { + fn predicate_function(_: u32) -> String { unimplemented!("no scalable vectors on x86") } } diff --git a/crates/intrinsic-test/src/x86/types.rs b/crates/intrinsic-test/src/x86/types.rs index b7e063e228..2cba54a73f 100644 --- a/crates/intrinsic-test/src/x86/types.rs +++ b/crates/intrinsic-test/src/x86/types.rs @@ -48,7 +48,7 @@ impl TypeDefinition for X86IntrinsicType { } /// Determines the load function for this type. - fn get_load_function(&self) -> String { + fn load_function(&self) -> String { let type_value = self.param.type_data.clone(); if type_value.len() == 0 { unimplemented!("the value for key 'type' is not present!"); From b63e9495e5434385c424de0d64c1331ca4f68ce4 Mon Sep 17 00:00:00 2001 From: David Wood Date: Tue, 9 Jun 2026 08:12:08 +0000 Subject: [PATCH 16/32] intrinsic-test: arg generic over arch not type Refactoring enabling accessing architecture-specific behaviour that isn't associated with the specific argument type. --- crates/intrinsic-test/src/arm/json_parser.rs | 4 ++-- crates/intrinsic-test/src/common/argument.rs | 21 ++++++++++--------- crates/intrinsic-test/src/common/intrinsic.rs | 2 +- crates/intrinsic-test/src/common/mod.rs | 2 +- crates/intrinsic-test/src/x86/xml_parser.rs | 13 ++++-------- 5 files changed, 19 insertions(+), 23 deletions(-) diff --git a/crates/intrinsic-test/src/arm/json_parser.rs b/crates/intrinsic-test/src/arm/json_parser.rs index 58e25b46ea..b4e21a5757 100644 --- a/crates/intrinsic-test/src/arm/json_parser.rs +++ b/crates/intrinsic-test/src/arm/json_parser.rs @@ -122,7 +122,7 @@ fn json_to_intrinsic( }); let mut arg = - Argument::::new(i, String::from(arg_name), ArmType(arg_ty), constraint); + Argument::::new(i, String::from(arg_name), ArmType(arg_ty), constraint); // The JSON doesn't list immediates as const let IntrinsicType { @@ -135,7 +135,7 @@ fn json_to_intrinsic( }) .collect(); - let arguments = ArgumentList:: { args }; + let arguments = ArgumentList:: { args }; Ok(Intrinsic { name, diff --git a/crates/intrinsic-test/src/common/argument.rs b/crates/intrinsic-test/src/common/argument.rs index 664aa945f1..bac6f3b425 100644 --- a/crates/intrinsic-test/src/common/argument.rs +++ b/crates/intrinsic-test/src/common/argument.rs @@ -1,5 +1,6 @@ use itertools::Itertools; +use crate::common::SupportedArchitecture; use crate::common::intrinsic_helpers::{SimdLen, TypeKind}; use crate::common::values::test_values_array_name; @@ -9,22 +10,22 @@ use super::{PASSES, PREDICATE_LOCAL}; /// An argument for the intrinsic. #[derive(Debug, PartialEq, Clone)] -pub struct Argument { +pub struct Argument { /// The argument's index in the intrinsic function call. pub pos: usize, /// The argument name. pub name: String, /// The type of the argument. - pub ty: T, + pub ty: A::Type, /// Any constraints that are on this argument pub constraint: Option, } -impl Argument +impl Argument where - T: TypeDefinition, + A: SupportedArchitecture, { - pub fn new(pos: usize, name: String, ty: T, constraint: Option) -> Self { + pub fn new(pos: usize, name: String, ty: A::Type, constraint: Option) -> Self { Argument { pos, name, @@ -75,13 +76,13 @@ where /// Arguments of an intrinsic - including parameters that end up being const generics. #[derive(Debug, PartialEq, Clone)] -pub struct ArgumentList { - pub args: Vec>, +pub struct ArgumentList { + pub args: Vec>, } -impl ArgumentList +impl ArgumentList where - T: TypeDefinition, + A: SupportedArchitecture, { /// Returns a string with the arguments in `self` as a parameter list for a wrapper fn /// definition in C (e.g. `$ty1 $arg1, $ty2 $arg2`). @@ -217,7 +218,7 @@ where } /// Returns an iterator over the contained arguments - pub fn iter(&self) -> std::slice::Iter<'_, Argument> { + pub fn iter(&self) -> std::slice::Iter<'_, Argument> { self.args.iter() } } diff --git a/crates/intrinsic-test/src/common/intrinsic.rs b/crates/intrinsic-test/src/common/intrinsic.rs index ac15d23166..3c0d5dbb65 100644 --- a/crates/intrinsic-test/src/common/intrinsic.rs +++ b/crates/intrinsic-test/src/common/intrinsic.rs @@ -9,7 +9,7 @@ pub struct Intrinsic { pub name: String, /// Any arguments for this intrinsic. - pub arguments: ArgumentList, + pub arguments: ArgumentList, /// The return type of this intrinsic. pub results: A::Type, diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index 2e3db8187d..3393154a1b 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -36,7 +36,7 @@ pub(crate) const PASSES: u32 = 20; /// Architectures must support this trait /// to be successfully tested. pub trait SupportedArchitecture: Sized { - type Type: TypeDefinition + Sync; + type Type: TypeDefinition + std::fmt::Debug + PartialEq + Sync; fn intrinsics(&self) -> &[Intrinsic]; diff --git a/crates/intrinsic-test/src/x86/xml_parser.rs b/crates/intrinsic-test/src/x86/xml_parser.rs index dd4870712b..f48a7bdfe5 100644 --- a/crates/intrinsic-test/src/x86/xml_parser.rs +++ b/crates/intrinsic-test/src/x86/xml_parser.rs @@ -99,12 +99,7 @@ fn xml_to_intrinsic(intr: XMLIntrinsic) -> Result, Box::new( - i, - param.var_name.clone(), - ty.unwrap(), - constraint, - ); + let arg = Argument::::new(i, param.var_name.clone(), ty.unwrap(), constraint); Some(arg) } }); @@ -124,8 +119,8 @@ fn xml_to_intrinsic(intr: XMLIntrinsic) -> Result, Box| arg.ty.param.etype.as_str() == "MASK"; - let is_vector = |arg: &Argument| re.is_match(arg.ty.param.type_data.as_str()); + let is_mask = |arg: &Argument| arg.ty.param.etype.as_str() == "MASK"; + let is_vector = |arg: &Argument| re.is_match(arg.ty.param.type_data.as_str()); let pos = args_test.position(|arg| is_mask(arg) && is_vector(arg)); if let Some(index) = pos { args[index].ty.bit_len = args[0].ty.bit_len; @@ -133,7 +128,7 @@ fn xml_to_intrinsic(intr: XMLIntrinsic) -> Result, Box { args }; + let arguments = ArgumentList:: { args }; if let Err(message) = result { return Err(Box::from(message)); From 8d809e5ac86dd15a8d4d7d0f74d8699af33e81be Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 4 Jun 2026 15:08:50 +0000 Subject: [PATCH 17/32] intrinsic-test: test values for `svbool_t` Instead of assuming that any scalable boolean argument is a predicate, handle predicates specifically and generate test values for `svbool_t` values. --- crates/intrinsic-test/src/arm/json_parser.rs | 10 ++++- crates/intrinsic-test/src/arm/mod.rs | 31 ++++++++++++++- crates/intrinsic-test/src/arm/types.rs | 10 +++++ crates/intrinsic-test/src/common/argument.rs | 38 +++++++------------ crates/intrinsic-test/src/common/gen_rust.rs | 6 +-- .../src/common/intrinsic_helpers.rs | 6 +++ crates/intrinsic-test/src/common/mod.rs | 14 ++++++- crates/intrinsic-test/src/common/values.rs | 27 +++++++------ crates/intrinsic-test/src/x86/xml_parser.rs | 9 ++++- 9 files changed, 102 insertions(+), 49 deletions(-) diff --git a/crates/intrinsic-test/src/arm/json_parser.rs b/crates/intrinsic-test/src/arm/json_parser.rs index b4e21a5757..c2bf306f5a 100644 --- a/crates/intrinsic-test/src/arm/json_parser.rs +++ b/crates/intrinsic-test/src/arm/json_parser.rs @@ -121,8 +121,14 @@ fn json_to_intrinsic( } }); - let mut arg = - Argument::::new(i, String::from(arg_name), ArmType(arg_ty), constraint); + let is_predicate = arg_name == "pg"; + let mut arg = Argument::::new( + i, + String::from(arg_name), + ArmType(arg_ty), + constraint, + is_predicate, + ); // The JSON doesn't list immediates as const let IntrinsicType { diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 2f98bb3a3e..40c26b7569 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -3,10 +3,12 @@ mod intrinsic; mod json_parser; mod types; -use crate::common::SupportedArchitecture; +use crate::common::argument::Argument; use crate::common::cli::{CcArgStyle, ProcessedCli}; use crate::common::intrinsic::Intrinsic; -use crate::common::intrinsic_helpers::{SimdLen, TypeKind}; +use crate::common::intrinsic_helpers::{SimdLen, TypeDefinition, TypeKind}; +use crate::common::values::test_values_array_name; +use crate::common::{PASSES, PREDICATE_LOCAL, SupportedArchitecture}; use intrinsic::ArmType; use json_parser::get_neon_intrinsics; @@ -117,4 +119,29 @@ impl SupportedArchitecture for Arm { fn predicate_function(size: u32) -> String { format!("svptrue_b{size}()") } + + fn load_call(arg: &Argument, idx: usize) -> String { + let name = arg.generate_name(); + let load = arg.ty.load_function(); + let ptr = format!( + "{vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _", + vals_name = test_values_array_name(&arg.ty) + ); + + match arg.ty.num_lanes() { + // If the load is of a `svbool_t`, then we load a `svint8_t` and + SimdLen::Scalable if matches!(arg.ty.kind(), TypeKind::Bool) => { + format!( + r#" +let {name} = {load}({PREDICATE_LOCAL}, {ptr}); +let {name} = svcmpne_n_s8({PREDICATE_LOCAL}, {name}, 0); + "# + ) + } + // If this load is of a scalable vector, then prepend an additional argument + // containing the predicate for the load. + SimdLen::Scalable => format!("let {name} = {load}({PREDICATE_LOCAL}, {ptr});"), + SimdLen::Fixed(..) => format!("let {name} = {load}({ptr});"), + } + } } diff --git a/crates/intrinsic-test/src/arm/types.rs b/crates/intrinsic-test/src/arm/types.rs index d19560231b..3453ac4c95 100644 --- a/crates/intrinsic-test/src/arm/types.rs +++ b/crates/intrinsic-test/src/arm/types.rs @@ -74,6 +74,16 @@ impl TypeDefinition for ArmType { } } + fn rust_scalar_type_for_test_value_array(&self) -> String { + if self.kind() == TypeKind::Bool && self.num_lanes() == SimdLen::Scalable { + let mut ty = self.clone(); + ty.kind = TypeKind::Int(Sign::Signed); + ty.rust_scalar_type() + } else { + self.rust_scalar_type() + } + } + /// Determines the load function for this type. fn load_function(&self) -> String { if let Some(bl) = self.bit_len { diff --git a/crates/intrinsic-test/src/common/argument.rs b/crates/intrinsic-test/src/common/argument.rs index bac6f3b425..b654924950 100644 --- a/crates/intrinsic-test/src/common/argument.rs +++ b/crates/intrinsic-test/src/common/argument.rs @@ -1,7 +1,7 @@ use itertools::Itertools; use crate::common::SupportedArchitecture; -use crate::common::intrinsic_helpers::{SimdLen, TypeKind}; +use crate::common::intrinsic_helpers::TypeKind; use crate::common::values::test_values_array_name; use super::constraint::Constraint; @@ -19,18 +19,27 @@ pub struct Argument { pub ty: A::Type, /// Any constraints that are on this argument pub constraint: Option, + /// Is the argument a predicate for a scalable intrinsic? + pub is_predicate: bool, } impl Argument where A: SupportedArchitecture, { - pub fn new(pos: usize, name: String, ty: A::Type, constraint: Option) -> Self { + pub fn new( + pos: usize, + name: String, + ty: A::Type, + constraint: Option, + is_predicate: bool, + ) -> Self { Argument { pos, name, ty, constraint, + is_predicate, } } @@ -41,8 +50,7 @@ where /// Generates local variable name for the value passed to this argument pub fn generate_name(&self) -> String { // The same predicate is used for scalable intrinsic invocations - // FIXME(davidtwco): Only for predicate arguments, not all boolean arguments - if self.is_scalable_bool() { + if self.is_predicate { format!("{PREDICATE_LOCAL}") } else { format!("{}_val", self.name) @@ -61,11 +69,6 @@ where self.constraint.is_some() } - /// Is this argument of type `svbool_t` (or otherwise a scalable bool)? - pub fn is_scalable_bool(&self) -> bool { - self.ty.kind == TypeKind::Bool && self.ty.num_lanes() == SimdLen::Scalable - } - /// Should this argument be passed by reference in C wrapper function declarations? /// /// SIMD types and `f16` are currently passed by reference. @@ -188,24 +191,11 @@ where pub fn load_values_rust(&self) -> String { self.iter() .filter(|&arg| !arg.has_constraint()) - // FIXME(davidtwco): Need test values for `svbool_t` when the argument is *not* a - // predicate. - .filter(|&arg| !arg.is_scalable_bool()) + .filter(|&arg| !arg.is_predicate) .enumerate() .map(|(idx, arg)| { if arg.is_simd() { - // If this load is of a scalable vector, then prepend an additional argument - // containing the predicate for the load. - let pred_arg = match arg.ty.num_lanes() { - SimdLen::Scalable => format!("{PREDICATE_LOCAL},"), - SimdLen::Fixed(..) => "".to_string(), - }; - format!( - "let {name} = {load}({pred_arg}{vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);\n", - name = arg.generate_name(), - vals_name = test_values_array_name(&arg.ty), - load = arg.ty.load_function(), - ) + A::load_call(arg, idx) } else { format!( "let {name} = {vals_name}[(i+{idx}) % {PASSES}];\n", diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index 101ce016c8..de9bcd7e0c 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -131,11 +131,9 @@ pub fn write_lib_rs( for intrinsic in intrinsics { for arg in &intrinsic.arguments.args { // Skip arguments with constraints as these correspond to generic instantiatons, and - // arguments of scalable bool types as the same predicate is used for all intrinsics + // predicates for scalable intrinsics as the same predicate is used for all intrinsics // under test. - // FIXME(davidtwco): Need test values for `svbool_t` when the argument is *not* a - // predicate. - if !arg.has_constraint() && !arg.is_scalable_bool() { + if !arg.has_constraint() && !arg.is_predicate { let name = test_values_array_name(&arg.ty); if seen.insert(name) { diff --git a/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/crates/intrinsic-test/src/common/intrinsic_helpers.rs index 37cb242d77..4c229eeba7 100644 --- a/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -219,6 +219,12 @@ pub trait TypeDefinition: Clone + DerefMut { ty.vec_len = None; ty.rust_type() } + + /// Gets a string containing the name of the scalar type corresponding to this type that should + /// be used as the element type for the test value array. + fn rust_scalar_type_for_test_value_array(&self) -> String { + self.rust_scalar_type() + } } /// Returns the default comparison between results of an intrinsic - casting the vectors to arrays diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index 3393154a1b..fc0c298f1c 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -5,12 +5,14 @@ use rayon::prelude::*; use cli::ProcessedCli; use crate::common::{ + argument::Argument, gen_c::write_wrapper_c, gen_rust::{ run_rustfmt, write_bin_cargo_toml, write_build_rs, write_lib_cargo_toml, write_lib_rs, }, intrinsic::Intrinsic, intrinsic_helpers::TypeDefinition, + values::test_values_array_name, }; pub mod argument; @@ -18,10 +20,10 @@ pub mod cli; pub mod constraint; pub mod intrinsic; pub mod intrinsic_helpers; +pub mod values; mod gen_c; mod gen_rust; -mod values; /// Many scalable intrinsics take a predicate argument and for the purposes of intrinsic testing, /// a predicate that enables all lanes is used for all of these intrinsic calls (i.e. loading inputs, @@ -111,6 +113,16 @@ pub trait SupportedArchitecture: Sized { /// Return a call to a intrinsic to generate a predicate, if reqd. fn predicate_function(_: u32) -> String; + + /// Return a call loading `arg`. Can assume that `arg.is_simd()` holds. + fn load_call(arg: &Argument, idx: usize) -> String { + format!( + "let {name} = {load}({vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);\n", + name = arg.generate_name(), + vals_name = test_values_array_name(&arg.ty), + load = arg.ty.load_function(), + ) + } } pub fn manual_chunk(intrinsic_count: usize) -> (usize, usize) { diff --git a/crates/intrinsic-test/src/common/values.rs b/crates/intrinsic-test/src/common/values.rs index ad3cb58fb9..0d4c9abcea 100644 --- a/crates/intrinsic-test/src/common/values.rs +++ b/crates/intrinsic-test/src/common/values.rs @@ -26,7 +26,7 @@ pub fn test_values_array_static( w, "static {name}: [{ty}; {load_size}] = {values};\n", name = test_values_array_name(ty), - ty = ty.rust_scalar_type(), + ty = ty.rust_scalar_type_for_test_value_array(), load_size = test_values_array_length(&ty), values = test_values_array(&ty) ) @@ -55,11 +55,11 @@ pub fn test_values_array_name(ty: &T) -> String { /// `[true, false]` - as there are only ever two values for a boolean. This only works because the /// generated accesses to the test value array is always modulo the length of the test value array. pub fn test_values_array(ty: &IntrinsicType) -> String { - if ty.kind() == TypeKind::Bool { - return "[true, false]".to_string(); - } - let (bit_len, kind) = match ty { + IntrinsicType { + kind: TypeKind::Bool, + .. + } => (1, TypeKind::Bool), IntrinsicType { kind: TypeKind::Float, bit_len: Some(bit_len), @@ -83,6 +83,9 @@ pub fn test_values_array(ty: &IntrinsicType) -> String { let src = bit_pattern_for_test_values_array(bit_len, i); assert!(src == 0 || src.ilog2() < bit_len); match kind { + TypeKind::Bool if ty.num_lanes() != SimdLen::Scalable => { + fmt(&format_args!("{}", if src == 1 { true } else { false })) + } TypeKind::Float => fmt(&format_args!("f{bit_len}::from_bits({src:#x})")), TypeKind::Vector | TypeKind::Int(Sign::Signed) if (src >> (bit_len - 1)) != 0 => { // `src` is a two's complement representation of a negative value. @@ -113,15 +116,7 @@ pub fn test_values_array(ty: &IntrinsicType) -> String { /// /// For scalable vectors (only SVE is currently supported), assume that the length of the vector is /// the maximum supported by the architecture. -/// -/// An exception to the above is when `ty` is a boolean, where this function returns two - as -/// there are only ever two values for a boolean. This only works because the generated accesses to -/// the test value array is always modulo this length. pub fn test_values_array_length(ty: &IntrinsicType) -> u32 { - if ty.kind() == TypeKind::Bool { - return 2; - } - let IntrinsicType { simd_len, vec_len, .. } = ty; @@ -150,7 +145,8 @@ pub fn test_values_array_length(ty: &IntrinsicType) -> u32 { pub fn bit_pattern_for_test_values_array(bits: u32, index: u32) -> u64 { let index = index as usize; match bits { - bits @ (1 | 2 | 3 | 4 | 5 | 6 | 7 | 8) => BIT_PATTERNS_8[index % (1 << bits)].into(), + 1 => BIT_PATTERNS_1[index % BIT_PATTERNS_1.len()].into(), + bits @ (2 | 3 | 4 | 5 | 6 | 7 | 8) => BIT_PATTERNS_8[index % (1 << bits)].into(), 16 => BIT_PATTERNS_16[index % BIT_PATTERNS_16.len()].into(), 32 => BIT_PATTERNS_32[index % BIT_PATTERNS_32.len()].into(), 64 => BIT_PATTERNS_64[index % BIT_PATTERNS_64.len()], @@ -158,6 +154,9 @@ pub fn bit_pattern_for_test_values_array(bits: u32, index: u32) -> u64 { } } +// Contains every possible 1-bit value in order +pub const BIT_PATTERNS_1: &[u8] = &[0x0, 0x1]; + // Contains every possible 8-bit value in order pub const BIT_PATTERNS_8: &[u8] = &[ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, diff --git a/crates/intrinsic-test/src/x86/xml_parser.rs b/crates/intrinsic-test/src/x86/xml_parser.rs index f48a7bdfe5..f84f1f4b9c 100644 --- a/crates/intrinsic-test/src/x86/xml_parser.rs +++ b/crates/intrinsic-test/src/x86/xml_parser.rs @@ -99,8 +99,13 @@ fn xml_to_intrinsic(intr: XMLIntrinsic) -> Result, Box::new(i, param.var_name.clone(), ty.unwrap(), constraint); - Some(arg) + Some(Argument::::new( + i, + param.var_name.clone(), + ty.unwrap(), + constraint, + false, + )) } }); From a1588dfb49b6b780463398f8b7fe40ca41349697 Mon Sep 17 00:00:00 2001 From: David Wood Date: Tue, 9 Jun 2026 08:44:17 +0000 Subject: [PATCH 18/32] intrinsic-test: remove unnecessary newlines All of the generated output is run through rustfmt so these aren't necessary. --- crates/intrinsic-test/src/common/argument.rs | 2 +- crates/intrinsic-test/src/common/mod.rs | 2 +- crates/intrinsic-test/src/common/values.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/intrinsic-test/src/common/argument.rs b/crates/intrinsic-test/src/common/argument.rs index b654924950..10d9224183 100644 --- a/crates/intrinsic-test/src/common/argument.rs +++ b/crates/intrinsic-test/src/common/argument.rs @@ -198,7 +198,7 @@ where A::load_call(arg, idx) } else { format!( - "let {name} = {vals_name}[(i+{idx}) % {PASSES}];\n", + "let {name} = {vals_name}[(i+{idx}) % {PASSES}];", name = arg.generate_name(), vals_name = test_values_array_name(&arg.ty), ) diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index fc0c298f1c..197a9b861b 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -117,7 +117,7 @@ pub trait SupportedArchitecture: Sized { /// Return a call loading `arg`. Can assume that `arg.is_simd()` holds. fn load_call(arg: &Argument, idx: usize) -> String { format!( - "let {name} = {load}({vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);\n", + "let {name} = {load}({vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);", name = arg.generate_name(), vals_name = test_values_array_name(&arg.ty), load = arg.ty.load_function(), diff --git a/crates/intrinsic-test/src/common/values.rs b/crates/intrinsic-test/src/common/values.rs index 0d4c9abcea..d1f333fd19 100644 --- a/crates/intrinsic-test/src/common/values.rs +++ b/crates/intrinsic-test/src/common/values.rs @@ -24,7 +24,7 @@ pub fn test_values_array_static( ) -> std::io::Result<()> { writeln!( w, - "static {name}: [{ty}; {load_size}] = {values};\n", + "static {name}: [{ty}; {load_size}] = {values};", name = test_values_array_name(ty), ty = ty.rust_scalar_type_for_test_value_array(), load_size = test_values_array_length(&ty), From 6395077555bb85c86acf6cd2245bb959c4cc12db Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 19/32] arm-intrinsics: `svget{2,3,4}` These intrinsics need `Arguments_Preparation` added so that the intrinsic-test tool knows to generate const arguments. --- intrinsics_data/arm_intrinsics.json | 198 ++++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) diff --git a/intrinsics_data/arm_intrinsics.json b/intrinsics_data/arm_intrinsics.json index fab6da7f2c..582de29741 100644 --- a/intrinsics_data/arm_intrinsics.json +++ b/intrinsics_data/arm_intrinsics.json @@ -51174,6 +51174,12 @@ "svfloat16x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svfloat16_t" }, @@ -51188,6 +51194,12 @@ "svfloat32x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svfloat32_t" }, @@ -51202,6 +51214,12 @@ "svfloat64x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svfloat64_t" }, @@ -51216,6 +51234,12 @@ "svint16x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svint16_t" }, @@ -51230,6 +51254,12 @@ "svint32x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svint32_t" }, @@ -51244,6 +51274,12 @@ "svint64x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svint64_t" }, @@ -51258,6 +51294,12 @@ "svint8x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svint8_t" }, @@ -51272,6 +51314,12 @@ "svuint16x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svuint16_t" }, @@ -51286,6 +51334,12 @@ "svuint32x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svuint32_t" }, @@ -51300,6 +51354,12 @@ "svuint64x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svuint64_t" }, @@ -51314,6 +51374,12 @@ "svuint8x2_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svuint8_t" }, @@ -51328,6 +51394,12 @@ "svfloat16x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svfloat16_t" }, @@ -51342,6 +51414,12 @@ "svfloat32x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svfloat32_t" }, @@ -51356,6 +51434,12 @@ "svfloat64x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svfloat64_t" }, @@ -51370,6 +51454,12 @@ "svint16x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svint16_t" }, @@ -51384,6 +51474,12 @@ "svint32x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svint32_t" }, @@ -51398,6 +51494,12 @@ "svint64x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svint64_t" }, @@ -51412,6 +51514,12 @@ "svint8x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svint8_t" }, @@ -51426,6 +51534,12 @@ "svuint16x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svuint16_t" }, @@ -51440,6 +51554,12 @@ "svuint32x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svuint32_t" }, @@ -51454,6 +51574,12 @@ "svuint64x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svuint64_t" }, @@ -51468,6 +51594,12 @@ "svuint8x3_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svuint8_t" }, @@ -51482,6 +51614,12 @@ "svfloat16x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svfloat16_t" }, @@ -51496,6 +51634,12 @@ "svfloat32x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svfloat32_t" }, @@ -51510,6 +51654,12 @@ "svfloat64x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svfloat64_t" }, @@ -51524,6 +51674,12 @@ "svint16x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svint16_t" }, @@ -51538,6 +51694,12 @@ "svint32x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svint32_t" }, @@ -51552,6 +51714,12 @@ "svint64x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svint64_t" }, @@ -51566,6 +51734,12 @@ "svint8x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svint8_t" }, @@ -51580,6 +51754,12 @@ "svuint16x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svuint16_t" }, @@ -51594,6 +51774,12 @@ "svuint32x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svuint32_t" }, @@ -51608,6 +51794,12 @@ "svuint64x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svuint64_t" }, @@ -51622,6 +51814,12 @@ "svuint8x4_t tuple", "uint64_t imm_index" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svuint8_t" }, From 938c047c56904bf438b5566a4a6d682f0d7c8f64 Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 20/32] arm-intrinsics: `svset{2,3,4}` These intrinsics need `Arguments_Preparation` added so that the intrinsic-test tool knows to generate const arguments. --- intrinsics_data/arm_intrinsics.json | 198 ++++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) diff --git a/intrinsics_data/arm_intrinsics.json b/intrinsics_data/arm_intrinsics.json index 582de29741..7f749fe4d8 100644 --- a/intrinsics_data/arm_intrinsics.json +++ b/intrinsics_data/arm_intrinsics.json @@ -163197,6 +163197,12 @@ "uint64_t imm_index", "svfloat16_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svfloat16x2_t" }, @@ -163212,6 +163218,12 @@ "uint64_t imm_index", "svfloat32_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svfloat32x2_t" }, @@ -163227,6 +163239,12 @@ "uint64_t imm_index", "svfloat64_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svfloat64x2_t" }, @@ -163242,6 +163260,12 @@ "uint64_t imm_index", "svint16_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svint16x2_t" }, @@ -163257,6 +163281,12 @@ "uint64_t imm_index", "svint32_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svint32x2_t" }, @@ -163272,6 +163302,12 @@ "uint64_t imm_index", "svint64_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svint64x2_t" }, @@ -163287,6 +163323,12 @@ "uint64_t imm_index", "svint8_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svint8x2_t" }, @@ -163302,6 +163344,12 @@ "uint64_t imm_index", "svuint16_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svuint16x2_t" }, @@ -163317,6 +163365,12 @@ "uint64_t imm_index", "svuint32_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svuint32x2_t" }, @@ -163332,6 +163386,12 @@ "uint64_t imm_index", "svuint64_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svuint64x2_t" }, @@ -163347,6 +163407,12 @@ "uint64_t imm_index", "svuint8_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 1 + } + }, "return_type": { "value": "svuint8x2_t" }, @@ -163362,6 +163428,12 @@ "uint64_t imm_index", "svfloat16_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svfloat16x3_t" }, @@ -163377,6 +163449,12 @@ "uint64_t imm_index", "svfloat32_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svfloat32x3_t" }, @@ -163392,6 +163470,12 @@ "uint64_t imm_index", "svfloat64_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svfloat64x3_t" }, @@ -163407,6 +163491,12 @@ "uint64_t imm_index", "svint16_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svint16x3_t" }, @@ -163422,6 +163512,12 @@ "uint64_t imm_index", "svint32_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svint32x3_t" }, @@ -163437,6 +163533,12 @@ "uint64_t imm_index", "svint64_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svint64x3_t" }, @@ -163452,6 +163554,12 @@ "uint64_t imm_index", "svint8_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svint8x3_t" }, @@ -163467,6 +163575,12 @@ "uint64_t imm_index", "svuint16_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svuint16x3_t" }, @@ -163482,6 +163596,12 @@ "uint64_t imm_index", "svuint32_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svuint32x3_t" }, @@ -163497,6 +163617,12 @@ "uint64_t imm_index", "svuint64_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svuint64x3_t" }, @@ -163512,6 +163638,12 @@ "uint64_t imm_index", "svuint8_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 2 + } + }, "return_type": { "value": "svuint8x3_t" }, @@ -163527,6 +163659,12 @@ "uint64_t imm_index", "svfloat16_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svfloat16x4_t" }, @@ -163542,6 +163680,12 @@ "uint64_t imm_index", "svfloat32_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svfloat32x4_t" }, @@ -163557,6 +163701,12 @@ "uint64_t imm_index", "svfloat64_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svfloat64x4_t" }, @@ -163572,6 +163722,12 @@ "uint64_t imm_index", "svint16_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svint16x4_t" }, @@ -163587,6 +163743,12 @@ "uint64_t imm_index", "svint32_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svint32x4_t" }, @@ -163602,6 +163764,12 @@ "uint64_t imm_index", "svint64_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svint64x4_t" }, @@ -163617,6 +163785,12 @@ "uint64_t imm_index", "svint8_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svint8x4_t" }, @@ -163632,6 +163806,12 @@ "uint64_t imm_index", "svuint16_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svuint16x4_t" }, @@ -163647,6 +163827,12 @@ "uint64_t imm_index", "svuint32_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svuint32x4_t" }, @@ -163662,6 +163848,12 @@ "uint64_t imm_index", "svuint64_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svuint64x4_t" }, @@ -163677,6 +163869,12 @@ "uint64_t imm_index", "svuint8_t x" ], + "Arguments_Preparation": { + "imm_index": { + "minimum": 0, + "maximum": 3 + } + }, "return_type": { "value": "svuint8x4_t" }, From 9461c188b672ed4bd5b5de6f17b0fa034e603fe5 Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 1 Jun 2026 12:44:41 +0000 Subject: [PATCH 21/32] intrinsic-test: stop skipping SVE intrinsics Enables generation of tests for SVE intrinsics leveraging the changes from the previous commits. --- crates/intrinsic-test/src/arm/json_parser.rs | 14 +++----------- crates/intrinsic-test/src/arm/mod.rs | 4 ++-- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/crates/intrinsic-test/src/arm/json_parser.rs b/crates/intrinsic-test/src/arm/json_parser.rs index c2bf306f5a..bb01c55390 100644 --- a/crates/intrinsic-test/src/arm/json_parser.rs +++ b/crates/intrinsic-test/src/arm/json_parser.rs @@ -46,7 +46,7 @@ impl TryFrom for ArgPrep { #[derive(Deserialize, Debug)] struct JsonIntrinsic { #[serde(rename = "SIMD_ISA")] - simd_isa: String, + _simd_isa: String, name: String, arguments: Vec, return_type: ReturnType, @@ -58,22 +58,14 @@ struct JsonIntrinsic { _instructions: Option>>, } -pub fn get_neon_intrinsics( - filename: &Path, -) -> Result>, Box> { +pub fn get_intrinsics(filename: &Path) -> Result>, Box> { let file = std::fs::File::open(filename)?; let reader = std::io::BufReader::new(file); let json: Vec = serde_json::from_reader(reader).expect("Couldn't parse JSON"); let parsed = json .into_iter() - .filter_map(|intr| { - if intr.simd_isa == "Neon" { - Some(json_to_intrinsic(intr).expect("Couldn't parse JSON")) - } else { - None - } - }) + .map(|intr| json_to_intrinsic(intr).expect("Couldn't parse JSON")) .collect(); Ok(parsed) } diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 40c26b7569..28e086c0e6 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -10,7 +10,7 @@ use crate::common::intrinsic_helpers::{SimdLen, TypeDefinition, TypeKind}; use crate::common::values::test_values_array_name; use crate::common::{PASSES, PREDICATE_LOCAL, SupportedArchitecture}; use intrinsic::ArmType; -use json_parser::get_neon_intrinsics; +use json_parser::get_intrinsics; #[derive(PartialEq)] pub struct Arm(Vec>); @@ -50,7 +50,7 @@ impl SupportedArchitecture for Arm { fn create(cli_options: &ProcessedCli) -> Self { let a32 = cli_options.target.starts_with("armv7"); let mut intrinsics = - get_neon_intrinsics(&cli_options.filename).expect("Error parsing input file"); + get_intrinsics(&cli_options.filename).expect("Error parsing input file"); intrinsics.sort_by(|a, b| a.name.cmp(&b.name)); intrinsics.dedup(); From b18c91043cacd4bc0e83d1683d2358d6f6efc043 Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 11 Jun 2026 09:58:43 +0000 Subject: [PATCH 22/32] intrinsic-test: simplify architecture constants There doesn't need to be so many or other modules with the values. --- crates/intrinsic-test/src/arm/config.rs | 67 --------- crates/intrinsic-test/src/arm/mod.rs | 85 +++++++++-- crates/intrinsic-test/src/common/gen_c.rs | 10 +- crates/intrinsic-test/src/common/gen_rust.rs | 7 +- crates/intrinsic-test/src/common/mod.rs | 12 +- crates/intrinsic-test/src/x86/config.rs | 138 ----------------- crates/intrinsic-test/src/x86/mod.rs | 148 ++++++++++++++++++- 7 files changed, 226 insertions(+), 241 deletions(-) delete mode 100644 crates/intrinsic-test/src/arm/config.rs delete mode 100644 crates/intrinsic-test/src/x86/config.rs diff --git a/crates/intrinsic-test/src/arm/config.rs b/crates/intrinsic-test/src/arm/config.rs deleted file mode 100644 index da36a5f915..0000000000 --- a/crates/intrinsic-test/src/arm/config.rs +++ /dev/null @@ -1,67 +0,0 @@ -pub const NOTICE: &str = "\ -// This is a transient test file, not intended for distribution. Some aspects of the -// test are derived from a JSON specification, published under the same license as the -// `intrinsic-test` crate.\n"; - -pub const PLATFORM_RUST_DEFINITIONS: &str = ""; - -pub const PLATFORM_RUST_CFGS: &str = r#" -#![cfg_attr(target_arch = "arm", feature(stdarch_arm_neon_intrinsics))] -#![cfg_attr(target_arch = "arm", feature(stdarch_aarch32_crc32))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fcma))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_i8mm))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_feat_lut))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fp8))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(faminmax))] -#![feature(stdarch_neon_f16)] -#![feature(stdarch_aarch64_sve)] - -#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] -use core_arch::arch::aarch64::*; - -#[cfg(target_arch = "arm")] -use core_arch::arch::arm::*; - -const fn svpattern_from_i32(value: i32) -> svpattern { - match value { - 0 => svpattern::SV_POW2, - 1 => svpattern::SV_VL1, - 2 => svpattern::SV_VL2, - 3 => svpattern::SV_VL3, - 4 => svpattern::SV_VL4, - 5 => svpattern::SV_VL5, - 6 => svpattern::SV_VL6, - 7 => svpattern::SV_VL7, - 8 => svpattern::SV_VL8, - 9 => svpattern::SV_VL16, - 10 => svpattern::SV_VL32, - 11 => svpattern::SV_VL64, - 12 => svpattern::SV_VL128, - 13 => svpattern::SV_VL256, - 29 => svpattern::SV_MUL4, - 30 => svpattern::SV_MUL3, - 31 => svpattern::SV_ALL, - _ => unreachable!(), - } -} - -const fn svprfop_from_i32(value: i32) -> svprfop { - match value { - 0 => svprfop::SV_PLDL1KEEP, - 1 => svprfop::SV_PLDL1STRM, - 2 => svprfop::SV_PLDL2KEEP, - 3 => svprfop::SV_PLDL2STRM, - 4 => svprfop::SV_PLDL3KEEP, - 5 => svprfop::SV_PLDL3STRM, - 8 => svprfop::SV_PSTL1KEEP, - 9 => svprfop::SV_PSTL1STRM, - 10 => svprfop::SV_PSTL2KEEP, - 11 => svprfop::SV_PSTL2STRM, - 12 => svprfop::SV_PSTL3KEEP, - 13 => svprfop::SV_PSTL3STRM, - _ => unreachable!(), - } -} -"#; diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 28e086c0e6..edf9e4f36f 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -1,4 +1,3 @@ -mod config; mod intrinsic; mod json_parser; mod types; @@ -22,14 +21,21 @@ impl SupportedArchitecture for Arm { &self.0 } - const NOTICE: &str = config::NOTICE; - - const PLATFORM_C_HEADERS: &[&str] = &["arm_sve.h", "arm_neon.h", "arm_acle.h", "arm_fp16.h"]; - - const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; - const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; - - fn arch_flags(&self, cli_options: &ProcessedCli) -> Vec<&str> { + const NOTICE: &str = r#" +// This is a transient test file, not intended for distribution. Some aspects of the +// test are derived from a JSON specification, published under the same license as the +// `intrinsic-test` crate. +"#; + + const C_PRELUDE: &str = r#" +#include +#include +#include +#include +"#; + const RUST_PRELUDE: &str = RUST_PRELUDE; + + fn c_compiler_flags(&self, cli_options: &ProcessedCli) -> Vec<&str> { // GCC uses an extra `-` in the arch name let big_endian = cli_options.target.starts_with("aarch64_be"); let a32 = cli_options.target.starts_with("armv7"); @@ -145,3 +151,64 @@ let {name} = svcmpne_n_s8({PREDICATE_LOCAL}, {name}, 0); } } } + +const RUST_PRELUDE: &str = r#" +#![cfg_attr(target_arch = "arm", feature(stdarch_arm_neon_intrinsics))] +#![cfg_attr(target_arch = "arm", feature(stdarch_aarch32_crc32))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fcma))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_i8mm))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_feat_lut))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fp8))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(faminmax))] +#![feature(stdarch_neon_f16)] +#![feature(stdarch_aarch64_sve)] + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +use core_arch::arch::aarch64::*; + +#[cfg(target_arch = "arm")] +use core_arch::arch::arm::*; + +const fn svpattern_from_i32(value: i32) -> svpattern { + match value { + 0 => svpattern::SV_POW2, + 1 => svpattern::SV_VL1, + 2 => svpattern::SV_VL2, + 3 => svpattern::SV_VL3, + 4 => svpattern::SV_VL4, + 5 => svpattern::SV_VL5, + 6 => svpattern::SV_VL6, + 7 => svpattern::SV_VL7, + 8 => svpattern::SV_VL8, + 9 => svpattern::SV_VL16, + 10 => svpattern::SV_VL32, + 11 => svpattern::SV_VL64, + 12 => svpattern::SV_VL128, + 13 => svpattern::SV_VL256, + 29 => svpattern::SV_MUL4, + 30 => svpattern::SV_MUL3, + 31 => svpattern::SV_ALL, + _ => unreachable!(), + } +} + +const fn svprfop_from_i32(value: i32) -> svprfop { + match value { + 0 => svprfop::SV_PLDL1KEEP, + 1 => svprfop::SV_PLDL1STRM, + 2 => svprfop::SV_PLDL2KEEP, + 3 => svprfop::SV_PLDL2STRM, + 4 => svprfop::SV_PLDL3KEEP, + 5 => svprfop::SV_PLDL3STRM, + 8 => svprfop::SV_PSTL1KEEP, + 9 => svprfop::SV_PSTL1STRM, + 10 => svprfop::SV_PSTL2KEEP, + 11 => svprfop::SV_PSTL2STRM, + 12 => svprfop::SV_PSTL3KEEP, + 13 => svprfop::SV_PSTL3STRM, + _ => unreachable!(), + } +} +"#; diff --git a/crates/intrinsic-test/src/common/gen_c.rs b/crates/intrinsic-test/src/common/gen_c.rs index 2023bf9953..104226c5df 100644 --- a/crates/intrinsic-test/src/common/gen_c.rs +++ b/crates/intrinsic-test/src/common/gen_c.rs @@ -17,7 +17,7 @@ use super::intrinsic_helpers::TypeDefinition; pub fn write_wrapper_c( w: &mut impl std::io::Write, notice: &str, - platform_headers: &[&str], + prelude: &str, intrinsics: &[Intrinsic], ) -> std::io::Result<()> { write!( @@ -26,16 +26,10 @@ pub fn write_wrapper_c( {notice} #include #include -{platform_headers} +{prelude} {intrinsics} "#, - platform_headers = - platform_headers - .iter() - .format_with("\n", |header, fmt| fmt(&format_args!( - "#include <{header}>" - ))), intrinsics = intrinsics.iter().format_with("", |intrinsic, fmt| { fmt(&intrinsic .specializations() diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index de9bcd7e0c..44128d43b9 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -116,14 +116,11 @@ pub fn write_lib_rs( #![allow(non_camel_case_types)] #![allow(non_snake_case)] -{cfg} +{prelude} {COMMON_RUST_DEFINITIONS} - -{definitions} "#, notice = A::NOTICE, - cfg = A::PLATFORM_RUST_CFGS, - definitions = A::PLATFORM_RUST_DEFINITIONS, + prelude = A::RUST_PRELUDE, )?; let mut seen = std::collections::HashSet::new(); diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index 197a9b861b..bd1bc85fbf 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -46,12 +46,10 @@ pub trait SupportedArchitecture: Sized { const NOTICE: &str; - const PLATFORM_C_HEADERS: &[&str]; + const C_PRELUDE: &str; + const RUST_PRELUDE: &str; - const PLATFORM_RUST_CFGS: &str; - const PLATFORM_RUST_DEFINITIONS: &str; - - fn arch_flags(&self, cli_options: &ProcessedCli) -> Vec<&str>; + fn c_compiler_flags(&self, cli_options: &ProcessedCli) -> Vec<&str>; fn generate_c_file(&self) { let (max_chunk_size, _chunk_count) = manual_chunk(self.intrinsics().len()); @@ -63,14 +61,14 @@ pub trait SupportedArchitecture: Sized { .map(|(i, chunk)| { let c_filename = format!("c_programs/wrapper_{i}.c"); let mut file = File::create(&c_filename).unwrap(); - write_wrapper_c(&mut file, Self::NOTICE, Self::PLATFORM_C_HEADERS, chunk) + write_wrapper_c(&mut file, Self::NOTICE, Self::C_PRELUDE, chunk) }) .collect::>() .unwrap(); } fn generate_rust_file(&self, cli_options: &ProcessedCli) { - let arch_flags = self.arch_flags(cli_options); + let arch_flags = self.c_compiler_flags(cli_options); std::fs::create_dir_all("rust_programs").unwrap(); diff --git a/crates/intrinsic-test/src/x86/config.rs b/crates/intrinsic-test/src/x86/config.rs deleted file mode 100644 index 68737ab5ac..0000000000 --- a/crates/intrinsic-test/src/x86/config.rs +++ /dev/null @@ -1,138 +0,0 @@ -pub const NOTICE: &str = "\ -// This is a transient test file, not intended for distribution. Some aspects of the -// test are derived from an XML specification, published under the same license as the -// `intrinsic-test` crate.\n"; - -pub const PLATFORM_RUST_DEFINITIONS: &str = r#" -use core_arch::arch::x86_64::*; - -#[inline] -unsafe fn _mm_loadu_ph_to___m128i(mem_addr: *const f16) -> __m128i { - _mm_castph_si128(_mm_loadu_ph(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_ph_to___m256i(mem_addr: *const f16) -> __m256i { - _mm256_castph_si256(_mm256_loadu_ph(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_ph_to___mm512i(mem_addr: *const f16) -> __m512i { - _mm512_castph_si512(_mm512_loadu_ph(mem_addr)) -} - - -#[inline] -unsafe fn _mm_loadu_ps_to___m128h(mem_addr: *const f32) -> __m128h { - _mm_castps_ph(_mm_loadu_ps(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_ps_to___m256h(mem_addr: *const f32) -> __m256h { - _mm256_castps_ph(_mm256_loadu_ps(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_ps_to___m512h(mem_addr: *const f32) -> __m512h { - _mm512_castps_ph(_mm512_loadu_ps(mem_addr)) -} - -#[inline] -unsafe fn _mm_loadu_epi16_to___m128d(mem_addr: *const i16) -> __m128d { - _mm_castsi128_pd(_mm_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi16_to___m256d(mem_addr: *const i16) -> __m256d { - _mm256_castsi256_pd(_mm256_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi16_to___m512d(mem_addr: *const i16) -> __m512d { - _mm512_castsi512_pd(_mm512_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm_loadu_epi32_to___m128d(mem_addr: *const i32) -> __m128d { - _mm_castsi128_pd(_mm_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi32_to___m256d(mem_addr: *const i32) -> __m256d { - _mm256_castsi256_pd(_mm256_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi32_to___m512d(mem_addr: *const i32) -> __m512d { - _mm512_castsi512_pd(_mm512_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm_loadu_epi64_to___m128d(mem_addr: *const i64) -> __m128d { - _mm_castsi128_pd(_mm_loadu_epi64(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi64_to___m256d(mem_addr: *const i64) -> __m256d { - _mm256_castsi256_pd(_mm256_loadu_epi64(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi64_to___m512d(mem_addr: *const i64) -> __m512d { - _mm512_castsi512_pd(_mm512_loadu_epi64(mem_addr)) -} - -// === -#[inline] -unsafe fn _mm_loadu_epi16_to___m128(mem_addr: *const i16) -> __m128 { - _mm_castsi128_ps(_mm_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi16_to___m256(mem_addr: *const i16) -> __m256 { - _mm256_castsi256_ps(_mm256_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi16_to___m512(mem_addr: *const i16) -> __m512 { - _mm512_castsi512_ps(_mm512_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm_loadu_epi32_to___m128(mem_addr: *const i32) -> __m128 { - _mm_castsi128_ps(_mm_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi32_to___m256(mem_addr: *const i32) -> __m256 { - _mm256_castsi256_ps(_mm256_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi32_to___m512(mem_addr: *const i32) -> __m512 { - _mm512_castsi512_ps(_mm512_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm_loadu_epi64_to___m128(mem_addr: *const i64) -> __m128 { - _mm_castsi128_ps(_mm_loadu_epi64(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi64_to___m256(mem_addr: *const i64) -> __m256 { - _mm256_castsi256_ps(_mm256_loadu_epi64(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi64_to___m512(mem_addr: *const i64) -> __m512 { - _mm512_castsi512_ps(_mm512_loadu_epi64(mem_addr)) -} - -"#; - -pub const PLATFORM_RUST_CFGS: &str = r#" -#![feature(stdarch_x86_avx512_bf16)] -#![feature(stdarch_x86_avx512_f16)] -#![feature(stdarch_x86_rtm)] -#![feature(x86_amx_intrinsics)] -"#; diff --git a/crates/intrinsic-test/src/x86/mod.rs b/crates/intrinsic-test/src/x86/mod.rs index 0ccb911bf2..36f4fee437 100644 --- a/crates/intrinsic-test/src/x86/mod.rs +++ b/crates/intrinsic-test/src/x86/mod.rs @@ -1,4 +1,3 @@ -mod config; mod constraint; mod intrinsic; mod types; @@ -22,14 +21,18 @@ impl SupportedArchitecture for X86 { &self.intrinsics } - const NOTICE: &str = config::NOTICE; + const NOTICE: &str = r#" +// This is a transient test file, not intended for distribution. Some aspects of the +// test are derived from an XML specification, published under the same license as the +// `intrinsic-test` crate. +"#; - const PLATFORM_C_HEADERS: &[&str] = &["immintrin.h"]; + const C_PRELUDE: &str = r#" +#include +"#; + const RUST_PRELUDE: &str = RUST_PRELUDE; - const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; - const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; - - fn arch_flags(&self, _cli_options: &ProcessedCli) -> Vec<&str> { + fn c_compiler_flags(&self, _cli_options: &ProcessedCli) -> Vec<&str> { vec![ "-maes", "-mf16c", @@ -101,3 +104,134 @@ impl SupportedArchitecture for X86 { unimplemented!("no scalable vectors on x86") } } + +const RUST_PRELUDE: &str = r#" +#![feature(stdarch_x86_avx512_bf16)] +#![feature(stdarch_x86_avx512_f16)] +#![feature(stdarch_x86_rtm)] +#![feature(x86_amx_intrinsics)] + +use core_arch::arch::x86_64::*; + +#[inline] +unsafe fn _mm_loadu_ph_to___m128i(mem_addr: *const f16) -> __m128i { + _mm_castph_si128(_mm_loadu_ph(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_ph_to___m256i(mem_addr: *const f16) -> __m256i { + _mm256_castph_si256(_mm256_loadu_ph(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_ph_to___mm512i(mem_addr: *const f16) -> __m512i { + _mm512_castph_si512(_mm512_loadu_ph(mem_addr)) +} + + +#[inline] +unsafe fn _mm_loadu_ps_to___m128h(mem_addr: *const f32) -> __m128h { + _mm_castps_ph(_mm_loadu_ps(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_ps_to___m256h(mem_addr: *const f32) -> __m256h { + _mm256_castps_ph(_mm256_loadu_ps(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_ps_to___m512h(mem_addr: *const f32) -> __m512h { + _mm512_castps_ph(_mm512_loadu_ps(mem_addr)) +} + +#[inline] +unsafe fn _mm_loadu_epi16_to___m128d(mem_addr: *const i16) -> __m128d { + _mm_castsi128_pd(_mm_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi16_to___m256d(mem_addr: *const i16) -> __m256d { + _mm256_castsi256_pd(_mm256_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi16_to___m512d(mem_addr: *const i16) -> __m512d { + _mm512_castsi512_pd(_mm512_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm_loadu_epi32_to___m128d(mem_addr: *const i32) -> __m128d { + _mm_castsi128_pd(_mm_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi32_to___m256d(mem_addr: *const i32) -> __m256d { + _mm256_castsi256_pd(_mm256_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi32_to___m512d(mem_addr: *const i32) -> __m512d { + _mm512_castsi512_pd(_mm512_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm_loadu_epi64_to___m128d(mem_addr: *const i64) -> __m128d { + _mm_castsi128_pd(_mm_loadu_epi64(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi64_to___m256d(mem_addr: *const i64) -> __m256d { + _mm256_castsi256_pd(_mm256_loadu_epi64(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi64_to___m512d(mem_addr: *const i64) -> __m512d { + _mm512_castsi512_pd(_mm512_loadu_epi64(mem_addr)) +} + +// === +#[inline] +unsafe fn _mm_loadu_epi16_to___m128(mem_addr: *const i16) -> __m128 { + _mm_castsi128_ps(_mm_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi16_to___m256(mem_addr: *const i16) -> __m256 { + _mm256_castsi256_ps(_mm256_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi16_to___m512(mem_addr: *const i16) -> __m512 { + _mm512_castsi512_ps(_mm512_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm_loadu_epi32_to___m128(mem_addr: *const i32) -> __m128 { + _mm_castsi128_ps(_mm_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi32_to___m256(mem_addr: *const i32) -> __m256 { + _mm256_castsi256_ps(_mm256_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi32_to___m512(mem_addr: *const i32) -> __m512 { + _mm512_castsi512_ps(_mm512_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm_loadu_epi64_to___m128(mem_addr: *const i64) -> __m128 { + _mm_castsi128_ps(_mm_loadu_epi64(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi64_to___m256(mem_addr: *const i64) -> __m256 { + _mm256_castsi256_ps(_mm256_loadu_epi64(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi64_to___m512(mem_addr: *const i64) -> __m512 { + _mm512_castsi512_ps(_mm512_loadu_epi64(mem_addr)) +} +"#; From 26b58e623676aa672e271853a6746c17d07fac5f Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 11 Jun 2026 10:07:47 +0000 Subject: [PATCH 23/32] intrinsic-test: enable non-baseline target features SVE isn't a baseline target feature for `aarch64-unknown-linux-gnu` but should be enabled when running tests. --- ci/intrinsic-test.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ci/intrinsic-test.sh b/ci/intrinsic-test.sh index 0441611f38..253989dc6d 100755 --- a/ci/intrinsic-test.sh +++ b/ci/intrinsic-test.sh @@ -39,21 +39,25 @@ case ${1} in aarch64_be*) export CFLAGS="-I${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc/usr/include --sysroot={AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc -Wno-nonportable-vector-initialization" ARCH=aarch64_be + RUNTIME_RUSTFLAGS= ;; aarch64*) export CFLAGS="-I/usr/aarch64-linux-gnu/include/" ARCH=aarch64 + RUNTIME_RUSTFLAGS=-Ctarget-feature=+sve,+sve2 ;; armv7*) export CFLAGS="-I/usr/arm-linux-gnueabihf/include/" ARCH=arm + RUNTIME_RUSTFLAGS= ;; x86_64*) export CFLAGS="-I/usr/include/x86_64-linux-gnu/" ARCH=x86 + RUNTIME_RUSTFLAGS= ;; *) ;; @@ -80,4 +84,5 @@ case "${1}" in ;; esac -cargo test --manifest-path=rust_programs/Cargo.toml --target "${1}" --profile "${PROFILE}" --tests +RUSTFLAGS="${RUNTIME_RUSTFLAGS}" cargo test --manifest-path=rust_programs/Cargo.toml \ + --target "${1}" --profile "${PROFILE}" --tests From dbfab632f642c41e16ab4bb908d0c6b9788d0feb Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 11 Jun 2026 10:23:23 +0000 Subject: [PATCH 24/32] intrinsic-test: skip SVE intrinsics on big endian SVE intrinsics aren't available on big endian --- crates/intrinsic-test/src/arm/json_parser.rs | 3 ++- crates/intrinsic-test/src/arm/mod.rs | 3 +++ crates/intrinsic-test/src/common/intrinsic.rs | 3 +++ crates/intrinsic-test/src/x86/xml_parser.rs | 1 + 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/crates/intrinsic-test/src/arm/json_parser.rs b/crates/intrinsic-test/src/arm/json_parser.rs index bb01c55390..013c20f2db 100644 --- a/crates/intrinsic-test/src/arm/json_parser.rs +++ b/crates/intrinsic-test/src/arm/json_parser.rs @@ -46,7 +46,7 @@ impl TryFrom for ArgPrep { #[derive(Deserialize, Debug)] struct JsonIntrinsic { #[serde(rename = "SIMD_ISA")] - _simd_isa: String, + simd_isa: String, name: String, arguments: Vec, return_type: ReturnType, @@ -140,6 +140,7 @@ fn json_to_intrinsic( arguments, results: result_ty, arch_tags: intr.architectures, + extension: intr.simd_isa, }) } diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index edf9e4f36f..916798ef98 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -54,6 +54,7 @@ impl SupportedArchitecture for Arm { } fn create(cli_options: &ProcessedCli) -> Self { + let big_endian = cli_options.target.starts_with("aarch64_be"); let a32 = cli_options.target.starts_with("armv7"); let mut intrinsics = get_intrinsics(&cli_options.filename).expect("Error parsing input file"); @@ -116,6 +117,8 @@ impl SupportedArchitecture for Arm { .filter(|i| !cli_options.skip.contains(&i.name)) // Skip A64-specific intrinsics on A32 .filter(|i| !(a32 && i.arch_tags == vec!["A64".to_string()])) + // Skip SVE intrinsics on big endian + .filter(|i| !(big_endian && (i.extension == "SVE" || i.extension == "SVE2"))) .take(sample_size) .collect::>(); diff --git a/crates/intrinsic-test/src/common/intrinsic.rs b/crates/intrinsic-test/src/common/intrinsic.rs index 3c0d5dbb65..d5d903d941 100644 --- a/crates/intrinsic-test/src/common/intrinsic.rs +++ b/crates/intrinsic-test/src/common/intrinsic.rs @@ -16,6 +16,9 @@ pub struct Intrinsic { /// Any architecture-specific tags. pub arch_tags: Vec, + + /// Specific extension that the intrinsic is from + pub extension: String, } impl Intrinsic { diff --git a/crates/intrinsic-test/src/x86/xml_parser.rs b/crates/intrinsic-test/src/x86/xml_parser.rs index f84f1f4b9c..2296dffb64 100644 --- a/crates/intrinsic-test/src/x86/xml_parser.rs +++ b/crates/intrinsic-test/src/x86/xml_parser.rs @@ -144,5 +144,6 @@ fn xml_to_intrinsic(intr: XMLIntrinsic) -> Result, Box Date: Thu, 11 Jun 2026 13:31:11 +0000 Subject: [PATCH 25/32] intrinsic-test: SVE float comparison Like with non-SVE test generation, comparison of float results in scalable vectors need special-handling of comparisons. --- crates/intrinsic-test/src/arm/types.rs | 108 +++++++++++++++---------- 1 file changed, 65 insertions(+), 43 deletions(-) diff --git a/crates/intrinsic-test/src/arm/types.rs b/crates/intrinsic-test/src/arm/types.rs index 3453ac4c95..2e628aff92 100644 --- a/crates/intrinsic-test/src/arm/types.rs +++ b/crates/intrinsic-test/src/arm/types.rs @@ -110,55 +110,77 @@ impl TypeDefinition for ArmType { } fn comparison_function(&self) -> String { - match self.num_lanes() { - SimdLen::Scalable => { - // There isn't a `svcmpeq` for `svbool_t`, so do an XOR instead and test it is - // empty.. - if self.kind() == TypeKind::Bool { - return format!( - r#" -let eq = sveor_b_z({PREDICATE_LOCAL}, __rust_return_value, __c_return_value); -assert!(!svptest_any({PREDICATE_LOCAL}, eq), "{{}}", id); - "#, - ); - } + if let SimdLen::Fixed(num_lanes) = self.num_lanes() { + return default_fixed_vector_comparison(self, num_lanes); + } + + if self.kind() == TypeKind::Bool { + // There isn't a `svcmpeq` for `svbool_t` and there aren't `svboolxN_t` types, so just + // do an XOR and test it is empty. + return format!( + r#" +let __eq = sveor_b_z({PREDICATE_LOCAL}, __rust_return_value, __c_return_value); +assert!(!svptest_any({PREDICATE_LOCAL}, __eq), "{{}}", id); + "# + ); + } - // Use `svcmpeq` to compare the return values of Rust and C invocations - match self.num_vectors() { - 1 => { - format!( + // Returns `of` when `num_vectors == 1` otherwise returns the appropriate `svget` invocation + // for `of`. + let get = |num_vectors: u32, idx: u32, from: &'static str| -> String { + if num_vectors == 1 { + return from.to_string(); + } + + format!( + "svget{num_vectors}_{ty}{bl}::<{idx}>({from})", + ty = self.rust_intrinsic_name_prefix(), + bl = self.inner_size(), + ) + }; + + let n = self.num_vectors(); + (0..n) + .format_with("\n", |i, fmt| { + match self.kind() { + TypeKind::Float | TypeKind::BFloat => { + // Floats need special handling because `NaN != NaN` normally - this + // effectively does `(rust == c) || (isnan(rust) && isnan(c))` + fmt(&format_args!( r#" -let eq = svcmpeq_{ty}{bl}({PREDICATE_LOCAL}, __rust_return_value, __c_return_value); -assert!(svptest_any(__pred, eq), "{{}}", id); - "#, +let __rust_eq_return_value = {rust_return_value}; +let __c_eq_return_value = {c_return_value}; +let __eq_sans_nan = svcmpeq_{ty}{bl}({PREDICATE_LOCAL}, __rust_eq_return_value, __c_eq_return_value); +let __rust_nan = svcmpuo_{ty}{bl}({PREDICATE_LOCAL}, __rust_eq_return_value, __rust_eq_return_value); +let __c_nan = svcmpuo_{ty}{bl}({PREDICATE_LOCAL}, __c_eq_return_value, __c_eq_return_value); +let __both_nan = svand_b_z({PREDICATE_LOCAL}, __rust_nan, __c_nan); +let __eq = svorr_b_z({PREDICATE_LOCAL}, __eq_sans_nan, __both_nan); +assert!(svptest_any(__pred, __eq), "{{}}-{i_plus_one}/{n}", id); +"#, ty = self.rust_intrinsic_name_prefix(), bl = self.inner_size(), - ) + rust_return_value = get(n, i, "__rust_return_value"), + c_return_value = get(n, i, "__c_return_value"), + i_plus_one = i + 1, // so that the output is "1/2" and "2/2" + )) + } + _ => { + // Most types can just use `svcmpeq` + fmt(&format_args!( + r#" +let __eq = svcmpeq_{ty}{bl}({PREDICATE_LOCAL}, {rust_return_value}, {c_return_value}); +assert!(svptest_any(__pred, __eq), "{{}}-{i_plus_one}/{n}", id); +"#, + ty = self.rust_intrinsic_name_prefix(), + bl = self.inner_size(), + rust_return_value = get(n, i, "__rust_return_value"), + c_return_value = get(n, i, "__c_return_value"), + i_plus_one = i + 1, // so that the output is "1/2" and "2/2" + )) } - // For tuples of vectors, do multiple comparisons, each with a `svget` to - // extract the Nth vector. - n @ (2 | 3 | 4) => (0..n) - .format_with("\n", |i, fmt| { - fmt(&format_args!( - r#" -let eq = svcmpeq_{ty}{bl}( - {PREDICATE_LOCAL}, - svget{n}_{ty}{bl}::<{i}>(__rust_return_value), - svget{n}_{ty}{bl}::<{i}>(__c_return_value) -); -assert!(svptest_any(__pred, eq), "{{}}-{i_plus_one}/{n}", id); - "#, - ty = self.rust_intrinsic_name_prefix(), - bl = self.inner_size(), - i_plus_one = i + 1, // so that the output is "1/2" and "2/2" - )) - }) - .to_string(), - _ => unreachable!(), } - } - SimdLen::Fixed(num_lanes) => default_fixed_vector_comparison(self, num_lanes), - } + }) + .to_string() } } From 3fb44133b9a1c98fc5f42ab0064a22b8479c8a77 Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 15 Jun 2026 10:20:39 +0000 Subject: [PATCH 26/32] intrinsic-test: do not test `svundef*` The output of these cannot be compared. --- crates/intrinsic-test/src/arm/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 916798ef98..efe1489d4c 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -108,6 +108,10 @@ impl SupportedArchitecture for Arm { .filter(|i| !i.name.starts_with("svrevd")) // Skip `svpsel_lane_b*` intrinsics - not yet implemented! .filter(|i| !i.name.starts_with("svpsel_lane_b")) + // Skip `svundef*` intrinsics - to avoid undefined behaviour in Rust, these return + // zeroed vectors in Rust, which are inherently going to be different than the + // undefined vectors returned by the C intrinsics. + .filter(|i| !i.name.starts_with("svundef")) // Skip pointers for now, we would probably need to look at the return // type to work out how many elements we need to point to. .filter(|i| !i.arguments.iter().any(|a| a.is_ptr())) From 6e63db8130c0716616a3b8646cbd6362bfe7df88 Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 15 Jun 2026 10:20:39 +0000 Subject: [PATCH 27/32] intrinsic-test: fwd args in `intrinsic-test.sh` Forward addl. arguments to `intrinsic-test.sh` to `cargo test` so that `--no-fail-fast` or a specific test name can be passed. --- ci/intrinsic-test.sh | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/ci/intrinsic-test.sh b/ci/intrinsic-test.sh index 253989dc6d..49846efc8d 100755 --- a/ci/intrinsic-test.sh +++ b/ci/intrinsic-test.sh @@ -1,13 +1,19 @@ #!/usr/bin/env sh -set -ex - if [ $# -lt 2 ]; then - >&2 echo "Usage: $0 " + >&2 echo "Usage: $0 <..args for \`cargo test\`..>" exit 1 fi -case ${2} in +set -ex + +# Pop both arguments and leave "$@" as containing args to be forwarded to `cargo test` +TARGET="$1" +shift +CC_KIND="$1" +shift + +case ${CC_KIND} in clang) export CC="${CLANG_PATH}" CC_ARG_STYLE=clang @@ -35,7 +41,7 @@ echo "PROFILE=${PROFILE}" INTRINSIC_TEST="--manifest-path=crates/intrinsic-test/Cargo.toml" -case ${1} in +case ${TARGET} in aarch64_be*) export CFLAGS="-I${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc/usr/include --sysroot={AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc -Wno-nonportable-vector-initialization" ARCH=aarch64_be @@ -64,25 +70,25 @@ case ${1} in esac -case "${1}" in +case "${TARGET}" in x86_64-unknown-linux-gnu*) env -u CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER \ cargo run "${INTRINSIC_TEST}" --release \ --bin intrinsic-test -- intrinsics_data/x86-intel.xml \ --skip "crates/intrinsic-test/missing_${ARCH}_common.txt" \ - --skip "crates/intrinsic-test/missing_${ARCH}_${2}.txt" \ - --target "${1}" \ + --skip "crates/intrinsic-test/missing_${ARCH}_${CC_KIND}.txt" \ + --target "${TARGET}" \ --cc-arg-style "${CC_ARG_STYLE}" ;; *) cargo run "${INTRINSIC_TEST}" --release \ --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \ --skip "crates/intrinsic-test/missing_${ARCH}_common.txt" \ - --skip "crates/intrinsic-test/missing_${ARCH}_${2}.txt" \ - --target "${1}" \ + --skip "crates/intrinsic-test/missing_${ARCH}_${CC_KIND}.txt" \ + --target "${TARGET}" \ --cc-arg-style "${CC_ARG_STYLE}" ;; esac RUSTFLAGS="${RUNTIME_RUSTFLAGS}" cargo test --manifest-path=rust_programs/Cargo.toml \ - --target "${1}" --profile "${PROFILE}" --tests + --target "${TARGET}" --profile "${PROFILE}" --tests "$@" From badf92c5b59cd71512829b62410c843ef4b08bcc Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 15 Jun 2026 10:20:39 +0000 Subject: [PATCH 28/32] core_arch: redefine `svrev_b{16,32,64}` Clang uses the `llvm.aarch64.sve.rev.bN` intrinsic for `svrev` with `b16`, `b32` and `b64`. This required small generator changes so it knew a bool-to-bool conversion was a no-op and a new blanket identity impl of `SveInto` so the calls generated compile. --- crates/core_arch/src/aarch64/sve/generated.rs | 44 +++++++++---------- crates/core_arch/src/aarch64/sve/mod.rs | 8 ++++ .../stdarch-gen-arm/spec/sve/aarch64.spec.yml | 9 ++-- crates/stdarch-gen-arm/src/intrinsic.rs | 1 + 4 files changed, 37 insertions(+), 25 deletions(-) diff --git a/crates/core_arch/src/aarch64/sve/generated.rs b/crates/core_arch/src/aarch64/sve/generated.rs index 116adcf746..42d2d1c5b8 100644 --- a/crates/core_arch/src/aarch64/sve/generated.rs +++ b/crates/core_arch/src/aarch64/sve/generated.rs @@ -35226,19 +35226,6 @@ pub fn svreinterpret_u64_u64(op: svuint64_t) -> svuint64_t { unsafe { crate::intrinsics::transmute_unchecked(op) } } #[doc = "Reverse all elements"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev_b8)"] -#[inline] -#[target_feature(enable = "sve")] -#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] -#[cfg_attr(test, assert_instr(rev))] -pub fn svrev_b8(op: svbool_t) -> svbool_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv16i1")] - fn _svrev_b8(op: svbool_t) -> svbool_t; - } - unsafe { _svrev_b8(op) } -} -#[doc = "Reverse all elements"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev_b16)"] #[inline] #[target_feature(enable = "sve")] @@ -35246,10 +35233,10 @@ pub fn svrev_b8(op: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(rev))] pub fn svrev_b16(op: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv8i1")] - fn _svrev_b16(op: svbool8_t) -> svbool8_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.b16")] + fn _svrev_b16(op: svbool_t) -> svbool_t; } - unsafe { _svrev_b16(op.sve_into()).sve_into() } + unsafe { _svrev_b16(op.sve_into()) } } #[doc = "Reverse all elements"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev_b32)"] @@ -35259,10 +35246,10 @@ pub fn svrev_b16(op: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(rev))] pub fn svrev_b32(op: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv4i1")] - fn _svrev_b32(op: svbool4_t) -> svbool4_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.b32")] + fn _svrev_b32(op: svbool_t) -> svbool_t; } - unsafe { _svrev_b32(op.sve_into()).sve_into() } + unsafe { _svrev_b32(op.sve_into()) } } #[doc = "Reverse all elements"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev_b64)"] @@ -35272,10 +35259,10 @@ pub fn svrev_b32(op: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(rev))] pub fn svrev_b64(op: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv2i1")] - fn _svrev_b64(op: svbool2_t) -> svbool2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.b64")] + fn _svrev_b64(op: svbool_t) -> svbool_t; } - unsafe { _svrev_b64(op.sve_into()).sve_into() } + unsafe { _svrev_b64(op.sve_into()) } } #[doc = "Reverse all elements"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev[_f32])"] @@ -35391,6 +35378,19 @@ pub fn svrev_u32(op: svuint32_t) -> svuint32_t { pub fn svrev_u64(op: svuint64_t) -> svuint64_t { unsafe { svrev_s64(op.as_signed()).as_unsigned() } } +#[doc = "Reverse all elements"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev[_b8])"] +#[inline] +#[target_feature(enable = "sve")] +#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] +#[cfg_attr(test, assert_instr(rev))] +pub fn svrev_b8(op: svbool_t) -> svbool_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv16i1")] + fn _svrev_b8(op: svbool_t) -> svbool_t; + } + unsafe { _svrev_b8(op) } +} #[doc = "Reverse bytes within elements"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevb[_s16]_m)"] #[inline] diff --git a/crates/core_arch/src/aarch64/sve/mod.rs b/crates/core_arch/src/aarch64/sve/mod.rs index c6d5d57aca..f11ca660b1 100644 --- a/crates/core_arch/src/aarch64/sve/mod.rs +++ b/crates/core_arch/src/aarch64/sve/mod.rs @@ -28,6 +28,14 @@ pub(super) trait SveInto: Sized { unsafe fn sve_into(self) -> T; } +impl SveInto for T { + #[inline] + #[target_feature(enable = "sve")] + unsafe fn sve_into(self) -> T { + self + } +} + macro_rules! impl_sve_type { ($(($v:vis, $elem_type:ty, $name:ident, $elt:literal))*) => ($( #[doc = concat!("Scalable vector of type ", stringify!($elem_type))] diff --git a/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml index 1f65732412..057491d31c 100644 --- a/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml +++ b/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml @@ -1207,7 +1207,7 @@ intrinsics: doc: Reverse all elements arguments: ["op: {sve_type}"] return_type: "{sve_type}" - types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] + types: [b8, f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] assert_instr: [rev] compose: - LLVMLink: { name: "llvm.vector.reverse.{sve_type}" } @@ -1217,10 +1217,13 @@ intrinsics: doc: Reverse all elements arguments: ["op: {sve_type}"] return_type: "{sve_type}" - types: [b8, b16, b32, b64] + types: [b16, b32, b64] assert_instr: [rev] compose: - - LLVMLink: { name: "llvm.vector.reverse.{sve_type}" } + - LLVMLink: + name: "llvm.aarch64.sve.rev.b{size}" + arguments: ["op: svbool_t"] + return_type: "svbool_t" - name: svrevb[_{type}]{_mxz} attr: [*sve-unstable] diff --git a/crates/stdarch-gen-arm/src/intrinsic.rs b/crates/stdarch-gen-arm/src/intrinsic.rs index 72fb97fee1..f96f05dfec 100644 --- a/crates/stdarch-gen-arm/src/intrinsic.rs +++ b/crates/stdarch-gen-arm/src/intrinsic.rs @@ -1604,6 +1604,7 @@ impl Intrinsic { (Some(BaseTypeKind::Float), Some(BaseTypeKind::Float)) => ex, (Some(BaseTypeKind::UInt), Some(BaseTypeKind::UInt)) => ex, (Some(BaseTypeKind::Poly), Some(BaseTypeKind::Poly)) => ex, + (Some(BaseTypeKind::Bool), Some(BaseTypeKind::Bool)) => ex, (None, None) => ex, _ => unreachable!( From 195ccbe1305d2fd7f33d5661ebb20cd398ce93cb Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 15 Jun 2026 10:20:39 +0000 Subject: [PATCH 29/32] core_arch: redefine `sv{zip,uzp}_b{16,32,64}` Clang uses the `llvm.aarch64.sve.zip.bN` intrinsic for `svzip` with `b16`, `b32` and `b64` and the `llvm.aarch64.sve.uzp.bN` intrinsic for `svuzp` with the same types. --- crates/core_arch/src/aarch64/sve/generated.rs | 176 +++++++++--------- .../stdarch-gen-arm/spec/sve/aarch64.spec.yml | 36 ++-- 2 files changed, 112 insertions(+), 100 deletions(-) diff --git a/crates/core_arch/src/aarch64/sve/generated.rs b/crates/core_arch/src/aarch64/sve/generated.rs index 42d2d1c5b8..ac3070918a 100644 --- a/crates/core_arch/src/aarch64/sve/generated.rs +++ b/crates/core_arch/src/aarch64/sve/generated.rs @@ -43336,19 +43336,6 @@ pub fn svusmmla_s32(op1: svint32_t, op2: svuint8_t, op3: svint8_t) -> svint32_t unsafe { _svusmmla_s32(op1, op2.as_signed(), op3) } } #[doc = "Concatenate even elements from two inputs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1_b8)"] -#[inline] -#[target_feature(enable = "sve")] -#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] -#[cfg_attr(test, assert_instr(uzp1))] -pub fn svuzp1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.nxv16i1")] - fn _svuzp1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; - } - unsafe { _svuzp1_b8(op1, op2) } -} -#[doc = "Concatenate even elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1_b16)"] #[inline] #[target_feature(enable = "sve")] @@ -43356,10 +43343,10 @@ pub fn svuzp1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(uzp1))] pub fn svuzp1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.nxv8i1")] - fn _svuzp1_b16(op1: svbool8_t, op2: svbool8_t) -> svbool8_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.b16")] + fn _svuzp1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svuzp1_b16(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svuzp1_b16(op1.sve_into(), op2.sve_into()) } } #[doc = "Concatenate even elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1_b32)"] @@ -43369,10 +43356,10 @@ pub fn svuzp1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(uzp1))] pub fn svuzp1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.nxv4i1")] - fn _svuzp1_b32(op1: svbool4_t, op2: svbool4_t) -> svbool4_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.b32")] + fn _svuzp1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svuzp1_b32(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svuzp1_b32(op1.sve_into(), op2.sve_into()) } } #[doc = "Concatenate even elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1_b64)"] @@ -43382,10 +43369,10 @@ pub fn svuzp1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(uzp1))] pub fn svuzp1_b64(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.nxv2i1")] - fn _svuzp1_b64(op1: svbool2_t, op2: svbool2_t) -> svbool2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.b64")] + fn _svuzp1_b64(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svuzp1_b64(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svuzp1_b64(op1.sve_into(), op2.sve_into()) } } #[doc = "Concatenate even elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1[_f32])"] @@ -43501,6 +43488,19 @@ pub fn svuzp1_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t { pub fn svuzp1_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t { unsafe { svuzp1_s64(op1.as_signed(), op2.as_signed()).as_unsigned() } } +#[doc = "Concatenate even elements from two inputs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1[_b8])"] +#[inline] +#[target_feature(enable = "sve")] +#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] +#[cfg_attr(test, assert_instr(uzp1))] +pub fn svuzp1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.nxv16i1")] + fn _svuzp1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; + } + unsafe { _svuzp1_b8(op1, op2) } +} #[doc = "Concatenate even quadwords from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1q[_f32])"] #[inline] @@ -43616,19 +43616,6 @@ pub fn svuzp1q_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t { unsafe { svuzp1q_s64(op1.as_signed(), op2.as_signed()).as_unsigned() } } #[doc = "Concatenate odd elements from two inputs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2_b8)"] -#[inline] -#[target_feature(enable = "sve")] -#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] -#[cfg_attr(test, assert_instr(uzp2))] -pub fn svuzp2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.nxv16i1")] - fn _svuzp2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; - } - unsafe { _svuzp2_b8(op1, op2) } -} -#[doc = "Concatenate odd elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2_b16)"] #[inline] #[target_feature(enable = "sve")] @@ -43636,10 +43623,10 @@ pub fn svuzp2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(uzp2))] pub fn svuzp2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.nxv8i1")] - fn _svuzp2_b16(op1: svbool8_t, op2: svbool8_t) -> svbool8_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.b16")] + fn _svuzp2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svuzp2_b16(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svuzp2_b16(op1.sve_into(), op2.sve_into()) } } #[doc = "Concatenate odd elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2_b32)"] @@ -43649,10 +43636,10 @@ pub fn svuzp2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(uzp2))] pub fn svuzp2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.nxv4i1")] - fn _svuzp2_b32(op1: svbool4_t, op2: svbool4_t) -> svbool4_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.b32")] + fn _svuzp2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svuzp2_b32(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svuzp2_b32(op1.sve_into(), op2.sve_into()) } } #[doc = "Concatenate odd elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2_b64)"] @@ -43662,10 +43649,10 @@ pub fn svuzp2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(uzp2))] pub fn svuzp2_b64(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.nxv2i1")] - fn _svuzp2_b64(op1: svbool2_t, op2: svbool2_t) -> svbool2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.b64")] + fn _svuzp2_b64(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svuzp2_b64(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svuzp2_b64(op1.sve_into(), op2.sve_into()) } } #[doc = "Concatenate odd elements from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2[_f32])"] @@ -43781,6 +43768,19 @@ pub fn svuzp2_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t { pub fn svuzp2_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t { unsafe { svuzp2_s64(op1.as_signed(), op2.as_signed()).as_unsigned() } } +#[doc = "Concatenate odd elements from two inputs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2[_b8])"] +#[inline] +#[target_feature(enable = "sve")] +#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] +#[cfg_attr(test, assert_instr(uzp2))] +pub fn svuzp2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.nxv16i1")] + fn _svuzp2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; + } + unsafe { _svuzp2_b8(op1, op2) } +} #[doc = "Concatenate odd quadwords from two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2q[_f32])"] #[inline] @@ -44421,19 +44421,6 @@ pub fn svwrffr(op: svbool_t) { unsafe { _svwrffr(op) } } #[doc = "Interleave elements from low halves of two inputs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1_b8)"] -#[inline] -#[target_feature(enable = "sve")] -#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] -#[cfg_attr(test, assert_instr(zip1))] -pub fn svzip1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.nxv16i1")] - fn _svzip1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; - } - unsafe { _svzip1_b8(op1, op2) } -} -#[doc = "Interleave elements from low halves of two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1_b16)"] #[inline] #[target_feature(enable = "sve")] @@ -44441,10 +44428,10 @@ pub fn svzip1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(zip1))] pub fn svzip1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.nxv8i1")] - fn _svzip1_b16(op1: svbool8_t, op2: svbool8_t) -> svbool8_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.b16")] + fn _svzip1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svzip1_b16(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svzip1_b16(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave elements from low halves of two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1_b32)"] @@ -44454,10 +44441,10 @@ pub fn svzip1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(zip1))] pub fn svzip1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.nxv4i1")] - fn _svzip1_b32(op1: svbool4_t, op2: svbool4_t) -> svbool4_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.b32")] + fn _svzip1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svzip1_b32(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svzip1_b32(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave elements from low halves of two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1_b64)"] @@ -44467,10 +44454,10 @@ pub fn svzip1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(zip1))] pub fn svzip1_b64(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.nxv2i1")] - fn _svzip1_b64(op1: svbool2_t, op2: svbool2_t) -> svbool2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.b64")] + fn _svzip1_b64(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svzip1_b64(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svzip1_b64(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave elements from low halves of two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1[_f32])"] @@ -44586,6 +44573,19 @@ pub fn svzip1_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t { pub fn svzip1_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t { unsafe { svzip1_s64(op1.as_signed(), op2.as_signed()).as_unsigned() } } +#[doc = "Interleave elements from low halves of two inputs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1[_b8])"] +#[inline] +#[target_feature(enable = "sve")] +#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] +#[cfg_attr(test, assert_instr(zip1))] +pub fn svzip1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.nxv16i1")] + fn _svzip1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; + } + unsafe { _svzip1_b8(op1, op2) } +} #[doc = "Interleave quadwords from low halves of two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1q[_f32])"] #[inline] @@ -44701,19 +44701,6 @@ pub fn svzip1q_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t { unsafe { svzip1q_s64(op1.as_signed(), op2.as_signed()).as_unsigned() } } #[doc = "Interleave elements from high halves of two inputs"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2_b8)"] -#[inline] -#[target_feature(enable = "sve")] -#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] -#[cfg_attr(test, assert_instr(zip2))] -pub fn svzip2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.nxv16i1")] - fn _svzip2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; - } - unsafe { _svzip2_b8(op1, op2) } -} -#[doc = "Interleave elements from high halves of two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2_b16)"] #[inline] #[target_feature(enable = "sve")] @@ -44721,10 +44708,10 @@ pub fn svzip2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(zip2))] pub fn svzip2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.nxv8i1")] - fn _svzip2_b16(op1: svbool8_t, op2: svbool8_t) -> svbool8_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.b16")] + fn _svzip2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svzip2_b16(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svzip2_b16(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave elements from high halves of two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2_b32)"] @@ -44734,10 +44721,10 @@ pub fn svzip2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(zip2))] pub fn svzip2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.nxv4i1")] - fn _svzip2_b32(op1: svbool4_t, op2: svbool4_t) -> svbool4_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.b32")] + fn _svzip2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svzip2_b32(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svzip2_b32(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave elements from high halves of two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2_b64)"] @@ -44747,10 +44734,10 @@ pub fn svzip2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t { #[cfg_attr(test, assert_instr(zip2))] pub fn svzip2_b64(op1: svbool_t, op2: svbool_t) -> svbool_t { unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.nxv2i1")] - fn _svzip2_b64(op1: svbool2_t, op2: svbool2_t) -> svbool2_t; + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.b64")] + fn _svzip2_b64(op1: svbool_t, op2: svbool_t) -> svbool_t; } - unsafe { _svzip2_b64(op1.sve_into(), op2.sve_into()).sve_into() } + unsafe { _svzip2_b64(op1.sve_into(), op2.sve_into()) } } #[doc = "Interleave elements from high halves of two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2[_f32])"] @@ -44866,6 +44853,19 @@ pub fn svzip2_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t { pub fn svzip2_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t { unsafe { svzip2_s64(op1.as_signed(), op2.as_signed()).as_unsigned() } } +#[doc = "Interleave elements from high halves of two inputs"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2[_b8])"] +#[inline] +#[target_feature(enable = "sve")] +#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")] +#[cfg_attr(test, assert_instr(zip2))] +pub fn svzip2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.nxv16i1")] + fn _svzip2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t; + } + unsafe { _svzip2_b8(op1, op2) } +} #[doc = "Interleave quadwords from high halves of two inputs"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2q[_f32])"] #[inline] diff --git a/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml index 057491d31c..138d5ba311 100644 --- a/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml +++ b/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml @@ -1021,7 +1021,7 @@ intrinsics: doc: Interleave elements from low halves of two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] + types: [b8, f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] assert_instr: [zip1] compose: - LLVMLink: { name: "zip1.{sve_type}" } @@ -1031,10 +1031,13 @@ intrinsics: doc: Interleave elements from low halves of two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [b8, b16, b32, b64] + types: [b16, b32, b64] assert_instr: [zip1] compose: - - LLVMLink: { name: "zip1.{sve_type}" } + - LLVMLink: + name: "llvm.aarch64.sve.zip1.b{size}" + arguments: ["op1: svbool_t", "op2: svbool_t"] + return_type: "svbool_t" - name: svzip1q[_{type}] attr: [*sve-unstable] @@ -1052,7 +1055,7 @@ intrinsics: doc: Interleave elements from high halves of two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] + types: [b8, f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] assert_instr: [zip2] compose: - LLVMLink: { name: "zip2.{sve_type}" } @@ -1062,10 +1065,13 @@ intrinsics: doc: Interleave elements from high halves of two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [b8, b16, b32, b64] + types: [b16, b32, b64] assert_instr: [zip2] compose: - - LLVMLink: { name: "zip2.{sve_type}" } + - LLVMLink: + name: "llvm.aarch64.sve.zip2.b{size}" + arguments: ["op1: svbool_t", "op2: svbool_t"] + return_type: "svbool_t" - name: svzip2q[_{type}] attr: [*sve-unstable] @@ -1083,7 +1089,7 @@ intrinsics: doc: Concatenate even elements from two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] + types: [b8, f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] assert_instr: [uzp1] compose: - LLVMLink: { name: "uzp1.{sve_type}" } @@ -1093,10 +1099,13 @@ intrinsics: doc: Concatenate even elements from two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [b8, b16, b32, b64] + types: [b16, b32, b64] assert_instr: [uzp1] compose: - - LLVMLink: { name: "uzp1.{sve_type}" } + - LLVMLink: + name: "llvm.aarch64.sve.uzp1.b{size}" + arguments: ["op1: svbool_t", "op2: svbool_t"] + return_type: "svbool_t" - name: svuzp1q[_{type}] attr: [*sve-unstable] @@ -1114,7 +1123,7 @@ intrinsics: doc: Concatenate odd elements from two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] + types: [b8, f32, f64, i8, i16, i32, i64, u8, u16, u32, u64] assert_instr: [uzp2] compose: - LLVMLink: { name: "uzp2.{sve_type}" } @@ -1124,10 +1133,13 @@ intrinsics: doc: Concatenate odd elements from two inputs arguments: ["op1: {sve_type}", "op2: {sve_type}"] return_type: "{sve_type}" - types: [b8, b16, b32, b64] + types: [b16, b32, b64] assert_instr: [uzp2] compose: - - LLVMLink: { name: "uzp2.{sve_type}" } + - LLVMLink: + name: "llvm.aarch64.sve.uzp2.b{size}" + arguments: ["op1: svbool_t", "op2: svbool_t"] + return_type: "svbool_t" - name: svuzp2q[_{type}] attr: [*sve-unstable] From 38c08c35166fcd727299d64db599c5b937ba99b5 Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 15 Jun 2026 10:20:39 +0000 Subject: [PATCH 30/32] intrinsic-test: skip `sveorv*` (llvm-project#203921) `sveorv` intrinsics trigger a miscompile in LLVM where the call to the Rust intrinsic is optimised out and replaced with a zero, which is incorrect. --- crates/intrinsic-test/src/arm/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index efe1489d4c..19c6b418f6 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -112,6 +112,10 @@ impl SupportedArchitecture for Arm { // zeroed vectors in Rust, which are inherently going to be different than the // undefined vectors returned by the C intrinsics. .filter(|i| !i.name.starts_with("svundef")) + // Skip `sveorv` intrinsics - the code produced by `intrinsic-test` for these + // miscompiles and the Rust intrinsic call gets replaced by a constant zero (see + // llvm/llvm-project#203921). + .filter(|i| !i.name.starts_with("sveorv")) // Skip pointers for now, we would probably need to look at the return // type to work out how many elements we need to point to. .filter(|i| !i.arguments.iter().any(|a| a.is_ptr())) From 0a396d635da6e7a7cc04af4a8d3b7c7b72822b2a Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 15 Jun 2026 10:20:39 +0000 Subject: [PATCH 31/32] intrinsic-test: skip `svld*_gather_*` These tests require that we generate test arrays with values that are valid when cast to a pointer, which we don't currently support. --- crates/intrinsic-test/src/arm/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 19c6b418f6..484ca5b9b5 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -116,6 +116,9 @@ impl SupportedArchitecture for Arm { // miscompiles and the Rust intrinsic call gets replaced by a constant zero (see // llvm/llvm-project#203921). .filter(|i| !i.name.starts_with("sveorv")) + // These load intrinsics expect each element in the scalable vector `bases` argument to + // be able to be cast to a pointer, which we don't support generating tests for yet. + .filter(|i| !(i.name.starts_with("svld") && i.name.contains("_gather_"))) // Skip pointers for now, we would probably need to look at the return // type to work out how many elements we need to point to. .filter(|i| !i.arguments.iter().any(|a| a.is_ptr())) From 7bbab4cc960400178032aa30a5b7a1353a9f0580 Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 15 Jun 2026 10:20:39 +0000 Subject: [PATCH 32/32] intrinsic-test: `__ARM_FEATURE_SVE` ifdef Only include the `arm_sve.h` header if SVE is available - avoiding the include on 32-bit or big-endian, etc. --- crates/intrinsic-test/src/arm/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 484ca5b9b5..48fa88185e 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -31,7 +31,9 @@ impl SupportedArchitecture for Arm { #include #include #include +#ifdef __ARM_FEATURE_SVE #include +#endif "#; const RUST_PRELUDE: &str = RUST_PRELUDE;