From 474b3899625aeba6c5249e5500d21d38a6f1ffc4 Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Wed, 17 Jun 2026 14:14:17 -0400 Subject: [PATCH 1/2] powerpc/vsx: implement vec_sld for vector_double --- crates/core_arch/src/powerpc/altivec.rs | 26 ++++--------------------- crates/core_arch/src/powerpc/macros.rs | 19 ++++++++++++++++++ crates/core_arch/src/powerpc/vsx.rs | 25 ++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 22 deletions(-) diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index ccfed88a93..27ce8a5c26 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ b/crates/core_arch/src/powerpc/altivec.rs @@ -411,7 +411,7 @@ unsafe extern "unadjusted" { } #[macro_use] -mod sealed { +pub(crate) mod sealed { use super::*; #[unstable(feature = "stdarch_powerpc", issue = "111145")] @@ -3030,7 +3030,7 @@ mod sealed { #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vsldoi, UIMM4 = 1))] - unsafe fn vsldoi( + pub(crate) unsafe fn vsldoi( a: vector_unsigned_char, b: vector_unsigned_char, ) -> vector_unsigned_char { @@ -3082,9 +3082,9 @@ mod sealed { // TODO: collapse the two once generic_const_exprs are usable. #[inline] - #[target_feature(enable = "altivec")] + #[target_feature(enable = "vsx")] #[cfg_attr(test, assert_instr(xxsldwi, UIMM2 = 1))] - unsafe fn xxsldwi( + pub(crate) unsafe fn xxsldwi( a: vector_unsigned_char, b: vector_unsigned_char, ) -> vector_unsigned_char { @@ -3134,24 +3134,6 @@ mod sealed { } } - macro_rules! 
impl_vec_sld { - ($($ty:ident),+) => { $( - #[unstable(feature = "stdarch_powerpc", issue = "111145")] - impl VectorSld for $ty { - #[inline] - #[target_feature(enable = "altivec")] - unsafe fn vec_sld(self, b: Self) -> Self { - transmute(vsldoi::(transmute(self), transmute(b))) - } - #[inline] - #[target_feature(enable = "altivec")] - unsafe fn vec_sldw(self, b: Self) -> Self { - transmute(xxsldwi::(transmute(self), transmute(b))) - } - } - )+ }; - } - impl_vec_sld! { vector_bool_char, vector_signed_char, vector_unsigned_char } impl_vec_sld! { vector_bool_short, vector_signed_short, vector_unsigned_short } impl_vec_sld! { vector_bool_int, vector_signed_int, vector_unsigned_int } diff --git a/crates/core_arch/src/powerpc/macros.rs b/crates/core_arch/src/powerpc/macros.rs index f697d4d257..f60142a718 100644 --- a/crates/core_arch/src/powerpc/macros.rs +++ b/crates/core_arch/src/powerpc/macros.rs @@ -274,6 +274,25 @@ macro_rules! t_b { }; } +macro_rules! impl_vec_sld { + ($($ty:ident),+) => { $( + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorSld for $ty { + #[inline] + #[target_feature(enable = "altivec")] + unsafe fn vec_sld(self, b: Self) -> Self { + transmute(vsldoi::(transmute(self), transmute(b))) + } + #[inline] + #[target_feature(enable = "vsx")] + unsafe fn vec_sldw(self, b: Self) -> Self { + transmute(xxsldwi::(transmute(self), transmute(b))) + } + } + )+ }; +} + +pub(crate) use impl_vec_sld; pub(crate) use impl_vec_trait; pub(crate) use s_t_l; pub(crate) use t_b; diff --git a/crates/core_arch/src/powerpc/vsx.rs b/crates/core_arch/src/powerpc/vsx.rs index 60cb2ad44c..eab287f627 100644 --- a/crates/core_arch/src/powerpc/vsx.rs +++ b/crates/core_arch/src/powerpc/vsx.rs @@ -10,6 +10,7 @@ use crate::core_arch::powerpc::*; use crate::core_arch::simd::*; +use super::macros::*; #[cfg(test)] use stdarch_test::assert_instr; @@ -173,6 +174,10 @@ mod sealed { vec_mergeeo! 
{ vector_float, mergee, mergeo } } +// Implement AltiVec's VectorSld trait for vector_double to enable vec_sld support +use crate::core_arch::powerpc::altivec::sealed::{VectorSld, vsldoi, xxsldwi}; +impl_vec_sld! { vector_double } + /// Vector permute. #[inline] #[target_feature(enable = "vsx")] @@ -255,4 +260,24 @@ mod tests { test_vec_xxpermdi! {test_vec_xxpermdi_i64x2, i64x2, vector_signed_long, [0], [-1], [2], [-3]} test_vec_xxpermdi! {test_vec_xxpermdi_m64x2, m64x2, vector_bool_long, [false], [true], [false], [true]} test_vec_xxpermdi! {test_vec_xxpermdi_f64x2, f64x2, vector_double, [0.0], [1.0], [2.0], [3.0]} + + #[simd_test(enable = "altivec")] + fn test_vec_sld_f64x2() { + use crate::core_arch::powerpc::altivec::sealed::VectorSld; + + let a = vector_double::from(f64x2::from_array([1.0, 2.0])); + let b = vector_double::from(f64x2::from_array([3.0, 4.0])); + + // Shift left by 8 bytes (1 f64 element) + // On little-endian: shifts right in memory, result is [b[1], a[0]] = [4.0, 1.0] + // On big-endian: shifts left in memory, result is [a[1], b[0]] = [2.0, 3.0] + unsafe { + let result: f64x2 = transmute(a.vec_sld::<8>(b)); + #[cfg(target_endian = "little")] + let expected = f64x2::from_array([4.0, 1.0]); + #[cfg(target_endian = "big")] + let expected = f64x2::from_array([2.0, 3.0]); + assert_eq!(result, expected); + } + } } From cf3363dc7a5cad88cbe193d6f5f7fbd387647e6c Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Thu, 18 Jun 2026 08:36:17 -0400 Subject: [PATCH 2/2] powerpc/vsx: implement vec_xl for vector_double --- crates/core_arch/src/powerpc/altivec.rs | 36 +----------- crates/core_arch/src/powerpc/macros.rs | 40 ++++++++++++++ crates/core_arch/src/powerpc/vsx.rs | 73 ++++++++++++++++++++++--- 3 files changed, 106 insertions(+), 43 deletions(-) diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs index 27ce8a5c26..d5b3348816 100644 --- a/crates/core_arch/src/powerpc/altivec.rs +++ 
b/crates/core_arch/src/powerpc/altivec.rs @@ -728,40 +728,8 @@ pub(crate) mod sealed { unsafe fn vec_xl(self, a: isize) -> Self::Result; } - macro_rules! impl_vec_xl { - ($fun:ident $notpwr9:ident / $pwr9:ident $ty:ident) => { - #[inline] - #[target_feature(enable = "altivec")] - #[cfg_attr( - all(test, not(target_feature = "power9-altivec")), - assert_instr($notpwr9) - )] - #[cfg_attr(all(test, target_feature = "power9-altivec"), assert_instr($pwr9))] - pub unsafe fn $fun(a: isize, b: *const $ty) -> t_t_l!($ty) { - let addr = (b as *const u8).offset(a); - - let mut r = mem::MaybeUninit::uninit(); - - crate::ptr::copy_nonoverlapping( - addr, - r.as_mut_ptr() as *mut u8, - mem::size_of::(), - ); - - r.assume_init() - } - - #[unstable(feature = "stdarch_powerpc", issue = "111145")] - impl VectorXl for *const $ty { - type Result = t_t_l!($ty); - #[inline] - #[target_feature(enable = "altivec")] - unsafe fn vec_xl(self, a: isize) -> Self::Result { - $fun(a, self) - } - } - }; - } + // Use the impl_vec_xl macro from macros module + use crate::core_arch::powerpc::macros::impl_vec_xl; impl_vec_xl! { vec_xl_i8 lxvd2x / lxv i8 } impl_vec_xl! { vec_xl_u8 lxvd2x / lxv u8 } diff --git a/crates/core_arch/src/powerpc/macros.rs b/crates/core_arch/src/powerpc/macros.rs index f60142a718..56707cb327 100644 --- a/crates/core_arch/src/powerpc/macros.rs +++ b/crates/core_arch/src/powerpc/macros.rs @@ -180,6 +180,9 @@ macro_rules! t_t_l { (f32) => { vector_float }; + (f64) => { + vector_double + }; } macro_rules! t_t_s { @@ -292,8 +295,45 @@ macro_rules! impl_vec_sld { )+ }; } +macro_rules! 
impl_vec_xl { + ($fun:ident $notpwr9:ident / $pwr9:ident $ty:ident) => { + #[inline] + #[target_feature(enable = "vsx")] + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + #[cfg_attr( + all(test, not(target_feature = "power9-altivec")), + assert_instr($notpwr9) + )] + #[cfg_attr(all(test, target_feature = "power9-altivec"), assert_instr($pwr9))] + pub unsafe fn $fun(a: isize, b: *const $ty) -> t_t_l!($ty) { + let addr = (b as *const u8).offset(a); + + let mut r = mem::MaybeUninit::uninit(); + + crate::ptr::copy_nonoverlapping( + addr, + r.as_mut_ptr() as *mut u8, + mem::size_of::(), + ); + + r.assume_init() + } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + impl VectorXl for *const $ty { + type Result = t_t_l!($ty); + #[inline] + #[target_feature(enable = "vsx")] + unsafe fn vec_xl(self, a: isize) -> Self::Result { + $fun(a, self) + } + } + }; +} + pub(crate) use impl_vec_sld; pub(crate) use impl_vec_trait; +pub(crate) use impl_vec_xl; pub(crate) use s_t_l; pub(crate) use t_b; pub(crate) use t_t_l; diff --git a/crates/core_arch/src/powerpc/vsx.rs b/crates/core_arch/src/powerpc/vsx.rs index eab287f627..ed2863f95f 100644 --- a/crates/core_arch/src/powerpc/vsx.rs +++ b/crates/core_arch/src/powerpc/vsx.rs @@ -8,14 +8,14 @@ #![allow(non_camel_case_types)] +use super::macros::*; use crate::core_arch::powerpc::*; use crate::core_arch::simd::*; -use super::macros::*; #[cfg(test)] use stdarch_test::assert_instr; -use crate::mem::transmute; +use crate::mem::{self, transmute}; types! { #![unstable(feature = "stdarch_powerpc", issue = "111145")] @@ -172,11 +172,15 @@ mod sealed { vec_mergeeo! { vector_unsigned_int, mergee, mergeo } vec_mergeeo! { vector_bool_int, mergee, mergeo } vec_mergeeo! { vector_float, mergee, mergeo } -} -// Implement AltiVec's VectorSld trait for vector_double to enable vec_sld support -use crate::core_arch::powerpc::altivec::sealed::{VectorSld, vsldoi, xxsldwi}; -impl_vec_sld! 
{ vector_double } + // Implement vec_xl for f64 (vector_double). + use crate::core_arch::powerpc::altivec::sealed::VectorXl; + impl_vec_xl! { vec_xl_f64 lxvd2x / lxv f64 } + + // Implement vec_sld for vector_double. + use crate::core_arch::powerpc::altivec::sealed::{VectorSld, vsldoi, xxsldwi}; + impl_vec_sld! { vector_double } +} /// Vector permute. #[inline] @@ -268,9 +272,9 @@ mod tests { let a = vector_double::from(f64x2::from_array([1.0, 2.0])); let b = vector_double::from(f64x2::from_array([3.0, 4.0])); - // Shift left by 8 bytes (1 f64 element) - // On little-endian: shifts right in memory, result is [b[1], a[0]] = [4.0, 1.0] - // On big-endian: shifts left in memory, result is [a[1], b[0]] = [2.0, 3.0] + // Shift left by 8 bytes (1 f64 element). + // On little-endian: shifts right in memory, result is [b[1], a[0]] = [4.0, 1.0]. + // On big-endian: shifts left in memory, result is [a[1], b[0]] = [2.0, 3.0]. unsafe { let result: f64x2 = transmute(a.vec_sld::<8>(b)); #[cfg(target_endian = "little")] @@ -280,4 +284,55 @@ mod tests { assert_eq!(result, expected); } } + + #[simd_test(enable = "vsx")] + fn test_vec_sldw_f64x2() { + use crate::core_arch::powerpc::altivec::sealed::VectorSld; + + let a = vector_double::from(f64x2::from_array([1.0, 2.0])); + let b = vector_double::from(f64x2::from_array([3.0, 4.0])); + + // Shift left by 1 word (4 bytes). + // vec_sldw shifts by words (4-byte units), so UIMM2=1 means shift by 4 bytes + // which is equivalent to vec_sld with UIMM4=4. + unsafe { + let result: f64x2 = transmute(a.vec_sldw::<1>(b)); + // Shifting by 4 bytes (half of an f64) will mix the high/low parts. + // On little-endian: shifts right in memory, result is [b[1], a[0]] but with 4-byte shift. + // On big-endian: shifts left in memory, result is [a[1], b[0]] but with 4-byte shift. + // Since we're shifting by 4 bytes (half an f64), the result will have mixed bits. + // Verify using vec_sld with the same shift amount for comparison.
+ let expected: f64x2 = transmute(a.vec_sld::<4>(b)); + assert_eq!(result, expected); + } + } + + #[simd_test(enable = "vsx")] + fn test_vec_xl_f64() { + let pat = [1.0f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; + + // Test loading from aligned offset 0 + unsafe { + let result = vec_xl(0, pat.as_ptr()); + let result_f64: f64x2 = transmute(result); + assert_eq!(result_f64.as_array()[0], 1.0); + assert_eq!(result_f64.as_array()[1], 2.0); + } + + // Test loading from offset 16 (2 f64 elements = 16 bytes) + unsafe { + let result = vec_xl(16, pat.as_ptr()); + let result_f64: f64x2 = transmute(result); + assert_eq!(result_f64.as_array()[0], 3.0); + assert_eq!(result_f64.as_array()[1], 4.0); + } + + // Test loading from offset 32 (4 f64 elements = 32 bytes) + unsafe { + let result = vec_xl(32, pat.as_ptr()); + let result_f64: f64x2 = transmute(result); + assert_eq!(result_f64.as_array()[0], 5.0); + assert_eq!(result_f64.as_array()[1], 6.0); + } + } }