From e08d817d962653706bae655a15824f198a690bbf Mon Sep 17 00:00:00 2001 From: Parker Bibus Date: Mon, 30 Mar 2026 11:16:03 -0700 Subject: [PATCH 1/5] Fix SVE benchmark CreateWhileLessThanMask API rename for .NET 11 The .NET 11 runtime renamed CreateWhileLessThanMask methods from bit-width suffixes (8Bit, 16Bit, 32Bit, 64Bit) to type-name suffixes (Byte, UInt16, Int32, UInt32, Single, UInt64, etc.). This caused MissingMethodException at runtime on SVE-capable machines: - CreateWhileLessThanMask8Bit -> CreateWhileLessThanMaskByte - CreateWhileLessThanMask16Bit -> CreateWhileLessThanMaskUInt16/Int16 - CreateWhileLessThanMask32Bit -> CreateWhileLessThanMaskUInt32/Int32/Single - CreateWhileLessThanMask64Bit -> CreateWhileLessThanMaskUInt64 Added SveMaskHelper shim class with #if NET11_0_OR_GREATER conditionals to support both the old API (net9.0/net10.0) and new API (net11.0+). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/benchmarks/micro/sve/Clamp.cs | 6 +- src/benchmarks/micro/sve/ComplexDotProduct.cs | 8 +- src/benchmarks/micro/sve/ComplexMultiply.cs | 8 +- src/benchmarks/micro/sve/Exponent.cs | 4 +- src/benchmarks/micro/sve/FP64Overflow.cs | 8 +- src/benchmarks/micro/sve/FastDivision.cs | 4 +- src/benchmarks/micro/sve/GatherLoad.cs | 4 +- src/benchmarks/micro/sve/Logarithm.cs | 4 +- src/benchmarks/micro/sve/MultiplyAdd.cs | 4 +- src/benchmarks/micro/sve/MultiplyPow2.cs | 4 +- src/benchmarks/micro/sve/OddEvenSort.cs | 4 +- src/benchmarks/micro/sve/PairwiseAdd.cs | 8 +- src/benchmarks/micro/sve/Partition.cs | 4 +- src/benchmarks/micro/sve/ScatterStore.cs | 4 +- src/benchmarks/micro/sve/SobelFilter.cs | 8 +- src/benchmarks/micro/sve/SquareRoot.cs | 4 +- src/benchmarks/micro/sve/StrCmp.cs | 4 +- src/benchmarks/micro/sve/StrIndexOf.cs | 4 +- src/benchmarks/micro/sve/StrLen.cs | 4 +- src/benchmarks/micro/sve/SveMaskHelper.cs | 97 +++++++++++++++++++ src/benchmarks/micro/sve/TCPChecksum.cs | 4 +- src/benchmarks/micro/sve/UpscaleFilter.cs | 4 +- src/benchmarks/micro/sve/VectorMax.cs | 6 +- 23 files changed, 153 insertions(+), 56 deletions(-) create mode 100644 src/benchmarks/micro/sve/SveMaskHelper.cs diff --git a/src/benchmarks/micro/sve/Clamp.cs b/src/benchmarks/micro/sve/Clamp.cs index 9788913eb98..ba044edbf3f 100644 --- a/src/benchmarks/micro/sve/Clamp.cs +++ b/src/benchmarks/micro/sve/Clamp.cs @@ -90,7 +90,7 @@ public unsafe void SveClamp() Vector valVec = new Vector(Size / 2); Vector minVec = Vector.Indices; Vector pTrue = Sve.CreateTrueMaskInt32(); - Vector pLoop = (Vector)Sve.CreateWhileLessThanMask32Bit(i, length); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(i, length); while (Sve.TestFirstTrue(pTrue, pLoop)) { Vector maxVec = Sve.ShiftLeftLogical(minVec, Vector.One); @@ -99,7 +99,7 @@ public unsafe void SveClamp() minVec = Sve.Add(minVec, new Vector(cntw)); i += cntw; - pLoop = (Vector)Sve.CreateWhileLessThanMask32Bit(i, length); + pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(i, length); } _output = (int)Sve.AddAcross(resVec).ToScalar(); } @@ -116,7 +116,7 @@ public unsafe void SveTail() Vector minVec = Vector.Indices; for (; i < length; i += cntw) { - Vector pLoop = (Vector)Sve.CreateWhileLessThanMask32Bit(i, length); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(i, length); Vector maxVec = Sve.ShiftLeftLogical(minVec, Vector.One); Vector tmpVec = Sve.Min(Sve.Max(valVec, minVec), maxVec); resVec = Sve.ConditionalSelect(pLoop, Sve.Add(resVec, tmpVec), resVec); diff --git a/src/benchmarks/micro/sve/ComplexDotProduct.cs b/src/benchmarks/micro/sve/ComplexDotProduct.cs index fafbd9b0236..670de3a246a 100644 --- a/src/benchmarks/micro/sve/ComplexDotProduct.cs +++ b/src/benchmarks/micro/sve/ComplexDotProduct.cs @@ -213,7 +213,7 @@ public unsafe void SveComplexDotProduct() // Create mask for the imaginary half of a word. Vector imMask = (Vector)(new Vector(0xFFFF0000u)); Vector pTrue = Sve.CreateTrueMaskInt32(); - Vector pLoop = (Vector)Sve.CreateWhileLessThanMask32Bit(0, Size); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(0, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { // Load inputs. @@ -248,7 +248,7 @@ public unsafe void SveComplexDotProduct() // Handle loop. i += cntw; - pLoop = (Vector)Sve.CreateWhileLessThanMask32Bit(i, Size); + pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(i, Size); } } } @@ -263,7 +263,7 @@ public unsafe void Sve2ComplexDotProduct() int cntw = (int)Sve.Count32BitElements(); Vector pTrue = Sve.CreateTrueMaskInt32(); - Vector pLoop = (Vector)Sve.CreateWhileLessThanMask32Bit(0, Size); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(0, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { Vector a1 = (Vector)Sve.LoadVector(pLoop, (int*)(a + 4 * i)); @@ -276,7 +276,7 @@ public unsafe void Sve2ComplexDotProduct() // Handle loop. i += cntw; - pLoop = (Vector)Sve.CreateWhileLessThanMask32Bit(i, Size); + pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(i, Size); } } } diff --git a/src/benchmarks/micro/sve/ComplexMultiply.cs b/src/benchmarks/micro/sve/ComplexMultiply.cs index 199d6948f4c..22c2d7bae82 100644 --- a/src/benchmarks/micro/sve/ComplexMultiply.cs +++ b/src/benchmarks/micro/sve/ComplexMultiply.cs @@ -135,11 +135,11 @@ public unsafe void SveComplexMultiply() // Handle remaining elements using predicates. lmt = Size * 2; - Vector pLoop = Sve.CreateWhileLessThanMask32Bit(i, lmt); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(i, lmt); if (Sve.TestFirstTrue(pTrue, pLoop)) { // Compute the predicate for elements in i + cntw. - Vector pTail = Sve.CreateWhileLessThanMask32Bit(i + cntw, lmt); + Vector pTail = SveMaskHelper.CreateWhileLessThanMaskUInt32(i + cntw, lmt); // Unzip the predicates pLoop and pTail for 2xVector load/store. Vector pInner = Sve.UnzipEven(pLoop, pTail); @@ -190,7 +190,7 @@ public unsafe void Sve2ComplexMultiply() // Handle remaining elements. lmt = Size; - Vector pLoop = Sve.CreateWhileLessThanMask64Bit(i, lmt); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt64(i, lmt); while (Sve.TestFirstTrue(pTrue, pLoop)) { Vector a1 = (Vector)Sve2.LoadVector(pLoop, (ulong*)a + i); @@ -201,7 +201,7 @@ public unsafe void Sve2ComplexMultiply() Sve.StoreAndZip(pLoop, (ulong*)c + i, (Vector)(c1)); i += cntd; - pLoop = Sve.CreateWhileLessThanMask64Bit(i, lmt); + pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt64(i, lmt); } } } diff --git a/src/benchmarks/micro/sve/Exponent.cs b/src/benchmarks/micro/sve/Exponent.cs index add346fb147..44f300e19f7 100644 --- a/src/benchmarks/micro/sve/Exponent.cs +++ b/src/benchmarks/micro/sve/Exponent.cs @@ -168,7 +168,7 @@ public unsafe void SveExponent() Vector constVec = new Vector(new ReadOnlySpan(&d[3], 4)); Vector pTrue = Sve.CreateTrueMaskUInt32(); - Vector pLoop = Sve.CreateWhileLessThanMask32Bit(0, Size); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(0, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { Vector x = (Vector)Sve.LoadVector(pLoop, (uint*)(input + i)); @@ -197,7 +197,7 @@ public unsafe void SveExponent() // Handle loop. i += cntw; - pLoop = Sve.CreateWhileLessThanMask32Bit(i, Size); + pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(i, Size); } } } diff --git a/src/benchmarks/micro/sve/FP64Overflow.cs b/src/benchmarks/micro/sve/FP64Overflow.cs index 973a7ec6eb2..79702cc602f 100644 --- a/src/benchmarks/micro/sve/FP64Overflow.cs +++ b/src/benchmarks/micro/sve/FP64Overflow.cs @@ -153,7 +153,7 @@ public unsafe void SveFP64Overflow() Vector maskVec = new Vector(1023); Vector pTrue = Sve.CreateTrueMaskUInt64(); - Vector pLoop = Sve.CreateWhileLessThanMask64Bit(i, Size); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt64(i, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { // Load Vector as ulong then convert to Vector. @@ -178,7 +178,7 @@ public unsafe void SveFP64Overflow() // Handle loop. i += cntd; - pLoop = Sve.CreateWhileLessThanMask64Bit(i, Size); + pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt64(i, Size); } } } @@ -193,7 +193,7 @@ public unsafe void Sve2FP64Overflow() int cntd = (int)Sve.Count64BitElements(); Vector pTrue = Sve.CreateTrueMaskUInt64(); - Vector pLoop = Sve.CreateWhileLessThanMask64Bit(i, Size); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt64(i, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { // Load input vectors. @@ -214,7 +214,7 @@ public unsafe void Sve2FP64Overflow() // Handle loop. i += cntd; - pLoop = Sve.CreateWhileLessThanMask64Bit(i, Size); + pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt64(i, Size); } } } diff --git a/src/benchmarks/micro/sve/FastDivision.cs b/src/benchmarks/micro/sve/FastDivision.cs index 24f5b86f790..422b46e37ff 100644 --- a/src/benchmarks/micro/sve/FastDivision.cs +++ b/src/benchmarks/micro/sve/FastDivision.cs @@ -123,7 +123,7 @@ public unsafe void SveFastDivision() int cntd = (int)Sve.Count64BitElements(); Vector pTrue = Sve.CreateTrueMaskUInt64(); - Vector pLoop = Sve.CreateWhileLessThanMask64Bit(i, Size); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt64(i, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { Vector input1Vec = (Vector)Sve.LoadVector(pLoop, (ulong*)input1 + i); @@ -145,7 +145,7 @@ public unsafe void SveFastDivision() Sve.StoreAndZip(pLoop, (ulong*)output + i, (Vector)outVec); i += cntd; - pLoop = Sve.CreateWhileLessThanMask64Bit(i, Size); + pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt64(i, Size); } } } diff --git a/src/benchmarks/micro/sve/GatherLoad.cs b/src/benchmarks/micro/sve/GatherLoad.cs index badec15d255..d1e8bc8fb5c 100644 --- a/src/benchmarks/micro/sve/GatherLoad.cs +++ b/src/benchmarks/micro/sve/GatherLoad.cs @@ -80,7 +80,7 @@ public unsafe void SveGatherLoad() Vector resVec = Vector.Zero; Vector pTrue = Sve.CreateTrueMaskUInt32(); - Vector pLoop = Sve.CreateWhileLessThanMask32Bit(0, Size); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(0, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { // Load indices @@ -91,7 +91,7 @@ public unsafe void SveGatherLoad() resVec = Sve.Add(resVec, objVec); i += cntw; - pLoop = Sve.CreateWhileLessThanMask32Bit(i, Size); + pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(i, Size); } // Add up all elements in resVec. uint res = (uint)Sve.AddAcross(resVec).ToScalar(); diff --git a/src/benchmarks/micro/sve/Logarithm.cs b/src/benchmarks/micro/sve/Logarithm.cs index 3400c2a6b58..e2f37db9c6e 100644 --- a/src/benchmarks/micro/sve/Logarithm.cs +++ b/src/benchmarks/micro/sve/Logarithm.cs @@ -174,7 +174,7 @@ public unsafe void SveLogarithm() Vector pTrue = Sve.CreateTrueMaskUInt32(); Vector pTruef = Sve.CreateTrueMaskSingle(); - Vector pLoop = Sve.CreateWhileLessThanMask32Bit(0, Size); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(0, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { Vector x = (Vector)Sve.LoadVector(pLoop, (uint*)(input + i)); @@ -239,7 +239,7 @@ public unsafe void SveLogarithm() // Handle loop. i += cntw; - pLoop = Sve.CreateWhileLessThanMask32Bit(i, Size); + pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(i, Size); } } } diff --git a/src/benchmarks/micro/sve/MultiplyAdd.cs b/src/benchmarks/micro/sve/MultiplyAdd.cs index 407ebf187cc..d5b8bd33a63 100644 --- a/src/benchmarks/micro/sve/MultiplyAdd.cs +++ b/src/benchmarks/micro/sve/MultiplyAdd.cs @@ -154,7 +154,7 @@ public unsafe void SveMultiplyAdd() // Handle remaining elements using predicates. lmt = Size; - Vector pLoop = (Vector)Sve.CreateWhileLessThanMask32Bit(i, lmt); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(i, lmt); while (Sve.TestAnyTrue(pTrue, pLoop)) { Vector aVec = Sve.LoadVector(pLoop, a + i); @@ -165,7 +165,7 @@ public unsafe void SveMultiplyAdd() // Increment by a vector length. i += cntw; - pLoop = (Vector)Sve.CreateWhileLessThanMask32Bit(i, lmt); + pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(i, lmt); } // Sum up all elements in the 4 result vectors. diff --git a/src/benchmarks/micro/sve/MultiplyPow2.cs b/src/benchmarks/micro/sve/MultiplyPow2.cs index 1639d120a5d..6167fa6f704 100644 --- a/src/benchmarks/micro/sve/MultiplyPow2.cs +++ b/src/benchmarks/micro/sve/MultiplyPow2.cs @@ -116,7 +116,7 @@ public unsafe void SveMultiplyPow2() int cntd = (int)Sve.Count64BitElements(); Vector pTrue = Sve.CreateTrueMaskUInt64(); - Vector pLoop = Sve.CreateWhileLessThanMask64Bit(i, Size); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt64(i, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { // Cast the array pointers to ulong so the predicate can be shared. @@ -129,7 +129,7 @@ public unsafe void SveMultiplyPow2() // Handle loop. i += cntd; - pLoop = Sve.CreateWhileLessThanMask64Bit(i, Size); + pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt64(i, Size); } } } diff --git a/src/benchmarks/micro/sve/OddEvenSort.cs b/src/benchmarks/micro/sve/OddEvenSort.cs index 6e4a302351b..405f03c01a5 100644 --- a/src/benchmarks/micro/sve/OddEvenSort.cs +++ b/src/benchmarks/micro/sve/OddEvenSort.cs @@ -180,7 +180,7 @@ public unsafe void SveOddEvenSort() for (; j < n - 1; j += (cntw << 1)) { // Get predicate for elements to load/store. - Vector pLoop = Sve.CreateWhileLessThanMask32Bit(0, (n - j) / 2); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(0, (n - j) / 2); // Interleaved load elements. (Vector a0, Vector a1) = Sve.Load2xVectorAndUnzip(pLoop, source + j); @@ -248,7 +248,7 @@ public unsafe void SveTail() // Handle tail using predicates. for (; j < n - 1; j += (cntw << 1)) { - Vector pLoop = Sve.CreateWhileLessThanMask32Bit(0, (n - j) / 2); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(0, (n - j) / 2); (Vector a0, Vector a1) = Sve.Load2xVectorAndUnzip(pLoop, source + j); Vector pCmp = Sve.ConditionalSelect(pLoop, Sve.CompareGreaterThan(a0, a1), Sve.CreateFalseMaskUInt32()); diff --git a/src/benchmarks/micro/sve/PairwiseAdd.cs b/src/benchmarks/micro/sve/PairwiseAdd.cs index 3d6af5446b5..20f21e9dacb 100644 --- a/src/benchmarks/micro/sve/PairwiseAdd.cs +++ b/src/benchmarks/micro/sve/PairwiseAdd.cs @@ -133,11 +133,11 @@ public unsafe void SvePairwiseAdd() // Handle remaining elements using predicates. lmt = Size * 2; - Vector pLoop = (Vector)Sve.CreateWhileLessThanMask32Bit(i, lmt); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(i, lmt); if (Sve.TestFirstTrue(pTrue, pLoop)) { // Compute the predicate for elements in i + cntw. - Vector pTail = (Vector)Sve.CreateWhileLessThanMask32Bit(i + cntw, lmt); + Vector pTail = SveMaskHelper.CreateWhileLessThanMaskInt32(i + cntw, lmt); // Unzip the predicates pLoop and pTail for 2xVector load/store. Vector pInner = Sve.UnzipEven(pLoop, pTail); @@ -181,7 +181,7 @@ public unsafe void Sve2PairwiseAdd() // Handle remaining elements. lmt = Size * 2; - Vector pLoop = (Vector)Sve.CreateWhileLessThanMask32Bit(i, lmt); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(i, lmt); while (Sve.TestFirstTrue(pTrue, pLoop)) { Vector a0 = Sve.LoadVector(pLoop, a + i); @@ -189,7 +189,7 @@ public unsafe void Sve2PairwiseAdd() Vector c0 = Sve2.AddPairwise(a0, b0); Sve.StoreAndZip(pLoop, c + i, c0); i += cntw; - pLoop = (Vector)Sve.CreateWhileLessThanMask32Bit(i, lmt); + pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(i, lmt); } } } diff --git a/src/benchmarks/micro/sve/Partition.cs b/src/benchmarks/micro/sve/Partition.cs index 4fa1222dfeb..e9a47d09523 100644 --- a/src/benchmarks/micro/sve/Partition.cs +++ b/src/benchmarks/micro/sve/Partition.cs @@ -86,7 +86,7 @@ public unsafe ulong SvePartition() ); // Create a predicate for the loop. - Vector pLoop = Sve.CreateWhileLessThanMask32Bit(i, Size); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(i, Size); while (Sve.TestAnyTrue(Sve.CreateTrueMaskUInt32(), pLoop)) { @@ -119,7 +119,7 @@ public unsafe ulong SvePartition() indexRight = Sve.SaturatingIncrementByActiveElementCount(indexRight, pInner); i = Sve.SaturatingIncrementBy32BitElementCount(i, 1); - pLoop = Sve.CreateWhileLessThanMask32Bit(i, Size); + pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(i, Size); } return indexRight; diff --git a/src/benchmarks/micro/sve/ScatterStore.cs b/src/benchmarks/micro/sve/ScatterStore.cs index e5ec7fa651f..6ce25faaaee 100644 --- a/src/benchmarks/micro/sve/ScatterStore.cs +++ b/src/benchmarks/micro/sve/ScatterStore.cs @@ -80,7 +80,7 @@ public unsafe void SveScatterStore() Vector ones = Vector.One; Vector pTrue = Sve.CreateTrueMaskUInt32(); - Vector pLoop = Sve.CreateWhileLessThanMask32Bit(0, Size); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(0, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { Vector idxVec = Sve.LoadVector(pLoop, indices + i); @@ -89,7 +89,7 @@ public unsafe void SveScatterStore() Sve.Scatter(pLoop, objects, idxVec, ones); i += cntw; - pLoop = Sve.CreateWhileLessThanMask32Bit(i, Size); + pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(i, Size); } } } diff --git a/src/benchmarks/micro/sve/SobelFilter.cs b/src/benchmarks/micro/sve/SobelFilter.cs index 19540ea3caa..b4f75d7aec6 100644 --- a/src/benchmarks/micro/sve/SobelFilter.cs +++ b/src/benchmarks/micro/sve/SobelFilter.cs @@ -195,8 +195,8 @@ public unsafe void SveSobelFilter() Vector resVec; // Load coefficients of the filter into vectors. - Vector kxVec = Sve.LoadVector((Vector)Sve.CreateWhileLessThanMask32Bit(0, 3), kx); - Vector kyVec = Sve.LoadVector((Vector)Sve.CreateWhileLessThanMask32Bit(0, 3), ky); + Vector kxVec = Sve.LoadVector(SveMaskHelper.CreateWhileLessThanMaskSingle(0, 3), kx); + Vector kyVec = Sve.LoadVector(SveMaskHelper.CreateWhileLessThanMaskSingle(0, 3), ky); for (int j = 0; j < img_size; j++) { // Load the elements from input and output the intermediate result to temp. @@ -205,7 +205,7 @@ public unsafe void SveSobelFilter() for (int i = 0; i < out_size; i += cntw) { - Vector pRow = (Vector)Sve.CreateWhileLessThanMask32Bit(i, out_size); + Vector pRow = SveMaskHelper.CreateWhileLessThanMaskSingle(i, out_size); // Load input elements from the next 3 columns. Vector col0 = Sve.LoadVector(pRow, in_ptr + i); @@ -228,7 +228,7 @@ public unsafe void SveSobelFilter() for (int i = 0; i < out_size; i += cntw) { - Vector pRow = (Vector)Sve.CreateWhileLessThanMask32Bit(i, out_size); + Vector pRow = SveMaskHelper.CreateWhileLessThanMaskSingle(i, out_size); // Load input elements from the next 3 rows. Vector row0 = Sve.LoadVector(pRow, in_ptr + i); diff --git a/src/benchmarks/micro/sve/SquareRoot.cs b/src/benchmarks/micro/sve/SquareRoot.cs index f889944d9a4..888d32c63d0 100644 --- a/src/benchmarks/micro/sve/SquareRoot.cs +++ b/src/benchmarks/micro/sve/SquareRoot.cs @@ -94,7 +94,7 @@ public unsafe void SveSquareRoot() // We use Vector for predicates since there are no Vector // overloads for TestFirstTrue and CreateWhileLessThanMask etc. Vector pTrue = Sve.CreateTrueMaskUInt32(); - Vector pLoop = Sve.CreateWhileLessThanMask32Bit(0, Size); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(0, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { // Since pLoop is a Vector predicate, we load the input as uint array, @@ -107,7 +107,7 @@ public unsafe void SveSquareRoot() // Handle loop. i += cntw; - pLoop = Sve.CreateWhileLessThanMask32Bit(i, Size); + pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(i, Size); } } } diff --git a/src/benchmarks/micro/sve/StrCmp.cs b/src/benchmarks/micro/sve/StrCmp.cs index 997f65b809c..ae0c30a0bf6 100644 --- a/src/benchmarks/micro/sve/StrCmp.cs +++ b/src/benchmarks/micro/sve/StrCmp.cs @@ -120,7 +120,7 @@ public unsafe long SveStrCmp() int elemsInVector = (int)Sve.Count8BitElements(); Vector ptrue = Sve.CreateTrueMaskByte(); - Vector pLoop = (Vector)Sve.CreateWhileLessThanMask8Bit(i, Size); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskByte(i, Size); Vector cmp = Vector.Zero; Vector arr1_data, arr2_data; @@ -141,7 +141,7 @@ public unsafe long SveStrCmp() i += elemsInVector; - pLoop = (Vector)Sve.CreateWhileLessThanMask8Bit(i, Size); + pLoop = SveMaskHelper.CreateWhileLessThanMaskByte(i, Size); } // create a bitmask to find position of changed value diff --git a/src/benchmarks/micro/sve/StrIndexOf.cs b/src/benchmarks/micro/sve/StrIndexOf.cs index 293b3336df3..fe31fbde90c 100644 --- a/src/benchmarks/micro/sve/StrIndexOf.cs +++ b/src/benchmarks/micro/sve/StrIndexOf.cs @@ -109,7 +109,7 @@ public unsafe int SveIndexOf() fixed (char* arr_ptr = _array) { Vector target = new Vector((ushort)_searchValue); - var pLoop = (Vector)Sve.CreateWhileLessThanMask16Bit(i, Size); + var pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt16(i, Size); while (Sve.TestFirstTrue(Sve.CreateTrueMaskUInt16(), pLoop)) { @@ -126,7 +126,7 @@ public unsafe int SveIndexOf() } i += (int)Sve.Count16BitElements(); - pLoop = (Vector)Sve.CreateWhileLessThanMask16Bit(i, Size); + pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt16(i, Size); } return -1; diff --git a/src/benchmarks/micro/sve/StrLen.cs b/src/benchmarks/micro/sve/StrLen.cs index 5bbde0991e0..1745eac57af 100644 --- a/src/benchmarks/micro/sve/StrLen.cs +++ b/src/benchmarks/micro/sve/StrLen.cs @@ -120,7 +120,7 @@ public unsafe ulong SveStrLen() ulong i = 0; ulong elemsInVector = Sve.Count8BitElements(); - Vector pLoop = (Vector)Sve.CreateWhileLessThanMask8Bit((int)i, Size); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskByte((int)i, Size); fixed (byte* arr_ptr = _array) { @@ -134,7 +134,7 @@ public unsafe ulong SveStrLen() else { i += elemsInVector; - pLoop = (Vector)Sve.CreateWhileLessThanMask8Bit((int)i, Size); + pLoop = SveMaskHelper.CreateWhileLessThanMaskByte((int)i, Size); } } diff --git a/src/benchmarks/micro/sve/SveMaskHelper.cs b/src/benchmarks/micro/sve/SveMaskHelper.cs new file mode 100644 index 00000000000..da338d7fff0 --- /dev/null +++ b/src/benchmarks/micro/sve/SveMaskHelper.cs @@ -0,0 +1,97 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics.Arm; + +namespace SveBenchmarks +{ + // Compatibility shim for CreateWhileLessThanMask API rename in .NET 11. + // In .NET 9/10 the methods are named CreateWhileLessThanMask{8,16,32,64}Bit. + // In .NET 11+ they were renamed to CreateWhileLessThanMask{Byte,Int16,UInt16,...}. + internal static class SveMaskHelper + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static Vector CreateWhileLessThanMaskByte(int left, int right) + { +#if NET11_0_OR_GREATER + return Sve.CreateWhileLessThanMaskByte(left, right); +#else + return Sve.CreateWhileLessThanMask8Bit(left, right); +#endif + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static Vector CreateWhileLessThanMaskUInt16(int left, int right) + { +#if NET11_0_OR_GREATER + return Sve.CreateWhileLessThanMaskUInt16(left, right); +#else + return Sve.CreateWhileLessThanMask16Bit(left, right); +#endif + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static Vector CreateWhileLessThanMaskInt16(int left, int right) + { +#if NET11_0_OR_GREATER + return Sve.CreateWhileLessThanMaskInt16(left, right); +#else + return (Vector)Sve.CreateWhileLessThanMask16Bit(left, right); +#endif + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static Vector CreateWhileLessThanMaskUInt32(int left, int right) + { +#if NET11_0_OR_GREATER + return Sve.CreateWhileLessThanMaskUInt32(left, right); +#else + return Sve.CreateWhileLessThanMask32Bit(left, right); +#endif + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static Vector CreateWhileLessThanMaskInt32(int left, int right) + { +#if NET11_0_OR_GREATER + return Sve.CreateWhileLessThanMaskInt32(left, right); +#else + return (Vector)Sve.CreateWhileLessThanMask32Bit(left, right); +#endif + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static Vector CreateWhileLessThanMaskSingle(int left, int right) + { +#if NET11_0_OR_GREATER + return Sve.CreateWhileLessThanMaskSingle(left, right); +#else + return (Vector)Sve.CreateWhileLessThanMask32Bit(left, right); +#endif + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static Vector CreateWhileLessThanMaskUInt64(int left, int right) + { +#if NET11_0_OR_GREATER + return Sve.CreateWhileLessThanMaskUInt64(left, right); +#else + return Sve.CreateWhileLessThanMask64Bit(left, right); +#endif + } + + // long overloads for benchmarks that use long loop counters (e.g. Partition.cs) + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static Vector CreateWhileLessThanMaskUInt32(long left, long right) + { +#if NET11_0_OR_GREATER + return Sve.CreateWhileLessThanMaskUInt32(left, right); +#else + return Sve.CreateWhileLessThanMask32Bit(left, right); +#endif + } + } +} diff --git a/src/benchmarks/micro/sve/TCPChecksum.cs b/src/benchmarks/micro/sve/TCPChecksum.cs index 874eb514083..ebb0f35697f 100644 --- a/src/benchmarks/micro/sve/TCPChecksum.cs +++ b/src/benchmarks/micro/sve/TCPChecksum.cs @@ -177,7 +177,7 @@ public unsafe void SveTCPChecksum() int i = 0; Vector acc = Vector.Zero; - Vector pLoop = Sve.CreateWhileLessThanMask16Bit(0, lengthWords); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt16(0, lengthWords); while (Sve.TestAnyTrue(pTrue, pLoop)) { Vector d = Sve.LoadVector(pLoop, ((ushort*)p) + i); @@ -187,7 +187,7 @@ public unsafe void SveTCPChecksum() // Handle loop predicate. i += (int)Sve.Count16BitElements(); - pLoop = Sve.CreateWhileLessThanMask16Bit(i, lengthWords); + pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt16(i, lengthWords); } // Reduce result to scalar. ulong sum = Sve.AddAcross(acc).ToScalar(); diff --git a/src/benchmarks/micro/sve/UpscaleFilter.cs b/src/benchmarks/micro/sve/UpscaleFilter.cs index 02319660630..2d1c6e43602 100644 --- a/src/benchmarks/micro/sve/UpscaleFilter.cs +++ b/src/benchmarks/micro/sve/UpscaleFilter.cs @@ -137,7 +137,7 @@ public unsafe void Sve2UpscaleFilter() { int lmt = Size - 1; int i = 0; - Vector pLoop = Sve.CreateWhileLessThanMask8Bit(0, lmt); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskByte(0, lmt); while (Sve.TestAnyTrue(pTrue, pLoop)) { // Load two consecutive samples. @@ -168,7 +168,7 @@ public unsafe void Sve2UpscaleFilter() Sve.StoreAndZip(pLoop, output + i * 2, (b0, b1)); i += (int)Sve.Count8BitElements(); - pLoop = Sve.CreateWhileLessThanMask8Bit(i, lmt); + pLoop = SveMaskHelper.CreateWhileLessThanMaskByte(i, lmt); } } } diff --git a/src/benchmarks/micro/sve/VectorMax.cs b/src/benchmarks/micro/sve/VectorMax.cs index 41e559099e7..d23ba99fecb 100644 --- a/src/benchmarks/micro/sve/VectorMax.cs +++ b/src/benchmarks/micro/sve/VectorMax.cs @@ -129,7 +129,7 @@ public unsafe void SveVectorMax() short cnth = (short)Sve.Count16BitElements(); Vector pTrue = Sve.CreateTrueMaskInt16(); - Vector pLoop = (Vector)Sve.CreateWhileLessThanMask16Bit(0, Size); + Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskInt16(0, Size); Vector idxVec = Vector.Indices; // Initialize the first vector worth of values. @@ -137,7 +137,7 @@ public unsafe void SveVectorMax() Vector maxIdxVec = idxVec; i += cnth; - pLoop = (Vector)Sve.CreateWhileLessThanMask16Bit(i, Size); + pLoop = SveMaskHelper.CreateWhileLessThanMaskInt16(i, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { Vector val = Sve.LoadVector(pLoop, input + i); @@ -152,7 +152,7 @@ public unsafe void SveVectorMax() // Handle loop. i += cnth; - pLoop = (Vector)Sve.CreateWhileLessThanMask16Bit(i, Size); + pLoop = SveMaskHelper.CreateWhileLessThanMaskInt16(i, Size); } // Get the maximum element across the max vector. From 048882bdb00f091a16a5913c06a565f6b1a45269 Mon Sep 17 00:00:00 2001 From: Parker Bibus Date: Mon, 30 Mar 2026 13:08:36 -0700 Subject: [PATCH 2/5] Temporarily disable all non-SVE perf jobs for test run Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- eng/pipelines/runtime-perf-jobs.yml | 33 +++++++++++++++-------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/eng/pipelines/runtime-perf-jobs.yml b/eng/pipelines/runtime-perf-jobs.yml index f0361093463..43ae10b7a65 100644 --- a/eng/pipelines/runtime-perf-jobs.yml +++ b/eng/pipelines/runtime-perf-jobs.yml @@ -14,7 +14,7 @@ parameters: - name: viperMicro type: object default: - enabled: true + enabled: false configs: - linux_x64 - windows_x64 @@ -22,46 +22,46 @@ parameters: - name: viperMicroNoR2R type: object default: - enabled: true + enabled: false configs: - linux_x64 - windows_x64 - name: viperMicroR2RInterpreter type: object default: - enabled: true + enabled: false configs: - linux_x64 - name: monoMicro type: object default: - enabled: true + enabled: false configs: - linux_x64 - name: monoInterpreter type: object default: - enabled: true + enabled: false configs: - linux_x64 - name: monoAot type: object default: - enabled: true + enabled: false configs: - linux_x64 - name: androidMonoJit type: object default: - enabled: true + enabled: false - name: androidMonoAot type: object default: - enabled: true + enabled: false - name: androidCoreclrJit type: object default: - enabled: true + enabled: false - name: cobaltSveMicro type: object default: @@ -71,7 +71,7 @@ parameters: - name: androidCoreclrR2r type: object default: - enabled: true + enabled: false jobs: - template: /eng/pipelines/performance/templates/perf-build-jobs.yml@${{ parameters.runtimeRepoAlias }} @@ -79,12 +79,13 @@ jobs: perfBranch: ${{ parameters.perfBranch }} - ${{ if not(startswith(variables['Build.SourceBranch'], 'refs/heads/release')) }}: - # Build and run iOS Mono and NativeAOT scenarios - - template: /eng/pipelines/runtime-ios-scenarios-perf-jobs.yml - parameters: - runtimeRepoAlias: ${{ parameters.runtimeRepoAlias }} - performanceRepoAlias: ${{ parameters.performanceRepoAlias }} - jobParameters: ${{ parameters.jobParameters }} + # Build and run iOS Mono and NativeAOT scenarios — disabled for SVE-only test run + - ${{ if false }}: + - template: /eng/pipelines/runtime-ios-scenarios-perf-jobs.yml + parameters: + runtimeRepoAlias: ${{ parameters.runtimeRepoAlias }} + performanceRepoAlias: ${{ parameters.performanceRepoAlias }} + jobParameters: ${{ parameters.jobParameters }} # Android Mono JIT — controlled by androidMonoJit toggle - ${{ if eq(parameters.androidMonoJit.enabled, true) }}: From 9decec2948dc3d4c8ea7e48571040f4ff397874b Mon Sep 17 00:00:00 2001 From: Parker Bibus Date: Mon, 30 Mar 2026 14:23:32 -0700 Subject: [PATCH 3/5] Fix SveMaskHelper to use reflection+delegate caching for API rename compat The SDK ref assemblies have old method names (CreateWhileLessThanMask8Bit etc.) while the runtime corerun has new names (CreateWhileLessThanMaskByte etc.). Compile-time conditionals cannot work since both builds define NET11_0_OR_GREATER. Use reflection at startup to detect which names exist, cache as delegates. Cast-wrapping lambdas handle cases where old API returns unsigned type but caller needs signed/float reinterpret (e.g. Vector -> Vector). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/benchmarks/micro/sve/SveMaskHelper.cs | 138 ++++++++++++---------- 1 file changed, 74 insertions(+), 64 deletions(-) diff --git a/src/benchmarks/micro/sve/SveMaskHelper.cs b/src/benchmarks/micro/sve/SveMaskHelper.cs index da338d7fff0..6bf7d468328 100644 --- a/src/benchmarks/micro/sve/SveMaskHelper.cs +++ b/src/benchmarks/micro/sve/SveMaskHelper.cs @@ -1,97 +1,107 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System; using System.Numerics; +using System.Reflection; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics.Arm; namespace SveBenchmarks { - // Compatibility shim for CreateWhileLessThanMask API rename in .NET 11. - // In .NET 9/10 the methods are named CreateWhileLessThanMask{8,16,32,64}Bit. - // In .NET 11+ they were renamed to CreateWhileLessThanMask{Byte,Int16,UInt16,...}. + // Compatibility shim for CreateWhileLessThanMask API rename. + // The runtime renamed these methods from bit-width suffixes (e.g. CreateWhileLessThanMask8Bit) + // to type-name suffixes (e.g. CreateWhileLessThanMaskByte). Because the SDK ref assemblies and + // the corerun may be from different builds, we detect which names exist at runtime via + // reflection and cache delegates. The one-time reflection cost is negligible for benchmarks. internal static class SveMaskHelper { + private static readonly Type[] s_intInt = new[] { typeof(int), typeof(int) }; + private static readonly Type[] s_longLong = new[] { typeof(long), typeof(long) }; + + private static readonly Func> s_maskByte = InitMaskByte(); + private static readonly Func> s_maskUInt16 = InitMaskUInt16(); + private static readonly Func> s_maskInt16 = InitMaskInt16(); + private static readonly Func> s_maskUInt32 = InitMaskUInt32(); + private static readonly Func> s_maskInt32 = InitMaskInt32(); + private static readonly Func> s_maskSingle = InitMaskSingle(); + private static readonly Func> s_maskUInt64 = InitMaskUInt64(); + private static readonly Func> s_maskUInt32Long = InitMaskUInt32Long(); + [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Vector CreateWhileLessThanMaskByte(int left, int right) - { -#if NET11_0_OR_GREATER - return Sve.CreateWhileLessThanMaskByte(left, right); -#else - return Sve.CreateWhileLessThanMask8Bit(left, right); -#endif - } + internal static Vector CreateWhileLessThanMaskByte(int left, int right) => s_maskByte(left, right); [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Vector CreateWhileLessThanMaskUInt16(int left, int right) - { -#if NET11_0_OR_GREATER - return Sve.CreateWhileLessThanMaskUInt16(left, right); -#else - return Sve.CreateWhileLessThanMask16Bit(left, right); -#endif - } + internal static Vector CreateWhileLessThanMaskUInt16(int left, int right) => s_maskUInt16(left, right); [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Vector CreateWhileLessThanMaskInt16(int left, int right) - { -#if NET11_0_OR_GREATER - return Sve.CreateWhileLessThanMaskInt16(left, right); -#else - return (Vector)Sve.CreateWhileLessThanMask16Bit(left, right); -#endif - } + internal static Vector CreateWhileLessThanMaskInt16(int left, int right) => s_maskInt16(left, right); [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Vector CreateWhileLessThanMaskUInt32(int left, int right) - { -#if NET11_0_OR_GREATER - return Sve.CreateWhileLessThanMaskUInt32(left, right); -#else - return Sve.CreateWhileLessThanMask32Bit(left, right); -#endif - } + internal static Vector CreateWhileLessThanMaskUInt32(int left, int right) => s_maskUInt32(left, right); [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Vector CreateWhileLessThanMaskInt32(int left, int right) - { -#if NET11_0_OR_GREATER - return Sve.CreateWhileLessThanMaskInt32(left, right); -#else - return (Vector)Sve.CreateWhileLessThanMask32Bit(left, right); -#endif - } + internal static Vector CreateWhileLessThanMaskInt32(int left, int right) => s_maskInt32(left, right); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static Vector CreateWhileLessThanMaskSingle(int left, int right) => s_maskSingle(left, right); [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Vector CreateWhileLessThanMaskSingle(int left, int right) + internal static Vector CreateWhileLessThanMaskUInt64(int left, int right) => s_maskUInt64(left, right); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static Vector CreateWhileLessThanMaskUInt32(long left, long right) => s_maskUInt32Long(left, right); + + // Helpers to resolve method by new name first, then old name. + private static MethodInfo Resolve(string newName, string oldName, Type[] paramTypes) { -#if NET11_0_OR_GREATER - return Sve.CreateWhileLessThanMaskSingle(left, right); -#else - return (Vector)Sve.CreateWhileLessThanMask32Bit(left, right); -#endif + return typeof(Sve).GetMethod(newName, BindingFlags.Public | BindingFlags.Static, null, paramTypes, null) + ?? typeof(Sve).GetMethod(oldName, BindingFlags.Public | BindingFlags.Static, null, paramTypes, null) + ?? throw new PlatformNotSupportedException($"Neither '{newName}' nor '{oldName}' found on Sve."); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Vector CreateWhileLessThanMaskUInt64(int left, int right) + private static TDelegate Bind(string newName, string oldName, Type[] paramTypes) where TDelegate : Delegate + => (TDelegate)Delegate.CreateDelegate(typeof(TDelegate), Resolve(newName, oldName, paramTypes)); + + // Direct delegate binding — return type matches between old and new names. + private static Func> InitMaskByte() + => Bind>>("CreateWhileLessThanMaskByte", "CreateWhileLessThanMask8Bit", s_intInt); + + private static Func> InitMaskUInt16() + => Bind>>("CreateWhileLessThanMaskUInt16", "CreateWhileLessThanMask16Bit", s_intInt); + + private static Func> InitMaskUInt32() + => Bind>>("CreateWhileLessThanMaskUInt32", "CreateWhileLessThanMask32Bit", s_intInt); + + private static Func> InitMaskUInt64() + => Bind>>("CreateWhileLessThanMaskUInt64", "CreateWhileLessThanMask64Bit", s_intInt); + + private static Func> InitMaskUInt32Long() + => Bind>>("CreateWhileLessThanMaskUInt32", "CreateWhileLessThanMask32Bit", s_longLong); + + // Cast-wrapping delegates — old name returns unsigned type, but caller needs signed/float reinterpret. + private static Func> InitMaskInt16() { -#if NET11_0_OR_GREATER - return Sve.CreateWhileLessThanMaskUInt64(left, right); -#else - return Sve.CreateWhileLessThanMask64Bit(left, right); -#endif + var m = typeof(Sve).GetMethod("CreateWhileLessThanMaskInt16", BindingFlags.Public | BindingFlags.Static, null, s_intInt, null); + if (m != null) return (Func>)Delegate.CreateDelegate(typeof(Func>), m); + var old = Bind>>("CreateWhileLessThanMaskUInt16", "CreateWhileLessThanMask16Bit", s_intInt); + return (l, r) => { var v = old(l, r); return Unsafe.As, Vector>(ref v); }; } - // long overloads for benchmarks that use long loop counters (e.g. Partition.cs) + private static Func> InitMaskInt32() + { + var m = typeof(Sve).GetMethod("CreateWhileLessThanMaskInt32", BindingFlags.Public | BindingFlags.Static, null, s_intInt, null); + if (m != null) return (Func>)Delegate.CreateDelegate(typeof(Func>), m); + var old = Bind>>("CreateWhileLessThanMaskUInt32", "CreateWhileLessThanMask32Bit", s_intInt); + return (l, r) => { var v = old(l, r); return Unsafe.As, Vector>(ref v); }; + } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Vector CreateWhileLessThanMaskUInt32(long left, long right) + private static Func> InitMaskSingle() { -#if NET11_0_OR_GREATER - return Sve.CreateWhileLessThanMaskUInt32(left, right); -#else - return Sve.CreateWhileLessThanMask32Bit(left, right); -#endif + var m = typeof(Sve).GetMethod("CreateWhileLessThanMaskSingle", BindingFlags.Public | BindingFlags.Static, null, s_intInt, null); + if (m != null) return (Func>)Delegate.CreateDelegate(typeof(Func>), m); + var old = Bind>>("CreateWhileLessThanMaskUInt32", "CreateWhileLessThanMask32Bit", s_intInt); + return (l, r) => { var v = old(l, r); return Unsafe.As, Vector>(ref v); }; } } } From dde80e899d8f9d6fb3f502aaab3fd861160f761c Mon Sep 17 00:00:00 2001 From: Parker Bibus Date: Wed, 1 Apr 2026 12:39:31 -0700 Subject: [PATCH 4/5] Revert "Temporarily disable all non-SVE perf jobs for test run" This reverts commit 048882bdb00f091a16a5913c06a565f6b1a45269. --- eng/pipelines/runtime-perf-jobs.yml | 33 ++++++++++++++--------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/eng/pipelines/runtime-perf-jobs.yml b/eng/pipelines/runtime-perf-jobs.yml index 43ae10b7a65..f0361093463 100644 --- a/eng/pipelines/runtime-perf-jobs.yml +++ b/eng/pipelines/runtime-perf-jobs.yml @@ -14,7 +14,7 @@ parameters: - name: viperMicro type: object default: - enabled: false + enabled: true configs: - linux_x64 - windows_x64 @@ -22,46 +22,46 @@ parameters: - name: viperMicroNoR2R type: object default: - enabled: false + enabled: true configs: - linux_x64 - windows_x64 - name: viperMicroR2RInterpreter type: object default: - enabled: false + enabled: true configs: - linux_x64 - name: monoMicro type: object default: - enabled: false + enabled: true configs: - linux_x64 - name: monoInterpreter type: object default: - enabled: false + enabled: true configs: - linux_x64 - name: monoAot type: object default: - enabled: false + enabled: true configs: - linux_x64 - name: androidMonoJit type: object default: - enabled: false + enabled: true - name: androidMonoAot type: object default: - enabled: false + enabled: true - name: androidCoreclrJit type: object default: - enabled: false + enabled: true - name: cobaltSveMicro type: object default: @@ -71,7 +71,7 @@ parameters: - name: androidCoreclrR2r type: object default: - enabled: false + enabled: true jobs: - template: /eng/pipelines/performance/templates/perf-build-jobs.yml@${{ parameters.runtimeRepoAlias }} @@ -79,13 +79,12 @@ jobs: perfBranch: ${{ parameters.perfBranch }} - ${{ if not(startswith(variables['Build.SourceBranch'], 'refs/heads/release')) }}: - # Build and run iOS Mono and NativeAOT scenarios — disabled for SVE-only test run - - ${{ if false }}: - - template: /eng/pipelines/runtime-ios-scenarios-perf-jobs.yml - parameters: - runtimeRepoAlias: ${{ parameters.runtimeRepoAlias }} - performanceRepoAlias: ${{ parameters.performanceRepoAlias }} - jobParameters: ${{ parameters.jobParameters }} + # Build and run iOS Mono and NativeAOT scenarios + - template: /eng/pipelines/runtime-ios-scenarios-perf-jobs.yml + parameters: + runtimeRepoAlias: ${{ parameters.runtimeRepoAlias }} + performanceRepoAlias: ${{ parameters.performanceRepoAlias }} + jobParameters: ${{ parameters.jobParameters }} # Android Mono JIT — controlled by androidMonoJit toggle - ${{ if eq(parameters.androidMonoJit.enabled, true) }}: From f14206a06641cb13cdcd1f861869f3c0672b271c Mon Sep 17 00:00:00 2001 From: Drew Scoggins Date: Wed, 1 Apr 2026 16:41:07 -0700 Subject: [PATCH 5/5] Set SVE tests to only use new APIs on net11.0 --- src/benchmarks/micro/MicroBenchmarks.csproj | 26 +++++ src/benchmarks/micro/sve/Clamp.cs | 6 +- src/benchmarks/micro/sve/ComplexDotProduct.cs | 8 +- src/benchmarks/micro/sve/ComplexMultiply.cs | 8 +- src/benchmarks/micro/sve/Exponent.cs | 4 +- src/benchmarks/micro/sve/FP64Overflow.cs | 8 +- src/benchmarks/micro/sve/FastDivision.cs | 4 +- src/benchmarks/micro/sve/GatherLoad.cs | 4 +- src/benchmarks/micro/sve/Logarithm.cs | 4 +- src/benchmarks/micro/sve/MultiplyAdd.cs | 4 +- src/benchmarks/micro/sve/MultiplyPow2.cs | 4 +- src/benchmarks/micro/sve/OddEvenSort.cs | 4 +- src/benchmarks/micro/sve/PairwiseAdd.cs | 8 +- src/benchmarks/micro/sve/Partition.cs | 4 +- src/benchmarks/micro/sve/ScatterStore.cs | 4 +- src/benchmarks/micro/sve/SobelFilter.cs | 8 +- src/benchmarks/micro/sve/SquareRoot.cs | 4 +- src/benchmarks/micro/sve/StrCmp.cs | 4 +- src/benchmarks/micro/sve/StrIndexOf.cs | 4 +- src/benchmarks/micro/sve/StrLen.cs | 4 +- src/benchmarks/micro/sve/SveMaskHelper.cs | 107 ------------------ src/benchmarks/micro/sve/TCPChecksum.cs | 4 +- src/benchmarks/micro/sve/UpscaleFilter.cs | 4 +- src/benchmarks/micro/sve/VectorMax.cs | 6 +- 24 files changed, 82 insertions(+), 163 deletions(-) delete mode 100644 src/benchmarks/micro/sve/SveMaskHelper.cs diff --git a/src/benchmarks/micro/MicroBenchmarks.csproj b/src/benchmarks/micro/MicroBenchmarks.csproj index 9e21bcdb8a3..665ba21a9a3 100644 --- a/src/benchmarks/micro/MicroBenchmarks.csproj +++ b/src/benchmarks/micro/MicroBenchmarks.csproj @@ -263,6 +263,32 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/benchmarks/micro/sve/Clamp.cs b/src/benchmarks/micro/sve/Clamp.cs index ba044edbf3f..f90a2032780 100644 --- a/src/benchmarks/micro/sve/Clamp.cs +++ b/src/benchmarks/micro/sve/Clamp.cs @@ -90,7 +90,7 @@ public unsafe void SveClamp() Vector valVec = new Vector(Size / 2); Vector minVec = Vector.Indices; Vector pTrue = Sve.CreateTrueMaskInt32(); - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(i, length); + Vector pLoop = Sve.CreateWhileLessThanMaskInt32(i, length); while (Sve.TestFirstTrue(pTrue, pLoop)) { Vector maxVec = Sve.ShiftLeftLogical(minVec, Vector.One); @@ -99,7 +99,7 @@ public unsafe void SveClamp() minVec = Sve.Add(minVec, new Vector(cntw)); i += cntw; - pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(i, length); + pLoop = Sve.CreateWhileLessThanMaskInt32(i, length); } _output = (int)Sve.AddAcross(resVec).ToScalar(); } @@ -116,7 +116,7 @@ public unsafe void SveTail() Vector minVec = Vector.Indices; for (; i < length; i += cntw) { - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(i, length); + Vector pLoop = Sve.CreateWhileLessThanMaskInt32(i, length); Vector maxVec = Sve.ShiftLeftLogical(minVec, Vector.One); Vector tmpVec = Sve.Min(Sve.Max(valVec, minVec), maxVec); resVec = Sve.ConditionalSelect(pLoop, Sve.Add(resVec, tmpVec), resVec); diff --git a/src/benchmarks/micro/sve/ComplexDotProduct.cs b/src/benchmarks/micro/sve/ComplexDotProduct.cs index 670de3a246a..7483637b41d 100644 --- a/src/benchmarks/micro/sve/ComplexDotProduct.cs +++ b/src/benchmarks/micro/sve/ComplexDotProduct.cs @@ -213,7 +213,7 @@ public unsafe void SveComplexDotProduct() // Create mask for the imaginary half of a word. Vector imMask = (Vector)(new Vector(0xFFFF0000u)); Vector pTrue = Sve.CreateTrueMaskInt32(); - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(0, Size); + Vector pLoop = Sve.CreateWhileLessThanMaskInt32(0, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { // Load inputs. @@ -248,7 +248,7 @@ public unsafe void SveComplexDotProduct() // Handle loop. i += cntw; - pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(i, Size); + pLoop = Sve.CreateWhileLessThanMaskInt32(i, Size); } } } @@ -263,7 +263,7 @@ public unsafe void Sve2ComplexDotProduct() int cntw = (int)Sve.Count32BitElements(); Vector pTrue = Sve.CreateTrueMaskInt32(); - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(0, Size); + Vector pLoop = Sve.CreateWhileLessThanMaskInt32(0, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { Vector a1 = (Vector)Sve.LoadVector(pLoop, (int*)(a + 4 * i)); @@ -276,7 +276,7 @@ public unsafe void Sve2ComplexDotProduct() // Handle loop. i += cntw; - pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(i, Size); + pLoop = Sve.CreateWhileLessThanMaskInt32(i, Size); } } } diff --git a/src/benchmarks/micro/sve/ComplexMultiply.cs b/src/benchmarks/micro/sve/ComplexMultiply.cs index 22c2d7bae82..06c5b5402e1 100644 --- a/src/benchmarks/micro/sve/ComplexMultiply.cs +++ b/src/benchmarks/micro/sve/ComplexMultiply.cs @@ -135,11 +135,11 @@ public unsafe void SveComplexMultiply() // Handle remaining elements using predicates. lmt = Size * 2; - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(i, lmt); + Vector pLoop = Sve.CreateWhileLessThanMaskUInt32(i, lmt); if (Sve.TestFirstTrue(pTrue, pLoop)) { // Compute the predicate for elements in i + cntw. - Vector pTail = SveMaskHelper.CreateWhileLessThanMaskUInt32(i + cntw, lmt); + Vector pTail = Sve.CreateWhileLessThanMaskUInt32(i + cntw, lmt); // Unzip the predicates pLoop and pTail for 2xVector load/store. Vector pInner = Sve.UnzipEven(pLoop, pTail); @@ -190,7 +190,7 @@ public unsafe void Sve2ComplexMultiply() // Handle remaining elements. lmt = Size; - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt64(i, lmt); + Vector pLoop = Sve.CreateWhileLessThanMaskUInt64(i, lmt); while (Sve.TestFirstTrue(pTrue, pLoop)) { Vector a1 = (Vector)Sve2.LoadVector(pLoop, (ulong*)a + i); @@ -201,7 +201,7 @@ public unsafe void Sve2ComplexMultiply() Sve.StoreAndZip(pLoop, (ulong*)c + i, (Vector)(c1)); i += cntd; - pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt64(i, lmt); + pLoop = Sve.CreateWhileLessThanMaskUInt64(i, lmt); } } } diff --git a/src/benchmarks/micro/sve/Exponent.cs b/src/benchmarks/micro/sve/Exponent.cs index 44f300e19f7..4e36cda4584 100644 --- a/src/benchmarks/micro/sve/Exponent.cs +++ b/src/benchmarks/micro/sve/Exponent.cs @@ -168,7 +168,7 @@ public unsafe void SveExponent() Vector constVec = new Vector(new ReadOnlySpan(&d[3], 4)); Vector pTrue = Sve.CreateTrueMaskUInt32(); - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(0, Size); + Vector pLoop = Sve.CreateWhileLessThanMaskUInt32(0, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { Vector x = (Vector)Sve.LoadVector(pLoop, (uint*)(input + i)); @@ -197,7 +197,7 @@ public unsafe void SveExponent() // Handle loop. i += cntw; - pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(i, Size); + pLoop = Sve.CreateWhileLessThanMaskUInt32(i, Size); } } } diff --git a/src/benchmarks/micro/sve/FP64Overflow.cs b/src/benchmarks/micro/sve/FP64Overflow.cs index 79702cc602f..6102792ff7e 100644 --- a/src/benchmarks/micro/sve/FP64Overflow.cs +++ b/src/benchmarks/micro/sve/FP64Overflow.cs @@ -153,7 +153,7 @@ public unsafe void SveFP64Overflow() Vector maskVec = new Vector(1023); Vector pTrue = Sve.CreateTrueMaskUInt64(); - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt64(i, Size); + Vector pLoop = Sve.CreateWhileLessThanMaskUInt64(i, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { // Load Vector as ulong then convert to Vector. @@ -178,7 +178,7 @@ public unsafe void SveFP64Overflow() // Handle loop. i += cntd; - pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt64(i, Size); + pLoop = Sve.CreateWhileLessThanMaskUInt64(i, Size); } } } @@ -193,7 +193,7 @@ public unsafe void Sve2FP64Overflow() int cntd = (int)Sve.Count64BitElements(); Vector pTrue = Sve.CreateTrueMaskUInt64(); - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt64(i, Size); + Vector pLoop = Sve.CreateWhileLessThanMaskUInt64(i, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { // Load input vectors. @@ -214,7 +214,7 @@ public unsafe void Sve2FP64Overflow() // Handle loop. i += cntd; - pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt64(i, Size); + pLoop = Sve.CreateWhileLessThanMaskUInt64(i, Size); } } } diff --git a/src/benchmarks/micro/sve/FastDivision.cs b/src/benchmarks/micro/sve/FastDivision.cs index 422b46e37ff..38764883fc6 100644 --- a/src/benchmarks/micro/sve/FastDivision.cs +++ b/src/benchmarks/micro/sve/FastDivision.cs @@ -123,7 +123,7 @@ public unsafe void SveFastDivision() int cntd = (int)Sve.Count64BitElements(); Vector pTrue = Sve.CreateTrueMaskUInt64(); - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt64(i, Size); + Vector pLoop = Sve.CreateWhileLessThanMaskUInt64(i, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { Vector input1Vec = (Vector)Sve.LoadVector(pLoop, (ulong*)input1 + i); @@ -145,7 +145,7 @@ public unsafe void SveFastDivision() Sve.StoreAndZip(pLoop, (ulong*)output + i, (Vector)outVec); i += cntd; - pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt64(i, Size); + pLoop = Sve.CreateWhileLessThanMaskUInt64(i, Size); } } } diff --git a/src/benchmarks/micro/sve/GatherLoad.cs b/src/benchmarks/micro/sve/GatherLoad.cs index d1e8bc8fb5c..35cf8a1c5be 100644 --- a/src/benchmarks/micro/sve/GatherLoad.cs +++ b/src/benchmarks/micro/sve/GatherLoad.cs @@ -80,7 +80,7 @@ public unsafe void SveGatherLoad() Vector resVec = Vector.Zero; Vector pTrue = Sve.CreateTrueMaskUInt32(); - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(0, Size); + Vector pLoop = Sve.CreateWhileLessThanMaskUInt32(0, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { // Load indices @@ -91,7 +91,7 @@ public unsafe void SveGatherLoad() resVec = Sve.Add(resVec, objVec); i += cntw; - pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(i, Size); + pLoop = Sve.CreateWhileLessThanMaskUInt32(i, Size); } // Add up all elements in resVec. uint res = (uint)Sve.AddAcross(resVec).ToScalar(); diff --git a/src/benchmarks/micro/sve/Logarithm.cs b/src/benchmarks/micro/sve/Logarithm.cs index e2f37db9c6e..9222e7521c4 100644 --- a/src/benchmarks/micro/sve/Logarithm.cs +++ b/src/benchmarks/micro/sve/Logarithm.cs @@ -174,7 +174,7 @@ public unsafe void SveLogarithm() Vector pTrue = Sve.CreateTrueMaskUInt32(); Vector pTruef = Sve.CreateTrueMaskSingle(); - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(0, Size); + Vector pLoop = Sve.CreateWhileLessThanMaskUInt32(0, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { Vector x = (Vector)Sve.LoadVector(pLoop, (uint*)(input + i)); @@ -239,7 +239,7 @@ public unsafe void SveLogarithm() // Handle loop. i += cntw; - pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(i, Size); + pLoop = Sve.CreateWhileLessThanMaskUInt32(i, Size); } } } diff --git a/src/benchmarks/micro/sve/MultiplyAdd.cs b/src/benchmarks/micro/sve/MultiplyAdd.cs index d5b8bd33a63..438bb575b11 100644 --- a/src/benchmarks/micro/sve/MultiplyAdd.cs +++ b/src/benchmarks/micro/sve/MultiplyAdd.cs @@ -154,7 +154,7 @@ public unsafe void SveMultiplyAdd() // Handle remaining elements using predicates. lmt = Size; - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(i, lmt); + Vector pLoop = Sve.CreateWhileLessThanMaskInt32(i, lmt); while (Sve.TestAnyTrue(pTrue, pLoop)) { Vector aVec = Sve.LoadVector(pLoop, a + i); @@ -165,7 +165,7 @@ public unsafe void SveMultiplyAdd() // Increment by a vector length. i += cntw; - pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(i, lmt); + pLoop = Sve.CreateWhileLessThanMaskInt32(i, lmt); } // Sum up all elements in the 4 result vectors. diff --git a/src/benchmarks/micro/sve/MultiplyPow2.cs b/src/benchmarks/micro/sve/MultiplyPow2.cs index 6167fa6f704..cc13932133a 100644 --- a/src/benchmarks/micro/sve/MultiplyPow2.cs +++ b/src/benchmarks/micro/sve/MultiplyPow2.cs @@ -116,7 +116,7 @@ public unsafe void SveMultiplyPow2() int cntd = (int)Sve.Count64BitElements(); Vector pTrue = Sve.CreateTrueMaskUInt64(); - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt64(i, Size); + Vector pLoop = Sve.CreateWhileLessThanMaskUInt64(i, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { // Cast the array pointers to ulong so the predicate can be shared. @@ -129,7 +129,7 @@ public unsafe void SveMultiplyPow2() // Handle loop. i += cntd; - pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt64(i, Size); + pLoop = Sve.CreateWhileLessThanMaskUInt64(i, Size); } } } diff --git a/src/benchmarks/micro/sve/OddEvenSort.cs b/src/benchmarks/micro/sve/OddEvenSort.cs index 405f03c01a5..7a17f6044b0 100644 --- a/src/benchmarks/micro/sve/OddEvenSort.cs +++ b/src/benchmarks/micro/sve/OddEvenSort.cs @@ -180,7 +180,7 @@ public unsafe void SveOddEvenSort() for (; j < n - 1; j += (cntw << 1)) { // Get predicate for elements to load/store. - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(0, (n - j) / 2); + Vector pLoop = Sve.CreateWhileLessThanMaskUInt32(0, (n - j) / 2); // Interleaved load elements. (Vector a0, Vector a1) = Sve.Load2xVectorAndUnzip(pLoop, source + j); @@ -248,7 +248,7 @@ public unsafe void SveTail() // Handle tail using predicates. for (; j < n - 1; j += (cntw << 1)) { - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(0, (n - j) / 2); + Vector pLoop = Sve.CreateWhileLessThanMaskUInt32(0, (n - j) / 2); (Vector a0, Vector a1) = Sve.Load2xVectorAndUnzip(pLoop, source + j); Vector pCmp = Sve.ConditionalSelect(pLoop, Sve.CompareGreaterThan(a0, a1), Sve.CreateFalseMaskUInt32()); diff --git a/src/benchmarks/micro/sve/PairwiseAdd.cs b/src/benchmarks/micro/sve/PairwiseAdd.cs index 20f21e9dacb..f612e2b2e00 100644 --- a/src/benchmarks/micro/sve/PairwiseAdd.cs +++ b/src/benchmarks/micro/sve/PairwiseAdd.cs @@ -133,11 +133,11 @@ public unsafe void SvePairwiseAdd() // Handle remaining elements using predicates. lmt = Size * 2; - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(i, lmt); + Vector pLoop = Sve.CreateWhileLessThanMaskInt32(i, lmt); if (Sve.TestFirstTrue(pTrue, pLoop)) { // Compute the predicate for elements in i + cntw. - Vector pTail = SveMaskHelper.CreateWhileLessThanMaskInt32(i + cntw, lmt); + Vector pTail = Sve.CreateWhileLessThanMaskInt32(i + cntw, lmt); // Unzip the predicates pLoop and pTail for 2xVector load/store. Vector pInner = Sve.UnzipEven(pLoop, pTail); @@ -181,7 +181,7 @@ public unsafe void Sve2PairwiseAdd() // Handle remaining elements. lmt = Size * 2; - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(i, lmt); + Vector pLoop = Sve.CreateWhileLessThanMaskInt32(i, lmt); while (Sve.TestFirstTrue(pTrue, pLoop)) { Vector a0 = Sve.LoadVector(pLoop, a + i); @@ -189,7 +189,7 @@ public unsafe void Sve2PairwiseAdd() Vector c0 = Sve2.AddPairwise(a0, b0); Sve.StoreAndZip(pLoop, c + i, c0); i += cntw; - pLoop = SveMaskHelper.CreateWhileLessThanMaskInt32(i, lmt); + pLoop = Sve.CreateWhileLessThanMaskInt32(i, lmt); } } } diff --git a/src/benchmarks/micro/sve/Partition.cs b/src/benchmarks/micro/sve/Partition.cs index e9a47d09523..79433c53cdc 100644 --- a/src/benchmarks/micro/sve/Partition.cs +++ b/src/benchmarks/micro/sve/Partition.cs @@ -86,7 +86,7 @@ public unsafe ulong SvePartition() ); // Create a predicate for the loop. - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(i, Size); + Vector pLoop = Sve.CreateWhileLessThanMaskUInt32(i, Size); while (Sve.TestAnyTrue(Sve.CreateTrueMaskUInt32(), pLoop)) { @@ -119,7 +119,7 @@ public unsafe ulong SvePartition() indexRight = Sve.SaturatingIncrementByActiveElementCount(indexRight, pInner); i = Sve.SaturatingIncrementBy32BitElementCount(i, 1); - pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(i, Size); + pLoop = Sve.CreateWhileLessThanMaskUInt32(i, Size); } return indexRight; diff --git a/src/benchmarks/micro/sve/ScatterStore.cs b/src/benchmarks/micro/sve/ScatterStore.cs index 6ce25faaaee..0728deec252 100644 --- a/src/benchmarks/micro/sve/ScatterStore.cs +++ b/src/benchmarks/micro/sve/ScatterStore.cs @@ -80,7 +80,7 @@ public unsafe void SveScatterStore() Vector ones = Vector.One; Vector pTrue = Sve.CreateTrueMaskUInt32(); - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(0, Size); + Vector pLoop = Sve.CreateWhileLessThanMaskUInt32(0, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { Vector idxVec = Sve.LoadVector(pLoop, indices + i); @@ -89,7 +89,7 @@ public unsafe void SveScatterStore() Sve.Scatter(pLoop, objects, idxVec, ones); i += cntw; - pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(i, Size); + pLoop = Sve.CreateWhileLessThanMaskUInt32(i, Size); } } } diff --git a/src/benchmarks/micro/sve/SobelFilter.cs b/src/benchmarks/micro/sve/SobelFilter.cs index b4f75d7aec6..ecb06c5caa1 100644 --- a/src/benchmarks/micro/sve/SobelFilter.cs +++ b/src/benchmarks/micro/sve/SobelFilter.cs @@ -195,8 +195,8 @@ public unsafe void SveSobelFilter() Vector resVec; // Load coefficients of the filter into vectors. - Vector kxVec = Sve.LoadVector(SveMaskHelper.CreateWhileLessThanMaskSingle(0, 3), kx); - Vector kyVec = Sve.LoadVector(SveMaskHelper.CreateWhileLessThanMaskSingle(0, 3), ky); + Vector kxVec = Sve.LoadVector(Sve.CreateWhileLessThanMaskSingle(0, 3), kx); + Vector kyVec = Sve.LoadVector(Sve.CreateWhileLessThanMaskSingle(0, 3), ky); for (int j = 0; j < img_size; j++) { // Load the elements from input and output the intermediate result to temp. @@ -205,7 +205,7 @@ public unsafe void SveSobelFilter() for (int i = 0; i < out_size; i += cntw) { - Vector pRow = SveMaskHelper.CreateWhileLessThanMaskSingle(i, out_size); + Vector pRow = Sve.CreateWhileLessThanMaskSingle(i, out_size); // Load input elements from the next 3 columns. Vector col0 = Sve.LoadVector(pRow, in_ptr + i); @@ -228,7 +228,7 @@ public unsafe void SveSobelFilter() for (int i = 0; i < out_size; i += cntw) { - Vector pRow = SveMaskHelper.CreateWhileLessThanMaskSingle(i, out_size); + Vector pRow = Sve.CreateWhileLessThanMaskSingle(i, out_size); // Load input elements from the next 3 rows. Vector row0 = Sve.LoadVector(pRow, in_ptr + i); diff --git a/src/benchmarks/micro/sve/SquareRoot.cs b/src/benchmarks/micro/sve/SquareRoot.cs index 888d32c63d0..d2e2f4749ab 100644 --- a/src/benchmarks/micro/sve/SquareRoot.cs +++ b/src/benchmarks/micro/sve/SquareRoot.cs @@ -94,7 +94,7 @@ public unsafe void SveSquareRoot() // We use Vector for predicates since there are no Vector // overloads for TestFirstTrue and CreateWhileLessThanMask etc. Vector pTrue = Sve.CreateTrueMaskUInt32(); - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(0, Size); + Vector pLoop = Sve.CreateWhileLessThanMaskUInt32(0, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { // Since pLoop is a Vector predicate, we load the input as uint array, @@ -107,7 +107,7 @@ public unsafe void SveSquareRoot() // Handle loop. i += cntw; - pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt32(i, Size); + pLoop = Sve.CreateWhileLessThanMaskUInt32(i, Size); } } } diff --git a/src/benchmarks/micro/sve/StrCmp.cs b/src/benchmarks/micro/sve/StrCmp.cs index ae0c30a0bf6..14cf905a62e 100644 --- a/src/benchmarks/micro/sve/StrCmp.cs +++ b/src/benchmarks/micro/sve/StrCmp.cs @@ -120,7 +120,7 @@ public unsafe long SveStrCmp() int elemsInVector = (int)Sve.Count8BitElements(); Vector ptrue = Sve.CreateTrueMaskByte(); - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskByte(i, Size); + Vector pLoop = Sve.CreateWhileLessThanMaskByte(i, Size); Vector cmp = Vector.Zero; Vector arr1_data, arr2_data; @@ -141,7 +141,7 @@ public unsafe long SveStrCmp() i += elemsInVector; - pLoop = SveMaskHelper.CreateWhileLessThanMaskByte(i, Size); + pLoop = Sve.CreateWhileLessThanMaskByte(i, Size); } // create a bitmask to find position of changed value diff --git a/src/benchmarks/micro/sve/StrIndexOf.cs b/src/benchmarks/micro/sve/StrIndexOf.cs index fe31fbde90c..e0da41462d1 100644 --- a/src/benchmarks/micro/sve/StrIndexOf.cs +++ b/src/benchmarks/micro/sve/StrIndexOf.cs @@ -109,7 +109,7 @@ public unsafe int SveIndexOf() fixed (char* arr_ptr = _array) { Vector target = new Vector((ushort)_searchValue); - var pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt16(i, Size); + var pLoop = Sve.CreateWhileLessThanMaskUInt16(i, Size); while (Sve.TestFirstTrue(Sve.CreateTrueMaskUInt16(), pLoop)) { @@ -126,7 +126,7 @@ public unsafe int SveIndexOf() } i += (int)Sve.Count16BitElements(); - pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt16(i, Size); + pLoop = Sve.CreateWhileLessThanMaskUInt16(i, Size); } return -1; diff --git a/src/benchmarks/micro/sve/StrLen.cs b/src/benchmarks/micro/sve/StrLen.cs index 1745eac57af..7f33c824338 100644 --- a/src/benchmarks/micro/sve/StrLen.cs +++ b/src/benchmarks/micro/sve/StrLen.cs @@ -120,7 +120,7 @@ public unsafe ulong SveStrLen() ulong i = 0; ulong elemsInVector = Sve.Count8BitElements(); - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskByte((int)i, Size); + Vector pLoop = Sve.CreateWhileLessThanMaskByte((int)i, Size); fixed (byte* arr_ptr = _array) { @@ -134,7 +134,7 @@ public unsafe ulong SveStrLen() else { i += elemsInVector; - pLoop = SveMaskHelper.CreateWhileLessThanMaskByte((int)i, Size); + pLoop = Sve.CreateWhileLessThanMaskByte((int)i, Size); } } diff --git a/src/benchmarks/micro/sve/SveMaskHelper.cs b/src/benchmarks/micro/sve/SveMaskHelper.cs deleted file mode 100644 index 6bf7d468328..00000000000 --- a/src/benchmarks/micro/sve/SveMaskHelper.cs +++ /dev/null @@ -1,107 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System; -using System.Numerics; -using System.Reflection; -using System.Runtime.CompilerServices; -using System.Runtime.Intrinsics.Arm; - -namespace SveBenchmarks -{ - // Compatibility shim for CreateWhileLessThanMask API rename. - // The runtime renamed these methods from bit-width suffixes (e.g. CreateWhileLessThanMask8Bit) - // to type-name suffixes (e.g. CreateWhileLessThanMaskByte). Because the SDK ref assemblies and - // the corerun may be from different builds, we detect which names exist at runtime via - // reflection and cache delegates. The one-time reflection cost is negligible for benchmarks. - internal static class SveMaskHelper - { - private static readonly Type[] s_intInt = new[] { typeof(int), typeof(int) }; - private static readonly Type[] s_longLong = new[] { typeof(long), typeof(long) }; - - private static readonly Func> s_maskByte = InitMaskByte(); - private static readonly Func> s_maskUInt16 = InitMaskUInt16(); - private static readonly Func> s_maskInt16 = InitMaskInt16(); - private static readonly Func> s_maskUInt32 = InitMaskUInt32(); - private static readonly Func> s_maskInt32 = InitMaskInt32(); - private static readonly Func> s_maskSingle = InitMaskSingle(); - private static readonly Func> s_maskUInt64 = InitMaskUInt64(); - private static readonly Func> s_maskUInt32Long = InitMaskUInt32Long(); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Vector CreateWhileLessThanMaskByte(int left, int right) => s_maskByte(left, right); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Vector CreateWhileLessThanMaskUInt16(int left, int right) => s_maskUInt16(left, right); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Vector CreateWhileLessThanMaskInt16(int left, int right) => s_maskInt16(left, right); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Vector CreateWhileLessThanMaskUInt32(int left, int right) => s_maskUInt32(left, right); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Vector CreateWhileLessThanMaskInt32(int left, int right) => s_maskInt32(left, right); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Vector CreateWhileLessThanMaskSingle(int left, int right) => s_maskSingle(left, right); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Vector CreateWhileLessThanMaskUInt64(int left, int right) => s_maskUInt64(left, right); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Vector CreateWhileLessThanMaskUInt32(long left, long right) => s_maskUInt32Long(left, right); - - // Helpers to resolve method by new name first, then old name. - private static MethodInfo Resolve(string newName, string oldName, Type[] paramTypes) - { - return typeof(Sve).GetMethod(newName, BindingFlags.Public | BindingFlags.Static, null, paramTypes, null) - ?? typeof(Sve).GetMethod(oldName, BindingFlags.Public | BindingFlags.Static, null, paramTypes, null) - ?? throw new PlatformNotSupportedException($"Neither '{newName}' nor '{oldName}' found on Sve."); - } - - private static TDelegate Bind(string newName, string oldName, Type[] paramTypes) where TDelegate : Delegate - => (TDelegate)Delegate.CreateDelegate(typeof(TDelegate), Resolve(newName, oldName, paramTypes)); - - // Direct delegate binding — return type matches between old and new names. - private static Func> InitMaskByte() - => Bind>>("CreateWhileLessThanMaskByte", "CreateWhileLessThanMask8Bit", s_intInt); - - private static Func> InitMaskUInt16() - => Bind>>("CreateWhileLessThanMaskUInt16", "CreateWhileLessThanMask16Bit", s_intInt); - - private static Func> InitMaskUInt32() - => Bind>>("CreateWhileLessThanMaskUInt32", "CreateWhileLessThanMask32Bit", s_intInt); - - private static Func> InitMaskUInt64() - => Bind>>("CreateWhileLessThanMaskUInt64", "CreateWhileLessThanMask64Bit", s_intInt); - - private static Func> InitMaskUInt32Long() - => Bind>>("CreateWhileLessThanMaskUInt32", "CreateWhileLessThanMask32Bit", s_longLong); - - // Cast-wrapping delegates — old name returns unsigned type, but caller needs signed/float reinterpret. - private static Func> InitMaskInt16() - { - var m = typeof(Sve).GetMethod("CreateWhileLessThanMaskInt16", BindingFlags.Public | BindingFlags.Static, null, s_intInt, null); - if (m != null) return (Func>)Delegate.CreateDelegate(typeof(Func>), m); - var old = Bind>>("CreateWhileLessThanMaskUInt16", "CreateWhileLessThanMask16Bit", s_intInt); - return (l, r) => { var v = old(l, r); return Unsafe.As, Vector>(ref v); }; - } - - private static Func> InitMaskInt32() - { - var m = typeof(Sve).GetMethod("CreateWhileLessThanMaskInt32", BindingFlags.Public | BindingFlags.Static, null, s_intInt, null); - if (m != null) return (Func>)Delegate.CreateDelegate(typeof(Func>), m); - var old = Bind>>("CreateWhileLessThanMaskUInt32", "CreateWhileLessThanMask32Bit", s_intInt); - return (l, r) => { var v = old(l, r); return Unsafe.As, Vector>(ref v); }; - } - - private static Func> InitMaskSingle() - { - var m = typeof(Sve).GetMethod("CreateWhileLessThanMaskSingle", BindingFlags.Public | BindingFlags.Static, null, s_intInt, null); - if (m != null) return (Func>)Delegate.CreateDelegate(typeof(Func>), m); - var old = Bind>>("CreateWhileLessThanMaskUInt32", "CreateWhileLessThanMask32Bit", s_intInt); - return (l, r) => { var v = old(l, r); return Unsafe.As, Vector>(ref v); }; - } - } -} diff --git a/src/benchmarks/micro/sve/TCPChecksum.cs b/src/benchmarks/micro/sve/TCPChecksum.cs index ebb0f35697f..3463d688ab1 100644 --- a/src/benchmarks/micro/sve/TCPChecksum.cs +++ b/src/benchmarks/micro/sve/TCPChecksum.cs @@ -177,7 +177,7 @@ public unsafe void SveTCPChecksum() int i = 0; Vector acc = Vector.Zero; - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt16(0, lengthWords); + Vector pLoop = Sve.CreateWhileLessThanMaskUInt16(0, lengthWords); while (Sve.TestAnyTrue(pTrue, pLoop)) { Vector d = Sve.LoadVector(pLoop, ((ushort*)p) + i); @@ -187,7 +187,7 @@ public unsafe void SveTCPChecksum() // Handle loop predicate. i += (int)Sve.Count16BitElements(); - pLoop = SveMaskHelper.CreateWhileLessThanMaskUInt16(i, lengthWords); + pLoop = Sve.CreateWhileLessThanMaskUInt16(i, lengthWords); } // Reduce result to scalar. ulong sum = Sve.AddAcross(acc).ToScalar(); diff --git a/src/benchmarks/micro/sve/UpscaleFilter.cs b/src/benchmarks/micro/sve/UpscaleFilter.cs index 2d1c6e43602..f2b13df89be 100644 --- a/src/benchmarks/micro/sve/UpscaleFilter.cs +++ b/src/benchmarks/micro/sve/UpscaleFilter.cs @@ -137,7 +137,7 @@ public unsafe void Sve2UpscaleFilter() { int lmt = Size - 1; int i = 0; - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskByte(0, lmt); + Vector pLoop = Sve.CreateWhileLessThanMaskByte(0, lmt); while (Sve.TestAnyTrue(pTrue, pLoop)) { // Load two consecutive samples. @@ -168,7 +168,7 @@ public unsafe void Sve2UpscaleFilter() Sve.StoreAndZip(pLoop, output + i * 2, (b0, b1)); i += (int)Sve.Count8BitElements(); - pLoop = SveMaskHelper.CreateWhileLessThanMaskByte(i, lmt); + pLoop = Sve.CreateWhileLessThanMaskByte(i, lmt); } } } diff --git a/src/benchmarks/micro/sve/VectorMax.cs b/src/benchmarks/micro/sve/VectorMax.cs index d23ba99fecb..2e040da2104 100644 --- a/src/benchmarks/micro/sve/VectorMax.cs +++ b/src/benchmarks/micro/sve/VectorMax.cs @@ -129,7 +129,7 @@ public unsafe void SveVectorMax() short cnth = (short)Sve.Count16BitElements(); Vector pTrue = Sve.CreateTrueMaskInt16(); - Vector pLoop = SveMaskHelper.CreateWhileLessThanMaskInt16(0, Size); + Vector pLoop = Sve.CreateWhileLessThanMaskInt16(0, Size); Vector idxVec = Vector.Indices; // Initialize the first vector worth of values. @@ -137,7 +137,7 @@ public unsafe void SveVectorMax() Vector maxIdxVec = idxVec; i += cnth; - pLoop = SveMaskHelper.CreateWhileLessThanMaskInt16(i, Size); + pLoop = Sve.CreateWhileLessThanMaskInt16(i, Size); while (Sve.TestFirstTrue(pTrue, pLoop)) { Vector val = Sve.LoadVector(pLoop, input + i); @@ -152,7 +152,7 @@ public unsafe void SveVectorMax() // Handle loop. i += cnth; - pLoop = SveMaskHelper.CreateWhileLessThanMaskInt16(i, Size); + pLoop = Sve.CreateWhileLessThanMaskInt16(i, Size); } // Get the maximum element across the max vector.