Skip to content

Commit ba86cc0

Browse files
alsepkow and damyanp
authored
Execution Tests: Long Vector - Add basic derivative and quad op tests (#7938)
This PR resolves #7471. All new tests were validated against a local build of WARP with fixes for the quad and derivative ops with long vector arguments. --------- Co-authored-by: Damyan Pepper <damyanp@microsoft.com>
1 parent 59495c1 commit ba86cc0

File tree

3 files changed

+195
-7
lines changed

3 files changed

+195
-7
lines changed

tools/clang/unittests/HLSLExec/LongVectorOps.def

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,13 +212,40 @@ OP_DEFAULT_DEFINES(Wave, WaveActiveBitXor, 1, "TestWaveActiveBitXor", "", " -DFU
212212
OP_DEFAULT_DEFINES(Wave, WaveActiveAllEqual, 1, "TestWaveActiveAllEqual", "", " -DFUNC_WAVE_ACTIVE_ALL_EQUAL=1")
213213
OP_DEFAULT_DEFINES(Wave, WaveReadLaneAt, 1, "TestWaveReadLaneAt", "", " -DFUNC_WAVE_READ_LANE_AT=1")
214214
OP_DEFAULT_DEFINES(Wave, WaveReadLaneFirst, 1, "TestWaveReadLaneFirst", "", " -DFUNC_WAVE_READ_LANE_FIRST=1")
215-
OP_DEFAULT_DEFINES(Wave, WavePrefixSum, 1, "TestWavePrefixSum", "", " -DFUNC_WAVE_PREFIX_SUM=1 -DIS_WAVE_PREFIX_OP=1")
216-
OP_DEFAULT_DEFINES(Wave, WavePrefixProduct, 1, "TestWavePrefixProduct", "", " -DFUNC_WAVE_PREFIX_PRODUCT=1 -DIS_WAVE_PREFIX_OP=1")
215+
OP_DEFAULT_DEFINES(Wave, WavePrefixSum, 1, "TestWavePrefixSum", "", " -DFUNC_WAVE_PREFIX_SUM=1 -DOP_STORES_RESULT_ON_SPECIFIC_LANE=1")
216+
OP_DEFAULT_DEFINES(Wave, WavePrefixProduct, 1, "TestWavePrefixProduct", "", " -DFUNC_WAVE_PREFIX_PRODUCT=1 -DOP_STORES_RESULT_ON_SPECIFIC_LANE=1")
217217
OP(Wave, WaveMultiPrefixSum, 1, "TestWaveMultiPrefixSum", "", " -DFUNC_WAVE_MULTI_PREFIX_SUM=1 -DIS_WAVE_PREFIX_OP=1", "LongVectorOp", Default1, Default2, Default3)
218218
OP(Wave, WaveMultiPrefixProduct, 1, "TestWaveMultiPrefixProduct", "", " -DFUNC_WAVE_MULTI_PREFIX_PRODUCT=1 -DIS_WAVE_PREFIX_OP=1", "LongVectorOp", Default1, Default2, Default3)
219219
OP(Wave, WaveMultiPrefixBitAnd, 1, "TestWaveMultiPrefixBitAnd", "", " -DFUNC_WAVE_MULTI_PREFIX_BIT_AND=1 -DIS_WAVE_PREFIX_OP=1", "LongVectorOp", WaveMultiPrefixBitwise, Default2, Default3)
220220
OP(Wave, WaveMultiPrefixBitOr, 1, "TestWaveMultiPrefixBitOr", "", " -DFUNC_WAVE_MULTI_PREFIX_BIT_OR=1 -DIS_WAVE_PREFIX_OP=1", "LongVectorOp", WaveMultiPrefixBitwise, Default2, Default3)
221221
OP(Wave, WaveMultiPrefixBitXor, 1, "TestWaveMultiPrefixBitXor", "", " -DFUNC_WAVE_MULTI_PREFIX_BIT_XOR=1 -DIS_WAVE_PREFIX_OP=1", "LongVectorOp", WaveMultiPrefixBitwise, Default2, Default3)
222222
OP_DEFAULT_DEFINES(Wave, WaveMatch, 1, "TestWaveMatch", "", " -DFUNC_WAVE_MATCH=1 -DIS_WAVE_PREFIX_OP=1")
223223

224+
// Declares one derivative op. Every derivative op uses the "TestDerivative"
// shader entry point with arity 1; DERIVATIVE_INTRINSIC is a string literal
// naming the HLSL intrinsic and is forwarded as -DDERIVATIVE_FUNC=<name>.
// The shader is built with NUMTHREADS_XYZ=2,2,1 and
// OP_STORES_RESULT_ON_SPECIFIC_LANE=1.
//
// The compiler-argument string is assembled from adjacent string literals.
// Splitting a single literal with a backslash-newline would splice the next
// source line's leading indentation into the argument string.
//
// NOTE(review): the input sets are listed as Default2, Default1, Default3,
// unlike OP_QUAD_READ which uses Default1, Default2, Default3 - presumably
// intentional; confirm.
#define OP_DERIVATIVE(GROUP, SYMBOL, DERIVATIVE_INTRINSIC)                     \
  OP(GROUP, SYMBOL, 1, "TestDerivative", "",                                   \
     "-DFUNC_TEST_DERIVATIVE=1"                                                \
     " -DNUMTHREADS_XYZ=2,2,1 -DOP_STORES_RESULT_ON_SPECIFIC_LANE=1"           \
     " -DDERIVATIVE_FUNC=" DERIVATIVE_INTRINSIC,                               \
     "LongVectorOp", Default2, Default1, Default3)
229+
230+
OP_DERIVATIVE(Derivative, DerivativeDdx, "ddx")
231+
OP_DERIVATIVE(Derivative, DerivativeDdy, "ddy")
232+
OP_DERIVATIVE(Derivative, DerivativeDdxFine, "ddx_fine")
233+
OP_DERIVATIVE(Derivative, DerivativeDdyFine, "ddy_fine")
234+
235+
#undef OP_DERIVATIVE
236+
237+
// Declares one quad read op. Every quad read op uses the "TestQuadRead"
// shader entry point, built with NUMTHREADS_XYZ=2,2,1 and
// OP_STORES_RESULT_ON_SPECIFIC_LANE=1. QUAD_INTRINSIC and SOURCE_LANE_ID are
// string literals forwarded to the shader as -DQUAD_READ_FUNC=<name> and
// -DSOURCE_LANE_ID=<lane>.
#define OP_QUAD_READ(GROUP, ARITY, SYMBOL, QUAD_INTRINSIC, SOURCE_LANE_ID)     \
  OP(GROUP, SYMBOL, ARITY, "TestQuadRead", "",                                 \
     "-DFUNC_TEST_QUAD_READ=1 -DNUMTHREADS_XYZ=2,2,1"                          \
     " -DOP_STORES_RESULT_ON_SPECIFIC_LANE=1"                                  \
     " -DQUAD_READ_FUNC=" QUAD_INTRINSIC " -DSOURCE_LANE_ID=" SOURCE_LANE_ID,  \
     "LongVectorOp", Default1, Default2, Default3)
243+
244+
OP_QUAD_READ(Quad, 2, QuadReadLaneAt, "QuadReadLaneAt", "2")
245+
OP_QUAD_READ(Quad, 1, QuadReadAcrossX, "QuadReadAcrossX", "2")
246+
OP_QUAD_READ(Quad, 1, QuadReadAcrossY, "QuadReadAcrossY", "1")
247+
OP_QUAD_READ(Quad, 1, QuadReadAcrossDiagonal, "QuadReadAcrossDiagonal", "0")
248+
249+
#undef OP_QUAD_READ
250+
224251
#undef OP

tools/clang/unittests/HLSLExec/LongVectors.cpp

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1339,6 +1339,55 @@ template <typename T> struct ExpectedBuilder<OpType::ModF, T> {
13391339
}
13401340
};
13411341

1342+
//
1343+
// Derivative Ops
1344+
//
1345+
1346+
// Coarse derivatives (ddx/ddy): All lanes in quad get same result
1347+
// Fine derivatives (ddx_fine/ddy_fine): Each lane gets unique result
1348+
// For testing, we validate results on lane 3 to keep validation generic
1349+
//
1350+
// The value of A in each lane is computed as: A = A + LaneID * 2
1351+
//
1352+
// Top right (lane 1) - Top left (lane 0)
1353+
DEFAULT_OP_1(OpType::DerivativeDdx, ((A + 2) - (A + 0)));
1354+
// Bottom left (lane 2) - Top left (lane 0)
1355+
DEFAULT_OP_1(OpType::DerivativeDdy, ((A + 4) - (A + 0)));
1356+
1357+
// Bottom right (lane 3) - Bottom left (lane 2)
1358+
DEFAULT_OP_1(OpType::DerivativeDdxFine, ((A + 6) - (A + 4)));
1359+
// Bottom right (lane 3) - Top right (lane 1)
1360+
DEFAULT_OP_1(OpType::DerivativeDdyFine, ((A + 6) - (A + 2)));
1361+
1362+
//
1363+
// Quad Read Ops
1364+
//
1365+
1366+
// Generic expected-value builder shared by all quad read ops.
// The lane the shader reads from is determined via defines in the shader
// code; see TestQuadRead in ShaderOpArith.xml. In every case the vector on
// that lane is filled with the value of its third element (index 2), so the
// expected result is a vector of the input size whose elements all equal
// Inputs[0][2].
#define QUAD_READ_OP(OP, ARITY)                                                \
  template <typename T> struct Op<OP, T, ARITY> : DefaultValidation<T> {};     \
  template <typename T> struct ExpectedBuilder<OP, T> {                        \
    static std::vector<T> buildExpected(Op<OP, T, ARITY> &,                    \
                                        const InputSets<T> &Inputs) {          \
      DXASSERT_NOMSG(Inputs.size() == ARITY);                                  \
      const size_t VectorSize = Inputs[0].size();                              \
      /* Element 2 is read below, so the vector needs at least 3 elements. */  \
      DXASSERT_NOMSG(VectorSize > 2);                                          \
      std::vector<T> Expected;                                                 \
      Expected.assign(VectorSize, Inputs[0][2]);                               \
      return Expected;                                                         \
    }                                                                          \
  };
1383+
1384+
QUAD_READ_OP(OpType::QuadReadLaneAt, 2);
1385+
QUAD_READ_OP(OpType::QuadReadAcrossX, 1);
1386+
QUAD_READ_OP(OpType::QuadReadAcrossY, 1);
1387+
QUAD_READ_OP(OpType::QuadReadAcrossDiagonal, 1);
1388+
1389+
#undef QUAD_READ_OP
1390+
13421391
//
13431392
// Wave Ops
13441393
//
@@ -1701,7 +1750,7 @@ void dispatchWaveOpTest(ID3D12Device *D3DDevice, bool VerboseLogging,
17011750

17021751
const std::string AdditionalCompilerOptions =
17031752
"-DWAVE_SIZE=" + std::to_string(WaveSize) +
1704-
" -DNUMTHREADS_X=" + std::to_string(WaveSize);
1753+
" -DNUMTHREADS_XYZ=" + std::to_string(WaveSize) + ",1,1 ";
17051754

17061755
for (size_t VectorSize : InputVectorSizes) {
17071756
std::vector<std::vector<T>> Inputs =
@@ -2493,6 +2542,60 @@ class DxilConf_SM69_Vectorized {
24932542
HLK_TEST(LoadAndStore_RD_SB_SRV, double);
24942543
HLK_TEST(LoadAndStore_RD_SB_UAV, double);
24952544

2545+
// Derivative
2546+
HLK_TEST(DerivativeDdx, HLSLHalf_t);
2547+
HLK_TEST(DerivativeDdy, HLSLHalf_t);
2548+
HLK_TEST(DerivativeDdxFine, HLSLHalf_t);
2549+
HLK_TEST(DerivativeDdyFine, HLSLHalf_t);
2550+
HLK_TEST(DerivativeDdx, float);
2551+
HLK_TEST(DerivativeDdy, float);
2552+
HLK_TEST(DerivativeDdxFine, float);
2553+
HLK_TEST(DerivativeDdyFine, float);
2554+
2555+
// Quad
2556+
HLK_TEST(QuadReadLaneAt, HLSLBool_t);
2557+
HLK_TEST(QuadReadAcrossX, HLSLBool_t);
2558+
HLK_TEST(QuadReadAcrossY, HLSLBool_t);
2559+
HLK_TEST(QuadReadAcrossDiagonal, HLSLBool_t);
2560+
HLK_TEST(QuadReadLaneAt, int16_t);
2561+
HLK_TEST(QuadReadAcrossX, int16_t);
2562+
HLK_TEST(QuadReadAcrossY, int16_t);
2563+
HLK_TEST(QuadReadAcrossDiagonal, int16_t);
2564+
HLK_TEST(QuadReadLaneAt, int32_t);
2565+
HLK_TEST(QuadReadAcrossX, int32_t);
2566+
HLK_TEST(QuadReadAcrossY, int32_t);
2567+
HLK_TEST(QuadReadAcrossDiagonal, int32_t);
2568+
HLK_TEST(QuadReadLaneAt, int64_t);
2569+
HLK_TEST(QuadReadAcrossX, int64_t);
2570+
HLK_TEST(QuadReadAcrossY, int64_t);
2571+
HLK_TEST(QuadReadAcrossDiagonal, int64_t);
2572+
HLK_TEST(QuadReadLaneAt, uint16_t);
2573+
HLK_TEST(QuadReadAcrossX, uint16_t);
2574+
HLK_TEST(QuadReadAcrossY, uint16_t);
2575+
HLK_TEST(QuadReadAcrossDiagonal, uint16_t);
2576+
HLK_TEST(QuadReadLaneAt, uint32_t);
2577+
HLK_TEST(QuadReadAcrossX, uint32_t);
2578+
HLK_TEST(QuadReadAcrossY, uint32_t);
2579+
HLK_TEST(QuadReadAcrossDiagonal, uint32_t);
2580+
HLK_TEST(QuadReadLaneAt, uint64_t);
2581+
HLK_TEST(QuadReadAcrossX, uint64_t);
2582+
HLK_TEST(QuadReadAcrossY, uint64_t);
2583+
HLK_TEST(QuadReadAcrossDiagonal, uint64_t);
2584+
HLK_TEST(QuadReadLaneAt, HLSLHalf_t);
2585+
HLK_TEST(QuadReadAcrossX, HLSLHalf_t);
2586+
HLK_TEST(QuadReadAcrossY, HLSLHalf_t);
2587+
HLK_TEST(QuadReadAcrossDiagonal, HLSLHalf_t);
2588+
HLK_TEST(QuadReadLaneAt, float);
2589+
HLK_TEST(QuadReadAcrossX, float);
2590+
HLK_TEST(QuadReadAcrossY, float);
2591+
HLK_TEST(QuadReadAcrossDiagonal, float);
2592+
HLK_TEST(QuadReadLaneAt, double);
2593+
HLK_TEST(QuadReadAcrossX, double);
2594+
HLK_TEST(QuadReadAcrossY, double);
2595+
HLK_TEST(QuadReadAcrossDiagonal, double);
2596+
2597+
// Wave
2598+
24962599
HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLBool_t);
24972600
HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLBool_t);
24982601
HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLBool_t);

tools/clang/unittests/HLSLExec/ShaderOpArith.xml

Lines changed: 62 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4456,8 +4456,67 @@ void MSMain(uint GID : SV_GroupIndex,
44564456
}
44574457
#endif
44584458
4459-
#ifdef NUMTHREADS_X
4460-
#define NUMTHREADS_ATTR [numthreads(NUMTHREADS_X, 1, 1)]
4459+
#ifdef FUNC_TEST_DERIVATIVE
4460+
// Applies DERIVATIVE_FUNC to the input vector after offsetting every element
// by twice the lane index, then stores the result at offset 0 of
// g_OutputVector from lane 3 only.
// NOTE(review): the quad-position table below assumes WaveGetLaneIndex()
// maps directly onto quad positions for this thread group - confirm.
void TestDerivative(vector<TYPE, NUM> Vector)
4461+
{
4462+
// 0 == upper-left lane in quad
4463+
// 1 == upper-right lane in quad
4464+
// 2 == lower-left lane in quad
4465+
// 3 == lower-right lane in quad
4466+
4467+
const uint LaneIndex = WaveGetLaneIndex();
4468+
4469+
// We need to make sure the values are unique across lanes used in the
4470+
// partial derivative calculation so we can get a non-zero partial
4471+
// derivative. Multiplying the lane index by 2 is a simple way to
4472+
// ensure that. And we do this on all lanes so this function can be
4473+
// used generically for coarse and fine partial derivatives.
4474+
Vector += ((TYPE)(LaneIndex * 2));
4475+
4476+
vector<OUT_TYPE, NUM> Result = DERIVATIVE_FUNC(Vector);
4477+
4478+
// For coarse derivatives, all lanes in the quad get the same result.
4479+
// But for fine derivatives, each lane gets a different result. To
4480+
// keep things generic we only store from lane 3 (the lower-right lane),
4481+
// as that is the lane we arbitrarily chose for validating fine derivatives.
4482+
if(LaneIndex == 3)
4483+
{
4484+
g_OutputVector.Store< vector<OUT_TYPE, NUM> >(0, Result);
4485+
}
4486+
}
4487+
#endif
4488+
4489+
#ifdef FUNC_TEST_QUAD_READ
4490+
// Makes the vector on lane SOURCE_LANE_ID distinct, applies QUAD_READ_FUNC to
// it, and stores the result at offset 0 of g_OutputVector from lane 3 only.
// NOTE(review): assumes NUM >= 3 so that Vector[2] is in range - confirm.
// NOTE(review): a cross-lane read is only exercised if SOURCE_LANE_ID != 3,
// the lane that stores the result - confirm against the op definitions.
void TestQuadRead(vector<TYPE, NUM> Vector)
4491+
{
4492+
const uint LaneIndex = WaveGetLaneIndex();
4493+
4494+
// On the lane whose index equals SOURCE_LANE_ID, overwrite every element
4495+
// with the vector's third element (index 2). The third element is an
4496+
// arbitrary choice that makes expected values easy to compute CPU side.
4497+
[unroll]
4498+
for(uint i = 0; i < NUM; ++i)
4499+
{
4500+
Vector[i] = (LaneIndex == SOURCE_LANE_ID) ? Vector[2] : Vector[i];
4501+
}
4502+
4503+
#if IS_BINARY_OP
4504+
// QuadReadLaneAt: takes the source lane as an explicit second argument.
4505+
vector<OUT_TYPE, NUM> Result = QUAD_READ_FUNC(Vector, SOURCE_LANE_ID);
4506+
#else
4507+
// QuadReadAcross*: takes only the vector.
4508+
vector<OUT_TYPE, NUM> Result = QUAD_READ_FUNC(Vector);
4509+
#endif
4510+
4511+
if(LaneIndex == 3)
4512+
{
4513+
g_OutputVector.Store< vector<OUT_TYPE, NUM> >(0, Result);
4514+
}
4515+
}
4516+
#endif
4517+
4518+
#ifdef NUMTHREADS_XYZ
4519+
#define NUMTHREADS_ATTR [numthreads(NUMTHREADS_XYZ)]
44614520
#else
44624521
#define NUMTHREADS_ATTR [numthreads(1, 1, 1)]
44634522
#endif
@@ -4498,8 +4557,7 @@ void MSMain(uint GID : SV_GroupIndex,
44984557
#endif
44994558
45004559
vector<OUT_TYPE, OutNum> OutputVector;
4501-
#ifdef IS_WAVE_PREFIX_OP
4502-
// Wave prefix ops store the output on a specific lane only.
4560+
#ifdef OP_STORES_RESULT_ON_SPECIFIC_LANE
45034561
FUNC(Input1);
45044562
return;
45054563
#elif TEST_ARRAY_OPERATOR

0 commit comments

Comments
 (0)