From 13c75cab9b5231726facd7b447e1ef6c14a8608c Mon Sep 17 00:00:00 2001 From: FangRui Date: Wed, 27 May 2026 17:13:24 +0800 Subject: [PATCH] Relax thistogram verifier for ui32 byte 3 --- docs/PTO_IR_manual.md | 8 +++-- lib/PTO/IR/PTO.cpp | 36 ++++++++++--------- ...stogram_verify_u32_byte3_unused_idx_a5.pto | 14 ++++++++ 3 files changed, 39 insertions(+), 19 deletions(-) create mode 100644 test/lit/pto/thistogram_verify_u32_byte3_unused_idx_a5.pto diff --git a/docs/PTO_IR_manual.md b/docs/PTO_IR_manual.md index a32de72f7..1cb566af4 100644 --- a/docs/PTO_IR_manual.md +++ b/docs/PTO_IR_manual.md @@ -4616,9 +4616,11 @@ pto.thistogram ins(, : , ) - `idx` rows and valid rows must match `src`. - `idx` must have exactly one column. - When `src` is `ui32`: - - `idx` must use `row_major + none_box` layout. - - `idx` cols and valid cols must match `src`. - - `idx` rows / valid rows must be `1` for `byte = 3` or `2`, `2` for `byte = 1`, and `3` for `byte = 0`. + - When `byte = 3`, `idx` is accepted but not semantically used by the A5 backend intrinsic; no additional layout or shape constraints are imposed beyond the generic `tile_buf`, `loc=vec`, `dtype=ui8`, and rank-2 requirements. + - When `byte = 2`, `1`, or `0`, `idx` must use `row_major + none_box` layout. `idx` cols and valid cols must match `src`. + - When `byte = 2`, `idx` rows / valid rows must be `1`. + - When `byte = 1`, `idx` rows / valid rows must be `2`. + - When `byte = 0`, `idx` rows / valid rows must be `3`. **Hardware Mapping:** diff --git a/lib/PTO/IR/PTO.cpp b/lib/PTO/IR/PTO.cpp index 5427ac36e..5d4e57060 100644 --- a/lib/PTO/IR/PTO.cpp +++ b/lib/PTO/IR/PTO.cpp @@ -6887,22 +6887,26 @@ LogicalResult THistogramOp::verify() { if (!isKnownUnitExtent(idxShape[1]) || !isKnownUnitExtent(idxValid[1])) return emitOpError("expects idx to have exactly one column when src element type is ui16"); } else { - if (idxTB.getBLayoutValueI32() != static_cast(pto::BLayout::RowMajor) || - idxTB.getSLayoutValueI32() != static_cast(pto::SLayout::NoneBox)) - return emitOpError( - "expects idx to use row_major + none_box layout when src element type is ui32"); - if (!hasCompatibleKnownExtent(srcShape[1], idxShape[1]) || - !hasCompatibleKnownExtent(srcValid[1], idxValid[1])) - return emitOpError("expects idx cols and valid cols to match src when src element type is ui32"); - - int64_t expectedIdxRows = 1; - if (byte == 1) - expectedIdxRows = 2; - else if (byte == 0) - expectedIdxRows = 3; - if (!hasCompatibleKnownExtent(idxShape[0], expectedIdxRows) || - !hasCompatibleKnownExtent(idxValid[0], expectedIdxRows)) - return emitOpError("expects idx rows/valid rows to match the byte-selected filter depth when src element type is ui32"); + if (byte != 3) { + if (idxTB.getBLayoutValueI32() != static_cast(pto::BLayout::RowMajor) || + idxTB.getSLayoutValueI32() != static_cast(pto::SLayout::NoneBox)) + return emitOpError( + "expects idx to use row_major + none_box layout when src element type is ui32 and byte is 0, 1, or 2"); + if (!hasCompatibleKnownExtent(srcShape[1], idxShape[1]) || + !hasCompatibleKnownExtent(srcValid[1], idxValid[1])) + return emitOpError( + "expects idx cols and valid cols to match src when src element type is ui32 and byte is 0, 1, or 2"); + + int64_t expectedIdxRows = 1; + if (byte == 1) + expectedIdxRows = 2; + else if (byte == 0) + expectedIdxRows = 3; + if (!hasCompatibleKnownExtent(idxShape[0], expectedIdxRows) || + !hasCompatibleKnownExtent(idxValid[0], expectedIdxRows)) + return emitOpError( + "expects idx rows/valid rows to match the byte-selected filter depth when src element type is ui32 and byte is 0, 1, or 2"); + } } if (dstShape[1] != ShapedType::kDynamic && dstShape[1] < 256) return emitOpError("expects dst shape[1] to be at least 256"); diff --git a/test/lit/pto/thistogram_verify_u32_byte3_unused_idx_a5.pto b/test/lit/pto/thistogram_verify_u32_byte3_unused_idx_a5.pto new file mode 100644 index 000000000..2155dd6c6 --- /dev/null +++ b/test/lit/pto/thistogram_verify_u32_byte3_unused_idx_a5.pto @@ -0,0 +1,14 @@ +// RUN: ptoas --pto-arch=a5 %s 2>&1 | FileCheck %s + +module { + func.func @thistogram_verify_u32_byte3_unused_idx_a5() { + %src = pto.alloc_tile : !pto.tile_buf + %idx = pto.alloc_tile : !pto.tile_buf + %dst = pto.alloc_tile : !pto.tile_buf + pto.thistogram ins(%src, %idx : !pto.tile_buf, !pto.tile_buf) + outs(%dst : !pto.tile_buf) {byte = 3 : i32} + return + } +} + +// CHECK-NOT: error: