nir,nak: Add a nir_op_prmt_nv

We've had this in hardware forever and it's really useful.  We may as
well add it to NIR so we can use it in various lowerings.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30218>
This commit is contained in:
Faith Ekstrand
2024-07-16 17:15:26 -05:00
committed by Marge Bot
parent 3619ec9630
commit bbccbd8d50
2 changed files with 23 additions and 0 deletions

View File

@@ -1371,6 +1371,19 @@ binop_convert("interleave_agx", tuint32, tuint16, "", """
be used as-is for Morton encoding.
""")
# NVIDIA PRMT (byte permute)
#
# src0 is the selector: nibble i (bits [4*i+3 : 4*i]) controls destination
# byte i.  The low three bits of the nibble pick one of eight source bytes,
# 0-3 from src1 and 4-7 from src2.  If bit 3 of the nibble is set, the
# picked byte is replaced by its top bit replicated across all eight bits
# (0x00 or 0xff).  Only the low 16 bits of src0 are read.
opcode("prmt_nv", 0, tuint32, [0, 0, 0], [tuint32, tuint32, tuint32],
False, "", """
dst = 0;
for (unsigned i = 0; i < 4; i++) {
uint8_t byte = (src0 >> (i * 4)) & 0x7;
uint8_t x = byte < 4 ? (src1 >> (byte * 8))
: (src2 >> ((byte - 4) * 8));
if ((src0 >> (i * 4)) & 0x8)
x = ((int8_t)x) >> 7;
dst |= ((uint32_t)x) << i * 8;
}""")
# 24b multiply into 32b result (with sign extension)
#
# Each source is sign-extended from its low 24 bits (shift left by 8, then
# shift right by 8 as int32_t) before the two values are multiplied.
binop("imul24", tint32, _2src_commutative + associative,
"(((int32_t)src0 << 8) >> 8) * (((int32_t)src1 << 8) >> 8)")

View File

@@ -1428,6 +1428,16 @@ impl<'a> ShaderFromNir<'a> {
b.prmt(low.into(), high.into(), [0, 1, 4, 5])
}
nir_op_prmt_nv => {
    // The NIR op carries the selector as source 0 and the two data
    // words as sources 1 and 2; OpPrmt wants the data words in `srcs`
    // and the selector in `sel`.
    let sel = srcs[0];
    let data = [srcs[1], srcs[2]];
    let result = b.alloc_ssa(RegFile::GPR, 1);
    b.push_op(OpPrmt {
        dst: result.into(),
        srcs: data,
        sel,
        mode: PrmtMode::Index,
    });
    result
}
nir_op_sdot_4x8_iadd => {
let dst = b.alloc_ssa(RegFile::GPR, 1);
b.push_op(OpIDp4 {