diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 49814215a45..8040f80fdd1 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -1371,6 +1371,19 @@ binop_convert("interleave_agx", tuint32, tuint16, "", """
       be used as-is for Morton encoding.
       """)
 
+# NVIDIA PRMT: nibble i of src0 picks dst byte i from src2:src1; bit 3 replicates its MSB
+opcode("prmt_nv", 0, tuint32, [0, 0, 0], [tuint32, tuint32, tuint32],
+       False, "", """
+    dst = 0;
+    for (unsigned i = 0; i < 4; i++) {
+        uint8_t byte = (src0 >> (i * 4)) & 0x7;
+        uint8_t x = byte < 4 ? (src1 >> (byte * 8))
+                             : (src2 >> ((byte - 4) * 8));
+        if ((src0 >> (i * 4)) & 0x8)
+            x = ((int8_t)x) >> 7;
+        dst |= ((uint32_t)x) << i * 8;
+    }""")
+
 # 24b multiply into 32b result (with sign extension)
 binop("imul24", tint32, _2src_commutative + associative,
       "(((int32_t)src0 << 8) >> 8) * (((int32_t)src1 << 8) >> 8)")
diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index d97dfb8d959..88bdc34a1c1 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -1428,6 +1428,16 @@ impl<'a> ShaderFromNir<'a> {
 
                 b.prmt(low.into(), high.into(), [0, 1, 4, 5])
             }
+            nir_op_prmt_nv => {
+                let dst = b.alloc_ssa(RegFile::GPR, 1);
+                b.push_op(OpPrmt {
+                    dst: dst.into(),
+                    srcs: [srcs[1], srcs[2]], // NIR src1, src2: data bytes
+                    sel: srcs[0], // NIR src0: byte selectors
+                    mode: PrmtMode::Index,
+                });
+                dst
+            }
             nir_op_sdot_4x8_iadd => {
                 let dst = b.alloc_ssa(RegFile::GPR, 1);
                 b.push_op(OpIDp4 {