From 4a0f5fff875a97ffb9daf73f99124c30ac09f426 Mon Sep 17 00:00:00 2001
From: Benjamin Lee
Date: Mon, 22 Jan 2024 11:24:38 -0800
Subject: [PATCH] nak: implement rro op on SM50

This is the "range reduction operator", which is needed to preprocess srcs
for some of the mufu ops on SM50.

Part-of:
---
 src/nouveau/compiler/nak/encode_sm50.rs | 41 ++++++++++++++++++++++++++++
 src/nouveau/compiler/nak/ir.rs          | 41 ++++++++++++++++++++++++++-
 src/nouveau/compiler/nak/legalize.rs    |  3 ++
 3 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/src/nouveau/compiler/nak/encode_sm50.rs b/src/nouveau/compiler/nak/encode_sm50.rs
index 6b108e1085d..85baa2c1822 100644
--- a/src/nouveau/compiler/nak/encode_sm50.rs
+++ b/src/nouveau/compiler/nak/encode_sm50.rs
@@ -1646,6 +1646,46 @@ impl SM50Instr {
         self.set_bit(47, false); /* dst.CC */
     }
 
+    /// Encodes the MuFu range reduction operator (RRO).
+    ///
+    /// The opcode depends on the kind of source operand (immediate,
+    /// register/zero or constant buffer). Bit 39 carries the reduction mode:
+    /// 0 for `RroOp::SinCos`, 1 for `RroOp::Exp2`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the source is of any other kind, or if an immediate source
+    /// still carries a source modifier.
+    fn encode_rro(&mut self, op: &OpRro) {
+        match &op.src.src_ref {
+            SrcRef::Imm32(imm32) => {
+                // Only the raw immediate value is encoded here, so a
+                // modifier left on the source would be silently dropped.
+                assert!(op.src.src_mod.is_none());
+                self.set_opcode(0x3890);
+                self.set_src_imm_f20(20..39, 56, *imm32);
+            }
+            SrcRef::Zero | SrcRef::Reg(_) => {
+                self.set_opcode(0x5c90);
+                self.set_reg_fmod_src(20..28, 49, 45, op.src);
+            }
+            SrcRef::CBuf(_) => {
+                self.set_opcode(0x4c90);
+                self.set_cb_fmod_src(20..39, 49, 45, op.src);
+            }
+            src => panic!("Unsupported src type for RRO: {src}"),
+        }
+
+        self.set_dst(op.dst);
+        self.set_field(
+            39..40,
+            match op.op {
+                RroOp::SinCos => 0u8,
+                RroOp::Exp2 => 1u8,
+            },
+        );
+    }
+
     fn encode_mufu(&mut self, op: &OpMuFu) {
         assert!(op.src.is_reg_or_zero());
 
@@ -2014,6 +2054,7 @@ impl SM50Instr {
             Op::FSet(op) => si.encode_fset(&op),
             Op::FSetP(op) => si.encode_fsetp(&op),
             Op::FSwzAdd(op) => si.encode_fswzadd(&op),
+            Op::Rro(op) => si.encode_rro(&op),
             Op::MuFu(op) => si.encode_mufu(&op),
             Op::Flo(op) => si.encode_flo(&op),
             Op::DAdd(op) => si.encode_dadd(&op),
diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs
index 23cc138e5cc..f5acf092f08 100644
--- a/src/nouveau/compiler/nak/ir.rs
+++ b/src/nouveau/compiler/nak/ir.rs
@@ -2414,6 +2414,44 @@ impl DisplayOp for OpFSwzAdd {
 }
 impl_display_for_op!(OpFSwzAdd);
 
+/// Mode selector of the MuFu range reduction operator, see [`OpRro`].
+#[derive(Clone, Copy, Eq, PartialEq)]
+pub enum RroOp {
+    /// Displayed as `.sincos`.
+    SinCos,
+    /// Displayed as `.exp2`.
+    Exp2,
+}
+
+impl fmt::Display for RroOp {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            RroOp::SinCos => write!(f, ".sincos"),
+            RroOp::Exp2 => write!(f, ".exp2"),
+        }
+    }
+}
+
+/// MuFu range reduction operator
+///
+/// Not available on SM70+
+#[repr(C)]
+#[derive(SrcsAsSlice, DstsAsSlice)]
+pub struct OpRro {
+    pub dst: Dst,
+    pub op: RroOp,
+
+    #[src_type(F32)]
+    pub src: Src,
+}
+
+impl DisplayOp for OpRro {
+    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "rro{} {}", self.op, self.src)
+    }
+}
+impl_display_for_op!(OpRro);
+
 #[allow(dead_code)]
 #[derive(Clone, Copy, Eq, PartialEq)]
 pub enum MuFuOp {
@@ -4775,6 +4813,7 @@ pub enum Op {
     FFma(OpFFma),
     FMnMx(OpFMnMx),
     FMul(OpFMul),
+    Rro(OpRro),
     MuFu(OpMuFu),
     FSet(OpFSet),
     FSetP(OpFSetP),
@@ -5220,7 +5259,7 @@ impl Instr {
             | Op::FSwzAdd(_) => true,
 
             // Multi-function unit is variable latency
-            Op::MuFu(_) => false,
+            Op::Rro(_) | Op::MuFu(_) => false,
 
             // Double-precision float ALU
             Op::DAdd(_)
diff --git a/src/nouveau/compiler/nak/legalize.rs b/src/nouveau/compiler/nak/legalize.rs
index 57abdfaa45f..7471e48b4ea 100644
--- a/src/nouveau/compiler/nak/legalize.rs
+++ b/src/nouveau/compiler/nak/legalize.rs
@@ -226,6 +226,9 @@ fn legalize_sm50_instr(
             copy_alu_src_if_not_reg(b, &mut op.srcs[1], SrcType::Pred);
             copy_alu_src_if_not_reg(b, &mut op.srcs[2], SrcType::Pred);
         }
+        Op::Rro(op) => {
+            copy_alu_src_if_f20_overflow(b, &mut op.src, SrcType::F32);
+        }
         Op::MuFu(op) => {
             copy_alu_src_if_not_reg(b, &mut op.src, SrcType::GPR);
         }