From 7cd9680554b16e7de07873cd3be428953be9ad07 Mon Sep 17 00:00:00 2001
From: Faith Ekstrand
Date: Mon, 4 Dec 2023 09:14:56 -0600
Subject: [PATCH] nak: Add back OpBMov with better semantics

It now takes a regular Src and Dst and we handle both GPR -> Bar vs.
Bar -> GPR forms in the emit code.

Part-of:
---
 src/nouveau/compiler/nak_builder.rs     | 22 +++++++++++++
 src/nouveau/compiler/nak_encode_sm70.rs | 43 +++++++++++++++++++++++++
 src/nouveau/compiler/nak_ir.rs          | 31 +++++++++++++++++-
 3 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/src/nouveau/compiler/nak_builder.rs b/src/nouveau/compiler/nak_builder.rs
index 7280f96bf76..f06665703a5 100644
--- a/src/nouveau/compiler/nak_builder.rs
+++ b/src/nouveau/compiler/nak_builder.rs
@@ -354,6 +354,28 @@ pub trait SSABuilder: Builder {
         self.copy_to(dst.into(), src);
         dst
     }
+
+    fn bmov_to_bar(&mut self, src: Src) -> SSARef {
+        assert!(src.src_ref.as_ssa().unwrap().file() == RegFile::GPR);
+        let dst = self.alloc_ssa(RegFile::Bar, 1);
+        self.push_op(OpBMov {
+            dst: dst.into(),
+            src: src,
+            clear: false,
+        });
+        dst
+    }
+
+    fn bmov_to_gpr(&mut self, src: Src) -> SSARef {
+        assert!(src.src_ref.as_ssa().unwrap().file() == RegFile::Bar);
+        let dst = self.alloc_ssa(RegFile::GPR, 1);
+        self.push_op(OpBMov {
+            dst: dst.into(),
+            src: src,
+            clear: false,
+        });
+        dst
+    }
 }
 
 pub struct InstrBuilder {
diff --git a/src/nouveau/compiler/nak_encode_sm70.rs b/src/nouveau/compiler/nak_encode_sm70.rs
index f625a27bfb3..f5922845ce6 100644
--- a/src/nouveau/compiler/nak_encode_sm70.rs
+++ b/src/nouveau/compiler/nak_encode_sm70.rs
@@ -51,6 +51,14 @@ fn src_mod_is_bnot(src_mod: SrcMod) -> bool {
     }
 }
 
+fn dst_is_bar(dst: Dst) -> bool {
+    match dst {
+        Dst::None => false,
+        Dst::SSA(ssa) => ssa.file() == RegFile::Bar,
+        Dst::Reg(reg) => reg.file() == RegFile::Bar,
+    }
+}
+
 impl ALUSrc {
     fn from_src_file(src: &Src, file: RegFile) -> ALUSrc {
         match src.src_ref {
@@ -243,6 +251,22 @@ impl SM70Instr {
         }
     }
 
+    fn set_bar_reg(&mut self, range: Range<usize>, reg: RegRef) {
+        assert!(range.len() == 4);
+        assert!(reg.file() == RegFile::Bar);
+        assert!(reg.comps() == 1);
+        self.set_field(range, reg.base_idx());
+    }
+
+    fn set_bar_dst(&mut self, range: Range<usize>, dst: Dst) {
+        self.set_bar_reg(range, *dst.as_reg().unwrap());
+    }
+
+    fn set_bar_src(&mut self, range: Range<usize>, src: Src) {
+        assert!(src.src_mod.is_none());
+        self.set_bar_reg(range, *src.src_ref.as_reg().unwrap());
+    }
+
     fn set_alu_reg(
         &mut self,
         range: Range<usize>,
@@ -1673,6 +1697,24 @@ impl SM70Instr {
         self.set_bit(84, true); // .CLEAR
     }
 
+    fn encode_bmov(&mut self, op: &OpBMov) {
+        if dst_is_bar(op.dst) {
+            self.set_opcode(0x356);
+
+            self.set_bar_dst(24..28, op.dst);
+            self.set_reg_src(32..40, op.src);
+
+            self.set_bit(84, op.clear);
+        } else {
+            self.set_opcode(0x355);
+
+            self.set_dst(op.dst);
+            self.set_bar_src(24..28, op.src);
+
+            self.set_bit(84, op.clear);
+        }
+    }
+
     fn encode_break(&mut self, op: &OpBreak) {
         self.set_opcode(0x942);
         self.set_field(16..20, op.bar.idx());
@@ -1909,6 +1951,7 @@ impl SM70Instr {
             Op::CCtl(op) => si.encode_cctl(&op),
             Op::MemBar(op) => si.encode_membar(&op),
             Op::BClear(op) => si.encode_bclear(&op),
+            Op::BMov(op) => si.encode_bmov(&op),
             Op::Break(op) => si.encode_break(&op),
             Op::BSSy(op) => si.encode_bssy(&op, ip, labels),
             Op::BSync(op) => si.encode_bsync(&op),
diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs
index 75bc352b2c0..272f1da4000 100644
--- a/src/nouveau/compiler/nak_ir.rs
+++ b/src/nouveau/compiler/nak_ir.rs
@@ -3643,6 +3643,25 @@ impl DisplayOp for OpBClear {
 }
 impl_display_for_op!(OpBClear);
 
+#[repr(C)]
+#[derive(SrcsAsSlice, DstsAsSlice)]
+pub struct OpBMov {
+    pub dst: Dst,
+    pub src: Src,
+    pub clear: bool,
+}
+
+impl DisplayOp for OpBMov {
+    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "bmov.32")?;
+        if self.clear {
+            write!(f, ".clear")?;
+        }
+        write!(f, " {}", self.src)
+    }
+}
+impl_display_for_op!(OpBMov);
+
 #[repr(C)]
 #[derive(SrcsAsSlice, DstsAsSlice)]
 pub struct OpBreak {
@@ -4326,6 +4345,7 @@ pub enum Op {
     CCtl(OpCCtl),
     MemBar(OpMemBar),
     BClear(OpBClear),
+    BMov(OpBMov),
     Break(OpBreak),
     BSSy(OpBSSy),
     BSync(OpBSync),
@@ -4695,12 +4715,13 @@ impl Instr {
             | Op::FSOut(_)
             | Op::Out(_)
             | Op::OutFinal(_) => false,
+            Op::BMov(op) => !op.clear,
             _ => true,
         }
     }
 
     pub fn has_fixed_latency(&self) -> bool {
-        match self.op {
+        match &self.op {
             // Float ALU
             Op::FAdd(_)
             | Op::FFma(_)
@@ -4768,6 +4789,14 @@ impl Instr {
             Op::Bra(_) | Op::Exit(_) => true,
             Op::WarpSync(_) => false,
 
+            // BMOV: barriers only when using gprs (and only valid for the gpr),
+            // no barriers for the others.
+            Op::BMov(op) => match &op.dst {
+                Dst::None => true,
+                Dst::SSA(vec) => vec.file() == RegFile::Bar,
+                Dst::Reg(reg) => reg.file() == RegFile::Bar,
+            },
+
             // Geometry ops
             Op::Out(_) | Op::OutFinal(_) => false,
 