diff --git a/src/nouveau/compiler/nak/builder.rs b/src/nouveau/compiler/nak/builder.rs
index 2cca5ed7c7b..67463ba4ee1 100644
--- a/src/nouveau/compiler/nak/builder.rs
+++ b/src/nouveau/compiler/nak/builder.rs
@@ -231,10 +231,24 @@ pub trait SSABuilder: Builder {
 
     fn iabs(&mut self, i: Src) -> SSARef {
         let dst = self.alloc_ssa(RegFile::GPR, 1);
-        self.push_op(OpIAbs {
-            dst: dst.into(),
-            src: i,
-        });
+        // OpIAbs is only emitted for SM70+.  Older targets get an
+        // I32 -> I32 OpI2I with the abs modifier set instead.
+        if self.sm() >= 70 {
+            self.push_op(OpIAbs {
+                dst: dst.into(),
+                src: i,
+            });
+        } else {
+            self.push_op(OpI2I {
+                dst: dst.into(),
+                src: i,
+                src_type: IntType::I32,
+                dst_type: IntType::I32,
+                saturate: false,
+                abs: true,
+                neg: false,
+            });
+        }
         dst
     }
 
diff --git a/src/nouveau/compiler/nak/encode_sm50.rs b/src/nouveau/compiler/nak/encode_sm50.rs
index 2c1b22cf739..a9a650d2d15 100644
--- a/src/nouveau/compiler/nak/encode_sm50.rs
+++ b/src/nouveau/compiler/nak/encode_sm50.rs
@@ -728,6 +728,43 @@ impl SM50Instr {
         self.set_dst(op.dst);
     }
 
+    /// Encodes an `OpI2I` for SM50.
+    ///
+    /// The opcode is picked from the kind of source operand (immediate
+    /// written via `set_src_imm_i20`, register/zero, or constant
+    /// buffer); any other source kind panics.  The `neg`, `abs` and
+    /// `saturate` flags, plus the signedness and `log2(bits / 8)` of
+    /// both integer types, are then written into the instruction.
+    fn encode_i2i(&mut self, op: &OpI2I) {
+        match &op.src.src_ref {
+            SrcRef::Imm32(imm32) => {
+                self.set_opcode(0x38e0);
+                self.set_src_imm_i20(20..39, 56, *imm32);
+            }
+            SrcRef::Zero | SrcRef::Reg(_) => {
+                self.set_opcode(0x5ce0);
+                self.set_reg_src(20..28, op.src);
+            }
+            SrcRef::CBuf(cbuf) => {
+                self.set_opcode(0x4ce0);
+                self.set_src_cb(20..39, cbuf);
+            }
+            src => panic!("Unsupported src type for I2I: {src}"),
+        }
+
+        self.set_bit(45, op.neg);
+        self.set_bit(49, op.abs);
+        self.set_bit(50, op.saturate);
+        self.set_bit(12, op.dst_type.is_signed());
+        self.set_bit(13, op.src_type.is_signed());
+        self.set_field(8..10, (op.dst_type.bits() / 8).ilog2());
+        self.set_field(10..12, (op.src_type.bits() / 8).ilog2());
+        self.set_field(41..43, 0u8); // src.B1-3
+        self.set_bit(47, false); // dst.CC
+
+        self.set_dst(op.dst);
+    }
+
     fn encode_imad(&mut self, op: &OpIMad) {
         assert!(op.srcs[0].is_reg_or_zero());
         assert!(op.srcs[1].is_reg_or_zero());
@@ -1814,37 +1851,6 @@ impl SM50Instr {
         self.set_reg_fmod_src(8..16, 7, 43, op.srcs[0]);
     }
 
-    fn encode_iabs(&mut self, op: &OpIAbs) {
-        assert!(op.src.is_reg_or_zero());
-
-        // IABS isn't a thing on SM50, we use I2I instead.
-
-        // We always assume 32bits signed for now
-        let src_type = IntType::I32;
-        let dst_type = IntType::I32;
-
-        match &op.src.src_ref {
-            SrcRef::Imm32(imm32) => {
-                self.set_opcode(0x38e0);
-                self.set_src_imm_i20(20..39, 56, *imm32);
-            }
-            SrcRef::Zero | SrcRef::Reg(_) => {
-                self.set_opcode(0x5ce0);
-                self.set_reg_src(20..28, op.src);
-            }
-            SrcRef::CBuf(cbuf) => {
-                self.set_opcode(0x4ce0);
-                self.set_src_cb(20..39, cbuf);
-            }
-            src => panic!("Unsupported src type for IABS: {src}"),
-        }
-        self.set_bit(12, dst_type.is_signed());
-        self.set_bit(13, src_type.is_signed());
-        self.set_field(8..10, (dst_type.bits() / 8).ilog2());
-        self.set_field(10..12, (src_type.bits() / 8).ilog2());
-        self.set_dst(op.dst);
-    }
-
     fn encode_iadd2(&mut self, op: &OpIAdd2) {
         let carry_in = match op.carry_in.src_ref {
             SrcRef::Reg(reg) if reg.file() == RegFile::Carry => true,
@@ -1943,7 +1949,6 @@ impl SM50Instr {
             Op::DMnMx(op) => si.encode_dmnmx(&op),
             Op::DMul(op) => si.encode_dmul(&op),
             Op::DSetP(op) => si.encode_dsetp(&op),
-            Op::IAbs(op) => si.encode_iabs(&op),
             Op::IAdd2(op) => si.encode_iadd2(&op),
             Op::Mov(op) => si.encode_mov(&op),
             Op::Sel(op) => si.encode_sel(&op),
@@ -1964,6 +1969,7 @@ impl SM50Instr {
             Op::F2F(op) => si.encode_f2f(&op),
             Op::F2I(op) => si.encode_f2i(&op),
             Op::I2F(op) => si.encode_i2f(&op),
+            Op::I2I(op) => si.encode_i2i(&op),
             Op::IMad(op) => si.encode_imad(&op),
             Op::IMul(op) => si.encode_imul(&op),
             Op::IMnMx(op) => si.encode_imnmx(&op),
diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs
index 248e444591b..23cc138e5cc 100644
--- a/src/nouveau/compiler/nak/ir.rs
+++ b/src/nouveau/compiler/nak/ir.rs
@@ -3197,6 +3197,45 @@ impl DisplayOp for OpI2F {
 }
 impl_display_for_op!(OpI2F);
 
+/// Integer op carrying a source and a destination `IntType` plus
+/// `saturate`, `abs` and `neg` modifiers.
+///
+/// Not used on SM70+
+#[repr(C)]
+#[derive(SrcsAsSlice, DstsAsSlice)]
+pub struct OpI2I {
+    pub dst: Dst,
+
+    #[src_type(ALU)]
+    pub src: Src,
+
+    pub src_type: IntType,
+    pub dst_type: IntType,
+
+    pub saturate: bool,
+    pub abs: bool,
+    pub neg: bool,
+}
+
+impl DisplayOp for OpI2I {
+    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "i2i")?;
+        // No trailing space after `.sat`: the type suffixes must follow
+        // the mnemonic directly, exactly as in the non-saturating case.
+        if self.saturate {
+            write!(f, ".sat")?;
+        }
+        write!(f, "{}{} {}", self.dst_type, self.src_type, self.src,)?;
+        if self.abs {
+            write!(f, ".abs")?;
+        }
+        if self.neg {
+            write!(f, ".neg")?;
+        }
+        Ok(())
+    }
+}
+impl_display_for_op!(OpI2I);
+
 #[repr(C)]
 #[derive(DstsAsSlice)]
 pub struct OpFRnd {
@@ -4772,6 +4811,7 @@ pub enum Op {
     F2F(OpF2F),
     F2I(OpF2I),
     I2F(OpI2F),
+    I2I(OpI2I),
     FRnd(OpFRnd),
     Mov(OpMov),
     Prmt(OpPrmt),
@@ -5214,7 +5254,9 @@ impl Instr {
             | Op::Shr(_) => true,
 
             // Conversions are variable latency?!?
-            Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::FRnd(_) => false,
+            Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::I2I(_) | Op::FRnd(_) => {
+                false
+            }
 
             // Move ops
             Op::Mov(_) | Op::Prmt(_) | Op::Sel(_) => true,
diff --git a/src/nouveau/compiler/nak/legalize.rs b/src/nouveau/compiler/nak/legalize.rs
index 7c0639c697a..0bb37da4764 100644
--- a/src/nouveau/compiler/nak/legalize.rs
+++ b/src/nouveau/compiler/nak/legalize.rs
@@ -267,9 +267,6 @@ fn legalize_sm50_instr(
             copy_alu_src_if_not_reg(b, src0, SrcType::F64);
             copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
         }
-        Op::IAbs(op) => {
-            copy_alu_src_if_not_reg(b, &mut op.src, SrcType::GPR);
-        }
         Op::Sel(op) => {
             let [ref mut src0, ref mut src1] = op.srcs;
             if swap_srcs_if_not_reg(src0, src1) {
@@ -295,6 +292,9 @@ fn legalize_sm50_instr(
         Op::F2F(op) => {
             copy_alu_src_if_not_reg(b, &mut op.src, SrcType::GPR);
         }
+        Op::I2I(op) => {
+            copy_alu_src_if_i20_overflow(b, &mut op.src, SrcType::ALU);
+        }
         Op::IMad(op) => {
             copy_alu_src_if_not_reg(b, &mut op.srcs[0], SrcType::ALU);
             copy_alu_src_if_not_reg(b, &mut op.srcs[1], SrcType::ALU);