nak: fix iabs on SM50 with an explicit i2i op

Fixes a bug where the SM50 encoder did not set the source abs modifier
when lowering an iabs op to i2i.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27105>
This commit is contained in:
Benjamin Lee
2024-01-16 18:12:57 -08:00
committed by Marge Bot
parent 7f332f4087
commit f05350899a
4 changed files with 89 additions and 40 deletions

View File

@@ -231,10 +231,22 @@ pub trait SSABuilder: Builder {
/// Emits an integer absolute value of `i` and returns the newly allocated
/// GPR SSA value that holds the result.
///
/// On SM70 and later this is an `OpIAbs`. On older SMs it is an `OpI2I`
/// I32 -> I32 conversion with the `abs` modifier set.
fn iabs(&mut self, i: Src) -> SSARef {
    let dst = self.alloc_ssa(RegFile::GPR, 1);

    // Exactly one op may define `dst`, so the op is chosen by SM generation
    // rather than pushing an `OpIAbs` unconditionally and then a second op.
    if self.sm() >= 70 {
        self.push_op(OpIAbs {
            dst: dst.into(),
            src: i,
        });
    } else {
        self.push_op(OpI2I {
            dst: dst.into(),
            src: i,
            src_type: IntType::I32,
            dst_type: IntType::I32,
            saturate: false,
            abs: true,
            neg: false,
        });
    }

    dst
}

View File

@@ -728,6 +728,36 @@ impl SM50Instr {
self.set_dst(op.dst);
}
/// Encodes an `OpI2I` into this SM50 instruction.
///
/// The opcode is selected by the kind of source operand (register or zero,
/// constant buffer, or 32-bit immediate written through
/// `set_src_imm_i20`). Any other source kind panics. The modifier, type
/// and destination fields are written afterwards.
fn encode_i2i(&mut self, op: &OpI2I) {
    // Opcode and source operand.
    match &op.src.src_ref {
        SrcRef::Zero | SrcRef::Reg(_) => {
            self.set_opcode(0x5ce0);
            self.set_reg_src(20..28, op.src);
        }
        SrcRef::CBuf(cbuf) => {
            self.set_opcode(0x4ce0);
            self.set_src_cb(20..39, cbuf);
        }
        SrcRef::Imm32(imm32) => {
            self.set_opcode(0x38e0);
            self.set_src_imm_i20(20..39, 56, *imm32);
        }
        src => panic!("Unsupported src type for I2I: {src}"),
    }

    // Op modifiers.
    self.set_bit(45, op.neg);
    self.set_bit(49, op.abs);
    self.set_bit(50, op.saturate);

    // Signedness of the destination and source types.
    let dst_signed = op.dst_type.is_signed();
    self.set_bit(12, dst_signed);
    let src_signed = op.src_type.is_signed();
    self.set_bit(13, src_signed);

    // Type sizes, stored as log2 of the size in bytes.
    let dst_size_log2 = (op.dst_type.bits() / 8).ilog2();
    self.set_field(8..10, dst_size_log2);
    let src_size_log2 = (op.src_type.bits() / 8).ilog2();
    self.set_field(10..12, src_size_log2);

    self.set_field(41..43, 0u8); // src.B1-3
    self.set_bit(47, false); // dst.CC

    self.set_dst(op.dst);
}
fn encode_imad(&mut self, op: &OpIMad) {
assert!(op.srcs[0].is_reg_or_zero());
assert!(op.srcs[1].is_reg_or_zero());
@@ -1814,37 +1844,6 @@ impl SM50Instr {
self.set_reg_fmod_src(8..16, 7, 43, op.srcs[0]);
}
/// Encodes an `OpIAbs` into this SM50 instruction as an I2I with the
/// source abs modifier set.
///
/// Panics if the source is not a register or zero.
fn encode_iabs(&mut self, op: &OpIAbs) {
    assert!(op.src.is_reg_or_zero());

    // IABS isn't a thing on SM50, we use I2I instead.
    // We always assume 32bits signed for now
    let src_type = IntType::I32;
    let dst_type = IntType::I32;

    match &op.src.src_ref {
        SrcRef::Imm32(imm32) => {
            self.set_opcode(0x38e0);
            self.set_src_imm_i20(20..39, 56, *imm32);
        }
        SrcRef::Zero | SrcRef::Reg(_) => {
            self.set_opcode(0x5ce0);
            self.set_reg_src(20..28, op.src);
        }
        SrcRef::CBuf(cbuf) => {
            self.set_opcode(0x4ce0);
            self.set_src_cb(20..39, cbuf);
        }
        src => panic!("Unsupported src type for IABS: {src}"),
    }

    // The abs modifier is what turns this I2I into an absolute value; it is
    // the same bit encode_i2i writes from `op.abs`. Without it the
    // instruction is only an I32 -> I32 conversion of the source.
    self.set_bit(49, true);

    self.set_bit(12, dst_type.is_signed());
    self.set_bit(13, src_type.is_signed());
    self.set_field(8..10, (dst_type.bits() / 8).ilog2());
    self.set_field(10..12, (src_type.bits() / 8).ilog2());

    self.set_dst(op.dst);
}
fn encode_iadd2(&mut self, op: &OpIAdd2) {
let carry_in = match op.carry_in.src_ref {
SrcRef::Reg(reg) if reg.file() == RegFile::Carry => true,
@@ -1943,7 +1942,6 @@ impl SM50Instr {
Op::DMnMx(op) => si.encode_dmnmx(&op),
Op::DMul(op) => si.encode_dmul(&op),
Op::DSetP(op) => si.encode_dsetp(&op),
Op::IAbs(op) => si.encode_iabs(&op),
Op::IAdd2(op) => si.encode_iadd2(&op),
Op::Mov(op) => si.encode_mov(&op),
Op::Sel(op) => si.encode_sel(&op),
@@ -1964,6 +1962,7 @@ impl SM50Instr {
Op::F2F(op) => si.encode_f2f(&op),
Op::F2I(op) => si.encode_f2i(&op),
Op::I2F(op) => si.encode_i2f(&op),
Op::I2I(op) => si.encode_i2i(&op),
Op::IMad(op) => si.encode_imad(&op),
Op::IMul(op) => si.encode_imul(&op),
Op::IMnMx(op) => si.encode_imnmx(&op),

View File

@@ -3197,6 +3197,41 @@ impl DisplayOp for OpI2F {
}
impl_display_for_op!(OpI2F);
/// Integer-to-integer op carrying source/destination integer types plus
/// saturate, abs and neg modifiers.
///
/// Not used on SM70+
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpI2I {
    /// Destination of the op.
    pub dst: Dst,

    /// Source operand.
    #[src_type(ALU)]
    pub src: Src,

    /// Integer type of the source.
    pub src_type: IntType,

    /// Integer type of the destination.
    pub dst_type: IntType,

    /// Saturate modifier (presumably clamps to the destination type's range
    /// -- TODO confirm against the ISA).
    pub saturate: bool,

    /// Absolute-value modifier.
    pub abs: bool,

    /// Negate modifier.
    pub neg: bool,
}
// Text form of `OpI2I`: the `i2i` mnemonic, an optional `.sat`, the
// destination and source types, the source operand, then optional `.abs`
// and `.neg` suffixes.
impl DisplayOp for OpI2I {
    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "i2i")?;
        if self.saturate {
            // No trailing space here: the type suffixes follow the mnemonic
            // directly, exactly as they do when `saturate` is false.
            write!(f, ".sat")?;
        }
        write!(f, "{}{} {}", self.dst_type, self.src_type, self.src)?;
        if self.abs {
            write!(f, ".abs")?;
        }
        if self.neg {
            write!(f, ".neg")?;
        }
        Ok(())
    }
}
impl_display_for_op!(OpI2I);
#[repr(C)]
#[derive(DstsAsSlice)]
pub struct OpFRnd {
@@ -4772,6 +4807,7 @@ pub enum Op {
F2F(OpF2F),
F2I(OpF2I),
I2F(OpI2F),
I2I(OpI2I),
FRnd(OpFRnd),
Mov(OpMov),
Prmt(OpPrmt),
@@ -5214,7 +5250,9 @@ impl Instr {
| Op::Shr(_) => true,
// Conversions are variable latency?!?
Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::FRnd(_) => false,
Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::I2I(_) | Op::FRnd(_) => {
false
}
// Move ops
Op::Mov(_) | Op::Prmt(_) | Op::Sel(_) => true,

View File

@@ -267,9 +267,6 @@ fn legalize_sm50_instr(
copy_alu_src_if_not_reg(b, src0, SrcType::F64);
copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
}
Op::IAbs(op) => {
copy_alu_src_if_not_reg(b, &mut op.src, SrcType::GPR);
}
Op::Sel(op) => {
let [ref mut src0, ref mut src1] = op.srcs;
if swap_srcs_if_not_reg(src0, src1) {
@@ -295,6 +292,9 @@ fn legalize_sm50_instr(
Op::F2F(op) => {
copy_alu_src_if_not_reg(b, &mut op.src, SrcType::GPR);
}
Op::I2I(op) => {
copy_alu_src_if_i20_overflow(b, &mut op.src, SrcType::ALU);
}
Op::IMad(op) => {
copy_alu_src_if_not_reg(b, &mut op.srcs[0], SrcType::ALU);
copy_alu_src_if_not_reg(b, &mut op.srcs[1], SrcType::ALU);