nak: implement rro op on SM50
This is the "range reduction operator", which is needed to preprocess srcs for some of the mufu ops on SM50. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27203>
This commit is contained in:
@@ -1646,6 +1646,33 @@ impl SM50Instr {
|
||||
self.set_bit(47, false); /* dst.CC */
|
||||
}
|
||||
|
||||
fn encode_rro(&mut self, op: &OpRro) {
|
||||
match &op.src.src_ref {
|
||||
SrcRef::Imm32(imm32) => {
|
||||
self.set_opcode(0x3890);
|
||||
self.set_src_imm_f20(20..39, 56, *imm32);
|
||||
}
|
||||
SrcRef::Zero | SrcRef::Reg(_) => {
|
||||
self.set_opcode(0x5c90);
|
||||
self.set_reg_fmod_src(20..28, 49, 45, op.src);
|
||||
}
|
||||
SrcRef::CBuf(_) => {
|
||||
self.set_opcode(0x4c90);
|
||||
self.set_cb_fmod_src(20..39, 49, 45, op.src);
|
||||
}
|
||||
src => panic!("Unsupported src type for RRO: {src}"),
|
||||
}
|
||||
|
||||
self.set_dst(op.dst);
|
||||
self.set_field(
|
||||
39..40,
|
||||
match op.op {
|
||||
RroOp::SinCos => 0u8,
|
||||
RroOp::Exp2 => 1u8,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
fn encode_mufu(&mut self, op: &OpMuFu) {
|
||||
assert!(op.src.is_reg_or_zero());
|
||||
|
||||
@@ -2014,6 +2041,7 @@ impl SM50Instr {
|
||||
Op::FSet(op) => si.encode_fset(&op),
|
||||
Op::FSetP(op) => si.encode_fsetp(&op),
|
||||
Op::FSwzAdd(op) => si.encode_fswzadd(&op),
|
||||
Op::Rro(op) => si.encode_rro(&op),
|
||||
Op::MuFu(op) => si.encode_mufu(&op),
|
||||
Op::Flo(op) => si.encode_flo(&op),
|
||||
Op::DAdd(op) => si.encode_dadd(&op),
|
||||
|
@@ -2414,6 +2414,40 @@ impl DisplayOp for OpFSwzAdd {
|
||||
}
|
||||
impl_display_for_op!(OpFSwzAdd);
|
||||
|
||||
/// Variant selector for the range reduction operator ([`OpRro`]).
///
/// Derives the same value-type traits as `MuFuOp` so the selector can be
/// freely copied and compared.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum RroOp {
    /// Printed as `.sincos`; encoded as 0 by the SM50 encoder.
    SinCos,
    /// Printed as `.exp2`; encoded as 1 by the SM50 encoder.
    Exp2,
}
|
||||
|
||||
impl fmt::Display for RroOp {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
RroOp::SinCos => write!(f, ".sincos"),
|
||||
RroOp::Exp2 => write!(f, ".exp2"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// MuFu range reduction operator
|
||||
///
|
||||
/// Not available on SM70+
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpRro {
|
||||
pub dst: Dst,
|
||||
pub op: RroOp,
|
||||
|
||||
#[src_type(F32)]
|
||||
pub src: Src,
|
||||
}
|
||||
|
||||
impl DisplayOp for OpRro {
|
||||
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "rro{} {}", self.op, self.src)
|
||||
}
|
||||
}
|
||||
impl_display_for_op!(OpRro);
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[derive(Clone, Copy, Eq, PartialEq)]
|
||||
pub enum MuFuOp {
|
||||
@@ -4775,6 +4809,7 @@ pub enum Op {
|
||||
FFma(OpFFma),
|
||||
FMnMx(OpFMnMx),
|
||||
FMul(OpFMul),
|
||||
Rro(OpRro),
|
||||
MuFu(OpMuFu),
|
||||
FSet(OpFSet),
|
||||
FSetP(OpFSetP),
|
||||
@@ -5220,7 +5255,7 @@ impl Instr {
|
||||
| Op::FSwzAdd(_) => true,
|
||||
|
||||
// Multi-function unit is variable latency
|
||||
Op::MuFu(_) => false,
|
||||
Op::Rro(_) | Op::MuFu(_) => false,
|
||||
|
||||
// Double-precision float ALU
|
||||
Op::DAdd(_)
|
||||
|
@@ -226,6 +226,9 @@ fn legalize_sm50_instr(
|
||||
copy_alu_src_if_not_reg(b, &mut op.srcs[1], SrcType::Pred);
|
||||
copy_alu_src_if_not_reg(b, &mut op.srcs[2], SrcType::Pred);
|
||||
}
|
||||
Op::Rro(op) => {
|
||||
copy_alu_src_if_f20_overflow(b, &mut op.src, SrcType::F32);
|
||||
}
|
||||
Op::MuFu(op) => {
|
||||
copy_alu_src_if_not_reg(b, &mut op.src, SrcType::GPR);
|
||||
}
|
||||
|
Reference in New Issue
Block a user