nak: Add an AtomCmpSrc to AtomOp::CmpExch

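Pre-Volta, OpAtom works like OpSuAtom: the comparison value and the
data are packed into a single vector source.  We need some way of
expressing this in the IR.  We could add a separate pre-Volta OpAtom
instruction, but that seems unnecessary, so AtomOp::CmpExch instead
carries an AtomCmpSrc saying how the comparison value is supplied.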

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30281>
This commit is contained in:
Faith Ekstrand
2024-07-11 22:24:42 -05:00
committed by Marge Bot
parent da4e368a6f
commit 7d1b1f5d3a
4 changed files with 38 additions and 17 deletions

View File

@@ -1874,7 +1874,11 @@ impl<'a> ShaderFromNir<'a> {
} }
} }
fn get_atomic_op(&self, intrin: &nir_intrinsic_instr) -> AtomOp { fn get_atomic_op(
&self,
intrin: &nir_intrinsic_instr,
cmp_src: AtomCmpSrc,
) -> AtomOp {
match intrin.atomic_op() { match intrin.atomic_op() {
nir_atomic_op_iadd => AtomOp::Add, nir_atomic_op_iadd => AtomOp::Add,
nir_atomic_op_imin => AtomOp::Min, nir_atomic_op_imin => AtomOp::Min,
@@ -1888,7 +1892,7 @@ impl<'a> ShaderFromNir<'a> {
nir_atomic_op_fadd => AtomOp::Add, nir_atomic_op_fadd => AtomOp::Add,
nir_atomic_op_fmin => AtomOp::Min, nir_atomic_op_fmin => AtomOp::Min,
nir_atomic_op_fmax => AtomOp::Max, nir_atomic_op_fmax => AtomOp::Max,
nir_atomic_op_cmpxchg => AtomOp::CmpExch, nir_atomic_op_cmpxchg => AtomOp::CmpExch(cmp_src),
_ => panic!("Unsupported NIR atomic op"), _ => panic!("Unsupported NIR atomic op"),
} }
} }
@@ -2137,7 +2141,7 @@ impl<'a> ShaderFromNir<'a> {
let coord = self.get_image_coord(intrin, dim); let coord = self.get_image_coord(intrin, dim);
// let sample = self.get_src(&srcs[2]); // let sample = self.get_src(&srcs[2]);
let atom_type = self.get_atomic_type(intrin); let atom_type = self.get_atomic_type(intrin);
let atom_op = self.get_atomic_op(intrin); let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Packed);
assert!( assert!(
intrin.def.bit_size() == 32 || intrin.def.bit_size() == 64 intrin.def.bit_size() == 32 || intrin.def.bit_size() == 64
@@ -2331,7 +2335,7 @@ impl<'a> ShaderFromNir<'a> {
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24); let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let data = self.get_src(&srcs[1]); let data = self.get_src(&srcs[1]);
let atom_type = self.get_atomic_type(intrin); let atom_type = self.get_atomic_type(intrin);
let atom_op = self.get_atomic_op(intrin); let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate);
assert!(intrin.def.num_components() == 1); assert!(intrin.def.num_components() == 1);
let dst = b.alloc_ssa(RegFile::GPR, bit_size.div_ceil(32)); let dst = b.alloc_ssa(RegFile::GPR, bit_size.div_ceil(32));
@@ -2366,7 +2370,7 @@ impl<'a> ShaderFromNir<'a> {
addr: addr, addr: addr,
cmpr: cmpr, cmpr: cmpr,
data: data, data: data,
atom_op: AtomOp::CmpExch, atom_op: AtomOp::CmpExch(AtomCmpSrc::Separate),
atom_type: atom_type, atom_type: atom_type,
addr_offset: offset, addr_offset: offset,
mem_space: MemSpace::Global(MemAddrType::A64), mem_space: MemSpace::Global(MemAddrType::A64),
@@ -2837,7 +2841,7 @@ impl<'a> ShaderFromNir<'a> {
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24); let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let data = self.get_src(&srcs[1]); let data = self.get_src(&srcs[1]);
let atom_type = self.get_atomic_type(intrin); let atom_type = self.get_atomic_type(intrin);
let atom_op = self.get_atomic_op(intrin); let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate);
assert!(intrin.def.num_components() == 1); assert!(intrin.def.num_components() == 1);
let dst = b.alloc_ssa(RegFile::GPR, bit_size.div_ceil(32)); let dst = b.alloc_ssa(RegFile::GPR, bit_size.div_ceil(32));
@@ -2872,7 +2876,7 @@ impl<'a> ShaderFromNir<'a> {
addr: addr, addr: addr,
cmpr: cmpr, cmpr: cmpr,
data: data, data: data,
atom_op: AtomOp::CmpExch, atom_op: AtomOp::CmpExch(AtomCmpSrc::Separate),
atom_type: atom_type, atom_type: atom_type,
addr_offset: offset, addr_offset: offset,
mem_space: MemSpace::Shared, mem_space: MemSpace::Shared,

View File

@@ -2239,6 +2239,14 @@ impl fmt::Display for AtomType {
} }
} }
/// Selects how the comparison value of an `AtomOp::CmpExch` is supplied.
///
/// The image-atomic lowering requests `Packed`, while the global and shared
/// atomic lowerings request `Separate`.  Encoders assert the form they
/// support, so the choice must match the instruction being emitted.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum AtomCmpSrc {
/// The cmpr value is passed as a separate source
///
/// Displayed as a plain `.cmpexch`.
Separate,
/// The cmpr value is packed in with the data with cmpr coming first
///
/// Displayed as `.cmpexch.packed`.
Packed,
}
#[allow(dead_code)] #[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)] #[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum AtomOp { pub enum AtomOp {
@@ -2251,7 +2259,7 @@ pub enum AtomOp {
Or, Or,
Xor, Xor,
Exch, Exch,
CmpExch, CmpExch(AtomCmpSrc),
} }
impl fmt::Display for AtomOp { impl fmt::Display for AtomOp {
@@ -2266,7 +2274,8 @@ impl fmt::Display for AtomOp {
AtomOp::Or => write!(f, ".or"), AtomOp::Or => write!(f, ".or"),
AtomOp::Xor => write!(f, ".xor"), AtomOp::Xor => write!(f, ".xor"),
AtomOp::Exch => write!(f, ".exch"), AtomOp::Exch => write!(f, ".exch"),
AtomOp::CmpExch => write!(f, ".cmpexch"), AtomOp::CmpExch(AtomCmpSrc::Separate) => write!(f, ".cmpexch"),
AtomOp::CmpExch(AtomCmpSrc::Packed) => write!(f, ".cmpexch.packed"),
} }
} }
} }
@@ -4369,7 +4378,11 @@ impl DisplayOp for OpAtom {
} }
write!(f, "{:#x}", self.addr_offset)?; write!(f, "{:#x}", self.addr_offset)?;
} }
write!(f, "] {}", self.data) write!(f, "]")?;
if self.atom_op == AtomOp::CmpExch(AtomCmpSrc::Separate) {
write!(f, " {}", self.cmpr)?;
}
write!(f, " {}", self.data)
} }
} }
impl_display_for_op!(OpAtom); impl_display_for_op!(OpAtom);

View File

@@ -2213,7 +2213,7 @@ impl SM50Encoder<'_> {
AtomOp::Or => 6_u8, AtomOp::Or => 6_u8,
AtomOp::Xor => 7_u8, AtomOp::Xor => 7_u8,
AtomOp::Exch => 8_u8, AtomOp::Exch => 8_u8,
AtomOp::CmpExch => panic!("CmpXchg not yet supported"), AtomOp::CmpExch(_) => panic!("CmpXchg not yet supported"),
}, },
); );
} }
@@ -2228,8 +2228,9 @@ impl SM50Op for OpSuAtom {
} }
fn encode(&self, e: &mut SM50Encoder<'_>) { fn encode(&self, e: &mut SM50Encoder<'_>) {
if matches!(self.atom_op, AtomOp::CmpExch) { if let AtomOp::CmpExch(cmp_src) = self.atom_op {
e.set_opcode(0xeac0); e.set_opcode(0xeac0);
assert!(cmp_src == AtomCmpSrc::Packed);
} else { } else {
e.set_opcode(0xea60); e.set_opcode(0xea60);
} }
@@ -2253,7 +2254,7 @@ impl SM50Op for OpSuAtom {
AtomOp::Or => 6, AtomOp::Or => 6,
AtomOp::Xor => 7, AtomOp::Xor => 7,
AtomOp::Exch => 8, AtomOp::Exch => 8,
AtomOp::CmpExch => 0, AtomOp::CmpExch(_) => 0,
}; };
e.set_image_dim(33..36, self.image_dim); e.set_image_dim(33..36, self.image_dim);

View File

@@ -2585,8 +2585,9 @@ impl SM70Op for OpSuAtom {
} }
fn encode(&self, e: &mut SM70Encoder<'_>) { fn encode(&self, e: &mut SM70Encoder<'_>) {
if matches!(self.atom_op, AtomOp::CmpExch) { if let AtomOp::CmpExch(cmp_src) = self.atom_op {
e.set_opcode(0x396); e.set_opcode(0x396);
assert!(cmp_src == AtomCmpSrc::Packed);
} else { } else {
e.set_opcode(0x394); e.set_opcode(0x394);
}; };
@@ -2759,7 +2760,7 @@ impl SM70Encoder<'_> {
self.set_field( self.set_field(
range, range,
match atom_op { match atom_op {
AtomOp::Add | AtomOp::CmpExch => 0_u8, AtomOp::Add | AtomOp::CmpExch(_) => 0_u8,
AtomOp::Min => 1_u8, AtomOp::Min => 1_u8,
AtomOp::Max => 2_u8, AtomOp::Max => 2_u8,
AtomOp::Inc => 3_u8, AtomOp::Inc => 3_u8,
@@ -2797,9 +2798,10 @@ impl SM70Op for OpAtom {
fn encode(&self, e: &mut SM70Encoder<'_>) { fn encode(&self, e: &mut SM70Encoder<'_>) {
match self.mem_space { match self.mem_space {
MemSpace::Global(_) => { MemSpace::Global(_) => {
if self.atom_op == AtomOp::CmpExch { if let AtomOp::CmpExch(cmp_src) = self.atom_op {
e.set_opcode(0x3a9); e.set_opcode(0x3a9);
assert!(cmp_src == AtomCmpSrc::Separate);
e.set_reg_src(32..40, self.cmpr); e.set_reg_src(32..40, self.cmpr);
e.set_reg_src(64..72, self.data); e.set_reg_src(64..72, self.data);
} else { } else {
@@ -2824,9 +2826,10 @@ impl SM70Op for OpAtom {
} }
MemSpace::Local => panic!("Atomics do not support local"), MemSpace::Local => panic!("Atomics do not support local"),
MemSpace::Shared => { MemSpace::Shared => {
if self.atom_op == AtomOp::CmpExch { if let AtomOp::CmpExch(cmp_src) = self.atom_op {
e.set_opcode(0x38d); e.set_opcode(0x38d);
assert!(cmp_src == AtomCmpSrc::Separate);
e.set_reg_src(32..40, self.cmpr); e.set_reg_src(32..40, self.cmpr);
e.set_reg_src(64..72, self.data); e.set_reg_src(64..72, self.data);
} else { } else {