nak: Add an AtomCmpSrc to AtomOp::CmpExch

Pre-Volta, OpAtom works like OpSuAtom where the comparison value and the
data are packed into a vector.  We need some way of expressing this in
the IR.  We could have a separate OpAtom instruction but that seems
unnecessary.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30281>
This commit is contained in:
Faith Ekstrand
2024-07-11 22:24:42 -05:00
committed by Marge Bot
parent da4e368a6f
commit 7d1b1f5d3a
4 changed files with 38 additions and 17 deletions

View File

@@ -1874,7 +1874,11 @@ impl<'a> ShaderFromNir<'a> {
}
}
fn get_atomic_op(&self, intrin: &nir_intrinsic_instr) -> AtomOp {
fn get_atomic_op(
&self,
intrin: &nir_intrinsic_instr,
cmp_src: AtomCmpSrc,
) -> AtomOp {
match intrin.atomic_op() {
nir_atomic_op_iadd => AtomOp::Add,
nir_atomic_op_imin => AtomOp::Min,
@@ -1888,7 +1892,7 @@ impl<'a> ShaderFromNir<'a> {
nir_atomic_op_fadd => AtomOp::Add,
nir_atomic_op_fmin => AtomOp::Min,
nir_atomic_op_fmax => AtomOp::Max,
nir_atomic_op_cmpxchg => AtomOp::CmpExch,
nir_atomic_op_cmpxchg => AtomOp::CmpExch(cmp_src),
_ => panic!("Unsupported NIR atomic op"),
}
}
@@ -2137,7 +2141,7 @@ impl<'a> ShaderFromNir<'a> {
let coord = self.get_image_coord(intrin, dim);
// let sample = self.get_src(&srcs[2]);
let atom_type = self.get_atomic_type(intrin);
let atom_op = self.get_atomic_op(intrin);
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Packed);
assert!(
intrin.def.bit_size() == 32 || intrin.def.bit_size() == 64
@@ -2331,7 +2335,7 @@ impl<'a> ShaderFromNir<'a> {
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let data = self.get_src(&srcs[1]);
let atom_type = self.get_atomic_type(intrin);
let atom_op = self.get_atomic_op(intrin);
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate);
assert!(intrin.def.num_components() == 1);
let dst = b.alloc_ssa(RegFile::GPR, bit_size.div_ceil(32));
@@ -2366,7 +2370,7 @@ impl<'a> ShaderFromNir<'a> {
addr: addr,
cmpr: cmpr,
data: data,
atom_op: AtomOp::CmpExch,
atom_op: AtomOp::CmpExch(AtomCmpSrc::Separate),
atom_type: atom_type,
addr_offset: offset,
mem_space: MemSpace::Global(MemAddrType::A64),
@@ -2837,7 +2841,7 @@ impl<'a> ShaderFromNir<'a> {
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
let data = self.get_src(&srcs[1]);
let atom_type = self.get_atomic_type(intrin);
let atom_op = self.get_atomic_op(intrin);
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate);
assert!(intrin.def.num_components() == 1);
let dst = b.alloc_ssa(RegFile::GPR, bit_size.div_ceil(32));
@@ -2872,7 +2876,7 @@ impl<'a> ShaderFromNir<'a> {
addr: addr,
cmpr: cmpr,
data: data,
atom_op: AtomOp::CmpExch,
atom_op: AtomOp::CmpExch(AtomCmpSrc::Separate),
atom_type: atom_type,
addr_offset: offset,
mem_space: MemSpace::Shared,

View File

@@ -2239,6 +2239,14 @@ impl fmt::Display for AtomType {
}
}
/// Describes where a compare-and-exchange atomic finds its comparison
/// value.  Carried as the payload of `AtomOp::CmpExch`.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub enum AtomCmpSrc {
    /// The comparison value lives in its own source (`cmpr`), apart from
    /// the data source.
    Separate,

    /// The comparison value shares a single source with the data; within
    /// that packed source the comparison value comes first, followed by
    /// the data.
    Packed,
}
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum AtomOp {
@@ -2251,7 +2259,7 @@ pub enum AtomOp {
Or,
Xor,
Exch,
CmpExch,
CmpExch(AtomCmpSrc),
}
impl fmt::Display for AtomOp {
@@ -2266,7 +2274,8 @@ impl fmt::Display for AtomOp {
AtomOp::Or => write!(f, ".or"),
AtomOp::Xor => write!(f, ".xor"),
AtomOp::Exch => write!(f, ".exch"),
AtomOp::CmpExch => write!(f, ".cmpexch"),
AtomOp::CmpExch(AtomCmpSrc::Separate) => write!(f, ".cmpexch"),
AtomOp::CmpExch(AtomCmpSrc::Packed) => write!(f, ".cmpexch.packed"),
}
}
}
@@ -4369,7 +4378,11 @@ impl DisplayOp for OpAtom {
}
write!(f, "{:#x}", self.addr_offset)?;
}
write!(f, "] {}", self.data)
write!(f, "]")?;
if self.atom_op == AtomOp::CmpExch(AtomCmpSrc::Separate) {
write!(f, " {}", self.cmpr)?;
}
write!(f, " {}", self.data)
}
}
impl_display_for_op!(OpAtom);

View File

@@ -2213,7 +2213,7 @@ impl SM50Encoder<'_> {
AtomOp::Or => 6_u8,
AtomOp::Xor => 7_u8,
AtomOp::Exch => 8_u8,
AtomOp::CmpExch => panic!("CmpXchg not yet supported"),
AtomOp::CmpExch(_) => panic!("CmpXchg not yet supported"),
},
);
}
@@ -2228,8 +2228,9 @@ impl SM50Op for OpSuAtom {
}
fn encode(&self, e: &mut SM50Encoder<'_>) {
if matches!(self.atom_op, AtomOp::CmpExch) {
if let AtomOp::CmpExch(cmp_src) = self.atom_op {
e.set_opcode(0xeac0);
assert!(cmp_src == AtomCmpSrc::Packed);
} else {
e.set_opcode(0xea60);
}
@@ -2253,7 +2254,7 @@ impl SM50Op for OpSuAtom {
AtomOp::Or => 6,
AtomOp::Xor => 7,
AtomOp::Exch => 8,
AtomOp::CmpExch => 0,
AtomOp::CmpExch(_) => 0,
};
e.set_image_dim(33..36, self.image_dim);

View File

@@ -2585,8 +2585,9 @@ impl SM70Op for OpSuAtom {
}
fn encode(&self, e: &mut SM70Encoder<'_>) {
if matches!(self.atom_op, AtomOp::CmpExch) {
if let AtomOp::CmpExch(cmp_src) = self.atom_op {
e.set_opcode(0x396);
assert!(cmp_src == AtomCmpSrc::Packed);
} else {
e.set_opcode(0x394);
};
@@ -2759,7 +2760,7 @@ impl SM70Encoder<'_> {
self.set_field(
range,
match atom_op {
AtomOp::Add | AtomOp::CmpExch => 0_u8,
AtomOp::Add | AtomOp::CmpExch(_) => 0_u8,
AtomOp::Min => 1_u8,
AtomOp::Max => 2_u8,
AtomOp::Inc => 3_u8,
@@ -2797,9 +2798,10 @@ impl SM70Op for OpAtom {
fn encode(&self, e: &mut SM70Encoder<'_>) {
match self.mem_space {
MemSpace::Global(_) => {
if self.atom_op == AtomOp::CmpExch {
if let AtomOp::CmpExch(cmp_src) = self.atom_op {
e.set_opcode(0x3a9);
assert!(cmp_src == AtomCmpSrc::Separate);
e.set_reg_src(32..40, self.cmpr);
e.set_reg_src(64..72, self.data);
} else {
@@ -2824,9 +2826,10 @@ impl SM70Op for OpAtom {
}
MemSpace::Local => panic!("Atomics do not support local"),
MemSpace::Shared => {
if self.atom_op == AtomOp::CmpExch {
if let AtomOp::CmpExch(cmp_src) = self.atom_op {
e.set_opcode(0x38d);
assert!(cmp_src == AtomCmpSrc::Separate);
e.set_reg_src(32..40, self.cmpr);
e.set_reg_src(64..72, self.data);
} else {