nak: Add an AtomCmpSrc to AtomOp::CmpExch
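Pre-Volta, the compare-and-exchange form of OpAtom works like OpSuAtom: the comparison value and the data are packed together into a vector, with the comparison value coming first, rather than being passed as separate sources. We need some way of expressing this in the IR. We could add a separate OpAtom instruction for this, but that seems unnecessary; instead, AtomOp::CmpExch now carries an AtomCmpSrc that records whether the comparison value is a separate source or is packed in with the data. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30281>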
This commit is contained in:

committed by
Marge Bot

parent
da4e368a6f
commit
7d1b1f5d3a
@@ -1874,7 +1874,11 @@ impl<'a> ShaderFromNir<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_atomic_op(&self, intrin: &nir_intrinsic_instr) -> AtomOp {
|
fn get_atomic_op(
|
||||||
|
&self,
|
||||||
|
intrin: &nir_intrinsic_instr,
|
||||||
|
cmp_src: AtomCmpSrc,
|
||||||
|
) -> AtomOp {
|
||||||
match intrin.atomic_op() {
|
match intrin.atomic_op() {
|
||||||
nir_atomic_op_iadd => AtomOp::Add,
|
nir_atomic_op_iadd => AtomOp::Add,
|
||||||
nir_atomic_op_imin => AtomOp::Min,
|
nir_atomic_op_imin => AtomOp::Min,
|
||||||
@@ -1888,7 +1892,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||||||
nir_atomic_op_fadd => AtomOp::Add,
|
nir_atomic_op_fadd => AtomOp::Add,
|
||||||
nir_atomic_op_fmin => AtomOp::Min,
|
nir_atomic_op_fmin => AtomOp::Min,
|
||||||
nir_atomic_op_fmax => AtomOp::Max,
|
nir_atomic_op_fmax => AtomOp::Max,
|
||||||
nir_atomic_op_cmpxchg => AtomOp::CmpExch,
|
nir_atomic_op_cmpxchg => AtomOp::CmpExch(cmp_src),
|
||||||
_ => panic!("Unsupported NIR atomic op"),
|
_ => panic!("Unsupported NIR atomic op"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2137,7 +2141,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||||||
let coord = self.get_image_coord(intrin, dim);
|
let coord = self.get_image_coord(intrin, dim);
|
||||||
// let sample = self.get_src(&srcs[2]);
|
// let sample = self.get_src(&srcs[2]);
|
||||||
let atom_type = self.get_atomic_type(intrin);
|
let atom_type = self.get_atomic_type(intrin);
|
||||||
let atom_op = self.get_atomic_op(intrin);
|
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Packed);
|
||||||
|
|
||||||
assert!(
|
assert!(
|
||||||
intrin.def.bit_size() == 32 || intrin.def.bit_size() == 64
|
intrin.def.bit_size() == 32 || intrin.def.bit_size() == 64
|
||||||
@@ -2331,7 +2335,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
||||||
let data = self.get_src(&srcs[1]);
|
let data = self.get_src(&srcs[1]);
|
||||||
let atom_type = self.get_atomic_type(intrin);
|
let atom_type = self.get_atomic_type(intrin);
|
||||||
let atom_op = self.get_atomic_op(intrin);
|
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate);
|
||||||
|
|
||||||
assert!(intrin.def.num_components() == 1);
|
assert!(intrin.def.num_components() == 1);
|
||||||
let dst = b.alloc_ssa(RegFile::GPR, bit_size.div_ceil(32));
|
let dst = b.alloc_ssa(RegFile::GPR, bit_size.div_ceil(32));
|
||||||
@@ -2366,7 +2370,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||||||
addr: addr,
|
addr: addr,
|
||||||
cmpr: cmpr,
|
cmpr: cmpr,
|
||||||
data: data,
|
data: data,
|
||||||
atom_op: AtomOp::CmpExch,
|
atom_op: AtomOp::CmpExch(AtomCmpSrc::Separate),
|
||||||
atom_type: atom_type,
|
atom_type: atom_type,
|
||||||
addr_offset: offset,
|
addr_offset: offset,
|
||||||
mem_space: MemSpace::Global(MemAddrType::A64),
|
mem_space: MemSpace::Global(MemAddrType::A64),
|
||||||
@@ -2837,7 +2841,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||||||
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
|
||||||
let data = self.get_src(&srcs[1]);
|
let data = self.get_src(&srcs[1]);
|
||||||
let atom_type = self.get_atomic_type(intrin);
|
let atom_type = self.get_atomic_type(intrin);
|
||||||
let atom_op = self.get_atomic_op(intrin);
|
let atom_op = self.get_atomic_op(intrin, AtomCmpSrc::Separate);
|
||||||
|
|
||||||
assert!(intrin.def.num_components() == 1);
|
assert!(intrin.def.num_components() == 1);
|
||||||
let dst = b.alloc_ssa(RegFile::GPR, bit_size.div_ceil(32));
|
let dst = b.alloc_ssa(RegFile::GPR, bit_size.div_ceil(32));
|
||||||
@@ -2872,7 +2876,7 @@ impl<'a> ShaderFromNir<'a> {
|
|||||||
addr: addr,
|
addr: addr,
|
||||||
cmpr: cmpr,
|
cmpr: cmpr,
|
||||||
data: data,
|
data: data,
|
||||||
atom_op: AtomOp::CmpExch,
|
atom_op: AtomOp::CmpExch(AtomCmpSrc::Separate),
|
||||||
atom_type: atom_type,
|
atom_type: atom_type,
|
||||||
addr_offset: offset,
|
addr_offset: offset,
|
||||||
mem_space: MemSpace::Shared,
|
mem_space: MemSpace::Shared,
|
||||||
|
@@ -2239,6 +2239,14 @@ impl fmt::Display for AtomType {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
|
||||||
|
pub enum AtomCmpSrc {
|
||||||
|
/// The cmpr value is passed as a separate source
|
||||||
|
Separate,
|
||||||
|
/// The cmpr value is packed in with the data with cmpr coming first
|
||||||
|
Packed,
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
|
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
|
||||||
pub enum AtomOp {
|
pub enum AtomOp {
|
||||||
@@ -2251,7 +2259,7 @@ pub enum AtomOp {
|
|||||||
Or,
|
Or,
|
||||||
Xor,
|
Xor,
|
||||||
Exch,
|
Exch,
|
||||||
CmpExch,
|
CmpExch(AtomCmpSrc),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for AtomOp {
|
impl fmt::Display for AtomOp {
|
||||||
@@ -2266,7 +2274,8 @@ impl fmt::Display for AtomOp {
|
|||||||
AtomOp::Or => write!(f, ".or"),
|
AtomOp::Or => write!(f, ".or"),
|
||||||
AtomOp::Xor => write!(f, ".xor"),
|
AtomOp::Xor => write!(f, ".xor"),
|
||||||
AtomOp::Exch => write!(f, ".exch"),
|
AtomOp::Exch => write!(f, ".exch"),
|
||||||
AtomOp::CmpExch => write!(f, ".cmpexch"),
|
AtomOp::CmpExch(AtomCmpSrc::Separate) => write!(f, ".cmpexch"),
|
||||||
|
AtomOp::CmpExch(AtomCmpSrc::Packed) => write!(f, ".cmpexch.packed"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -4369,7 +4378,11 @@ impl DisplayOp for OpAtom {
|
|||||||
}
|
}
|
||||||
write!(f, "{:#x}", self.addr_offset)?;
|
write!(f, "{:#x}", self.addr_offset)?;
|
||||||
}
|
}
|
||||||
write!(f, "] {}", self.data)
|
write!(f, "]")?;
|
||||||
|
if self.atom_op == AtomOp::CmpExch(AtomCmpSrc::Separate) {
|
||||||
|
write!(f, " {}", self.cmpr)?;
|
||||||
|
}
|
||||||
|
write!(f, " {}", self.data)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl_display_for_op!(OpAtom);
|
impl_display_for_op!(OpAtom);
|
||||||
|
@@ -2213,7 +2213,7 @@ impl SM50Encoder<'_> {
|
|||||||
AtomOp::Or => 6_u8,
|
AtomOp::Or => 6_u8,
|
||||||
AtomOp::Xor => 7_u8,
|
AtomOp::Xor => 7_u8,
|
||||||
AtomOp::Exch => 8_u8,
|
AtomOp::Exch => 8_u8,
|
||||||
AtomOp::CmpExch => panic!("CmpXchg not yet supported"),
|
AtomOp::CmpExch(_) => panic!("CmpXchg not yet supported"),
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -2228,8 +2228,9 @@ impl SM50Op for OpSuAtom {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn encode(&self, e: &mut SM50Encoder<'_>) {
|
fn encode(&self, e: &mut SM50Encoder<'_>) {
|
||||||
if matches!(self.atom_op, AtomOp::CmpExch) {
|
if let AtomOp::CmpExch(cmp_src) = self.atom_op {
|
||||||
e.set_opcode(0xeac0);
|
e.set_opcode(0xeac0);
|
||||||
|
assert!(cmp_src == AtomCmpSrc::Packed);
|
||||||
} else {
|
} else {
|
||||||
e.set_opcode(0xea60);
|
e.set_opcode(0xea60);
|
||||||
}
|
}
|
||||||
@@ -2253,7 +2254,7 @@ impl SM50Op for OpSuAtom {
|
|||||||
AtomOp::Or => 6,
|
AtomOp::Or => 6,
|
||||||
AtomOp::Xor => 7,
|
AtomOp::Xor => 7,
|
||||||
AtomOp::Exch => 8,
|
AtomOp::Exch => 8,
|
||||||
AtomOp::CmpExch => 0,
|
AtomOp::CmpExch(_) => 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
e.set_image_dim(33..36, self.image_dim);
|
e.set_image_dim(33..36, self.image_dim);
|
||||||
|
@@ -2585,8 +2585,9 @@ impl SM70Op for OpSuAtom {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn encode(&self, e: &mut SM70Encoder<'_>) {
|
fn encode(&self, e: &mut SM70Encoder<'_>) {
|
||||||
if matches!(self.atom_op, AtomOp::CmpExch) {
|
if let AtomOp::CmpExch(cmp_src) = self.atom_op {
|
||||||
e.set_opcode(0x396);
|
e.set_opcode(0x396);
|
||||||
|
assert!(cmp_src == AtomCmpSrc::Packed);
|
||||||
} else {
|
} else {
|
||||||
e.set_opcode(0x394);
|
e.set_opcode(0x394);
|
||||||
};
|
};
|
||||||
@@ -2759,7 +2760,7 @@ impl SM70Encoder<'_> {
|
|||||||
self.set_field(
|
self.set_field(
|
||||||
range,
|
range,
|
||||||
match atom_op {
|
match atom_op {
|
||||||
AtomOp::Add | AtomOp::CmpExch => 0_u8,
|
AtomOp::Add | AtomOp::CmpExch(_) => 0_u8,
|
||||||
AtomOp::Min => 1_u8,
|
AtomOp::Min => 1_u8,
|
||||||
AtomOp::Max => 2_u8,
|
AtomOp::Max => 2_u8,
|
||||||
AtomOp::Inc => 3_u8,
|
AtomOp::Inc => 3_u8,
|
||||||
@@ -2797,9 +2798,10 @@ impl SM70Op for OpAtom {
|
|||||||
fn encode(&self, e: &mut SM70Encoder<'_>) {
|
fn encode(&self, e: &mut SM70Encoder<'_>) {
|
||||||
match self.mem_space {
|
match self.mem_space {
|
||||||
MemSpace::Global(_) => {
|
MemSpace::Global(_) => {
|
||||||
if self.atom_op == AtomOp::CmpExch {
|
if let AtomOp::CmpExch(cmp_src) = self.atom_op {
|
||||||
e.set_opcode(0x3a9);
|
e.set_opcode(0x3a9);
|
||||||
|
|
||||||
|
assert!(cmp_src == AtomCmpSrc::Separate);
|
||||||
e.set_reg_src(32..40, self.cmpr);
|
e.set_reg_src(32..40, self.cmpr);
|
||||||
e.set_reg_src(64..72, self.data);
|
e.set_reg_src(64..72, self.data);
|
||||||
} else {
|
} else {
|
||||||
@@ -2824,9 +2826,10 @@ impl SM70Op for OpAtom {
|
|||||||
}
|
}
|
||||||
MemSpace::Local => panic!("Atomics do not support local"),
|
MemSpace::Local => panic!("Atomics do not support local"),
|
||||||
MemSpace::Shared => {
|
MemSpace::Shared => {
|
||||||
if self.atom_op == AtomOp::CmpExch {
|
if let AtomOp::CmpExch(cmp_src) = self.atom_op {
|
||||||
e.set_opcode(0x38d);
|
e.set_opcode(0x38d);
|
||||||
|
|
||||||
|
assert!(cmp_src == AtomCmpSrc::Separate);
|
||||||
e.set_reg_src(32..40, self.cmpr);
|
e.set_reg_src(32..40, self.cmpr);
|
||||||
e.set_reg_src(64..72, self.data);
|
e.set_reg_src(64..72, self.data);
|
||||||
} else {
|
} else {
|
||||||
|
Reference in New Issue
Block a user