nak: Implement nir_intrinsic_ldcx_nv
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29591>
This commit is contained in:

committed by
Marge Bot

parent
851b3ddd05
commit
dc7b08c41a
@@ -310,8 +310,12 @@ impl<'a> ShaderFromNir<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn get_ssa_ref(&mut self, src: &nir_src) -> SSARef {
|
||||
SSARef::try_from(self.get_ssa(src.as_def())).unwrap()
|
||||
}
|
||||
|
||||
/// Returns the value of `src` as a `Src`.
///
/// The lookup is delegated to `get_ssa_ref`, and the resulting `SSARef` is
/// converted with `into()`. Only the helper-based expression is kept: the
/// earlier inline `SSARef::try_from(...).unwrap().into()` form duplicated the
/// helper's body and cannot coexist with it as a second tail expression.
///
/// # Panics
///
/// Panics under the same condition as `get_ssa_ref` (the `SSARef`
/// conversion failing).
fn get_src(&mut self, src: &nir_src) -> Src {
    self.get_ssa_ref(src).into()
}
|
||||
|
||||
fn get_io_addr_offset(
|
||||
@@ -334,6 +338,15 @@ impl<'a> ShaderFromNir<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Splits the constant-buffer address `addr` into a dynamic offset source and
/// an unsigned 16-bit immediate offset.
///
/// The split itself is done by `get_io_addr_offset(addr, 16)`
/// (NOTE(review): the `16` is presumably the immediate bit width — confirm
/// against that helper). If the immediate it returns does not fit in a `u16`,
/// the whole address is used as the dynamic offset and the immediate is `0`.
fn get_cbuf_addr_offset(&mut self, addr: &nir_src) -> (Src, u16) {
    let (base, imm) = self.get_io_addr_offset(addr, 16);
    match u16::try_from(imm) {
        Ok(imm_u16) => (base, imm_u16),
        // The immediate is not representable: fall back to the full address.
        Err(_) => (self.get_src(addr), 0),
    }
}
|
||||
|
||||
/// Records `ssa` as the value of the NIR def `def`.
///
/// Forwards to `set_ssa`, passing the dereferenced contents of `ssa`
/// converted with `into()`.
fn set_dst(&mut self, def: &nir_def, ssa: SSARef) {
|
||||
self.set_ssa(def, (*ssa).into());
|
||||
}
|
||||
@@ -2611,13 +2624,7 @@ impl<'a> ShaderFromNir<'a> {
|
||||
(intrin.def.bit_size() / 8) * intrin.def.num_components();
|
||||
let idx = &srcs[0];
|
||||
|
||||
let (off, off_imm) = self.get_io_addr_offset(&srcs[1], 16);
|
||||
let (off, off_imm) =
|
||||
if let Ok(off_imm_u16) = u16::try_from(off_imm) {
|
||||
(off, off_imm_u16)
|
||||
} else {
|
||||
(self.get_src(&srcs[1]), 0)
|
||||
};
|
||||
let (off, off_imm) = self.get_cbuf_addr_offset(&srcs[1]);
|
||||
|
||||
let dst = b.alloc_ssa(RegFile::GPR, size_B.div_ceil(4));
|
||||
|
||||
@@ -2666,6 +2673,35 @@ impl<'a> ShaderFromNir<'a> {
|
||||
}
|
||||
self.set_dst(&intrin.def, dst);
|
||||
}
|
||||
// Constant-buffer load through a bindless handle: srcs[0] supplies the
// handle and srcs[1] the address within the buffer.
nir_intrinsic_ldcx_nv => {
|
||||
// Total size of the destination in bytes.
let size_B =
|
||||
(intrin.def.bit_size() / 8) * intrin.def.num_components();
|
||||
|
||||
let handle = self.get_ssa_ref(&srcs[0]);
|
||||
// Dynamic offset source plus an immediate that fits in a u16.
let (off, off_imm) = self.get_cbuf_addr_offset(&srcs[1]);
|
||||
|
||||
let cb = CBufRef {
|
||||
buf: CBuf::BindlessSSA(handle),
|
||||
offset: off_imm,
|
||||
};
|
||||
|
||||
// One GPR component per 4 bytes of result, rounded up.
let dst = b.alloc_ssa(RegFile::GPR, size_B.div_ceil(4));
|
||||
if off.is_zero() {
|
||||
// No dynamic offset: copy each component straight from the cbuf
// reference. NOTE(review): cb.offset(i * 4) presumably advances the
// reference by i * 4 bytes — confirm against CBufRef::offset.
for (i, comp) in dst.iter().enumerate() {
|
||||
let i = u16::try_from(i).unwrap();
|
||||
b.copy_to((*comp).into(), cb.offset(i * 4).into());
|
||||
}
|
||||
} else {
|
||||
// Dynamic offset present: emit a single indexed OpLdc for the whole
// destination.
b.push_op(OpLdc {
|
||||
dst: dst.into(),
|
||||
cb: cb.into(),
|
||||
offset: off,
|
||||
mode: LdcMode::Indexed,
|
||||
mem_type: MemType::from_size(size_B, false),
|
||||
});
|
||||
}
|
||||
// Bind the loaded value to the intrinsic's def.
self.set_dst(&intrin.def, dst);
|
||||
}
|
||||
nir_intrinsic_pin_cx_handle_nv => {
|
||||
let handle = self.get_ssa_ref(&srcs[0]);
|
||||
b.push_op(OpPin {
|
||||
|
@@ -803,7 +803,8 @@ nak_mem_vectorize_cb(unsigned align_mul, unsigned align_offset,
|
||||
assert(util_is_power_of_two_nonzero(align_mul));
|
||||
|
||||
unsigned max_bytes = 128u / 8u;
|
||||
if (low->intrinsic == nir_intrinsic_ldc_nv)
|
||||
if (low->intrinsic == nir_intrinsic_ldc_nv ||
|
||||
low->intrinsic == nir_intrinsic_ldcx_nv)
|
||||
max_bytes = 64u / 8u;
|
||||
|
||||
align_mul = MIN2(align_mul, max_bytes);
|
||||
@@ -830,10 +831,12 @@ nak_mem_access_size_align(nir_intrinsic_op intrin,
|
||||
|
||||
unsigned chunk_bytes = MIN3(bytes_pow2, align, 16);
|
||||
assert(util_is_power_of_two_nonzero(chunk_bytes));
|
||||
if (intrin == nir_intrinsic_ldc_nv)
|
||||
if (intrin == nir_intrinsic_ldc_nv ||
|
||||
intrin == nir_intrinsic_ldcx_nv)
|
||||
chunk_bytes = MIN2(chunk_bytes, 8);
|
||||
|
||||
if (intrin == nir_intrinsic_ldc_nv && align < 4) {
|
||||
if ((intrin == nir_intrinsic_ldc_nv ||
|
||||
intrin == nir_intrinsic_ldcx_nv) && align < 4) {
|
||||
/* CBufs require 4B alignment unless we're doing a ldc.u8 or ldc.i8.
|
||||
* In particular, this applies to ldc.u16 which means we either have to
|
||||
* fall back to two ldc.u8 or use ldc.u32 and shift stuff around to get
|
||||
|
Reference in New Issue
Block a user