nak: Implement nir_intrinsic_ldcx_nv

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29591>
This commit is contained in:
Faith Ekstrand
2024-05-24 16:14:27 -05:00
committed by Marge Bot
parent 851b3ddd05
commit dc7b08c41a
2 changed files with 50 additions and 11 deletions

View File

@@ -310,8 +310,12 @@ impl<'a> ShaderFromNir<'a> {
}
}
/// Looks up the SSA values recorded for the NIR def behind `src` and
/// packages them as an [`SSARef`].
///
/// # Panics
///
/// Panics if `SSARef::try_from` rejects the values returned by `get_ssa`.
fn get_ssa_ref(&mut self, src: &nir_src) -> SSARef {
    let def = src.as_def();
    let values = self.get_ssa(def);
    SSARef::try_from(values).unwrap()
}
/// Returns the value of the NIR source `src` converted into a `Src`.
fn get_src(&mut self, src: &nir_src) -> Src {
// NOTE(review): this view is a diff with its +/- markers stripped. The line
// directly below appears to be the removed (pre-change) body and the line
// after it the added (post-change) body, which delegates to `get_ssa_ref`.
// Only one of the two can be the tail expression of this function; confirm
// against the real file before relying on this text.
SSARef::try_from(self.get_ssa(src.as_def())).unwrap().into()
self.get_ssa_ref(src).into()
}
fn get_io_addr_offset(
@@ -334,6 +338,15 @@ impl<'a> ShaderFromNir<'a> {
}
}
/// Splits a constant-buffer address source into a `Src` offset plus a
/// `u16` immediate offset.
///
/// The address is first decomposed with `get_io_addr_offset(addr, 16)`.
/// When the immediate it returns fits in a `u16`, that pair is returned
/// as-is. Otherwise the whole address is returned as the `Src` (via
/// `get_src`) and the immediate is 0.
fn get_cbuf_addr_offset(&mut self, addr: &nir_src) -> (Src, u16) {
    let (base, imm) = self.get_io_addr_offset(addr, 16);
    match u16::try_from(imm) {
        Ok(imm_u16) => (base, imm_u16),
        // The immediate cannot be represented as a u16: discard the split
        // and use the unsplit address with no immediate.
        Err(_) => (self.get_src(addr), 0),
    }
}
/// Records the values in `ssa` as the SSA values of the NIR def `def` by
/// forwarding them to `set_ssa`.
fn set_dst(&mut self, def: &nir_def, ssa: SSARef) {
    let values = (*ssa).into();
    self.set_ssa(def, values);
}
@@ -2611,13 +2624,7 @@ impl<'a> ShaderFromNir<'a> {
(intrin.def.bit_size() / 8) * intrin.def.num_components();
let idx = &srcs[0];
let (off, off_imm) = self.get_io_addr_offset(&srcs[1], 16);
let (off, off_imm) =
if let Ok(off_imm_u16) = u16::try_from(off_imm) {
(off, off_imm_u16)
} else {
(self.get_src(&srcs[1]), 0)
};
let (off, off_imm) = self.get_cbuf_addr_offset(&srcs[1]);
let dst = b.alloc_ssa(RegFile::GPR, size_B.div_ceil(4));
@@ -2666,6 +2673,35 @@ impl<'a> ShaderFromNir<'a> {
}
self.set_dst(&intrin.def, dst);
}
// Constant-buffer load whose buffer is named by an SSA handle (srcs[0])
// and whose address comes from srcs[1].
nir_intrinsic_ldcx_nv => {
// Total size of the result: (bit size / 8) per component times the
// component count.
let size_B =
(intrin.def.bit_size() / 8) * intrin.def.num_components();
let handle = self.get_ssa_ref(&srcs[0]);
// Split the address into a `Src` part and a u16 immediate part.
let (off, off_imm) = self.get_cbuf_addr_offset(&srcs[1]);
let cb = CBufRef {
buf: CBuf::BindlessSSA(handle),
offset: off_imm,
};
// One GPR component per 4 bytes of result, rounded up.
let dst = b.alloc_ssa(RegFile::GPR, size_B.div_ceil(4));
if off.is_zero() {
// Only the immediate offset remains: copy each destination component
// straight from the cbuf reference, stepping 4 bytes per component.
for (i, comp) in dst.iter().enumerate() {
let i = u16::try_from(i).unwrap();
b.copy_to((*comp).into(), cb.offset(i * 4).into());
}
} else {
// A non-zero `Src` offset is present: emit an explicit OpLdc that
// takes it as the `offset` operand.
b.push_op(OpLdc {
dst: dst.into(),
cb: cb.into(),
offset: off,
mode: LdcMode::Indexed,
mem_type: MemType::from_size(size_B, false),
});
}
self.set_dst(&intrin.def, dst);
}
nir_intrinsic_pin_cx_handle_nv => {
let handle = self.get_ssa_ref(&srcs[0]);
b.push_op(OpPin {

View File

@@ -803,7 +803,8 @@ nak_mem_vectorize_cb(unsigned align_mul, unsigned align_offset,
assert(util_is_power_of_two_nonzero(align_mul));
unsigned max_bytes = 128u / 8u;
if (low->intrinsic == nir_intrinsic_ldc_nv)
if (low->intrinsic == nir_intrinsic_ldc_nv ||
low->intrinsic == nir_intrinsic_ldcx_nv)
max_bytes = 64u / 8u;
align_mul = MIN2(align_mul, max_bytes);
@@ -830,10 +831,12 @@ nak_mem_access_size_align(nir_intrinsic_op intrin,
unsigned chunk_bytes = MIN3(bytes_pow2, align, 16);
assert(util_is_power_of_two_nonzero(chunk_bytes));
if (intrin == nir_intrinsic_ldc_nv)
if (intrin == nir_intrinsic_ldc_nv ||
intrin == nir_intrinsic_ldcx_nv)
chunk_bytes = MIN2(chunk_bytes, 8);
if (intrin == nir_intrinsic_ldc_nv && align < 4) {
if ((intrin == nir_intrinsic_ldc_nv ||
intrin == nir_intrinsic_ldcx_nv) && align < 4) {
/* CBufs require 4B alignment unless we're doing a ldc.u8 or ldc.i8.
* In particular, this applies to ldc.u16 which means we either have to
* fall back to two ldc.u8 or use ldc.u32 and shift stuff around to get