From dc7b08c41a5d8f077ff01e7d840b4152ffe5909b Mon Sep 17 00:00:00 2001
From: Faith Ekstrand
Date: Fri, 24 May 2024 16:14:27 -0500
Subject: [PATCH] nak: Implement nir_intrinsic_ldcx_nv

Part-of:
---
 src/nouveau/compiler/nak/from_nir.rs | 52 +++++++++++++++++++++++-----
 src/nouveau/compiler/nak_nir.c       |  9 +++--
 2 files changed, 50 insertions(+), 11 deletions(-)

diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index 5564cd58711..8bfd5532bb3 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -310,8 +310,12 @@ impl<'a> ShaderFromNir<'a> {
         }
     }
 
+    fn get_ssa_ref(&mut self, src: &nir_src) -> SSARef {
+        SSARef::try_from(self.get_ssa(src.as_def())).unwrap()
+    }
+
     fn get_src(&mut self, src: &nir_src) -> Src {
-        SSARef::try_from(self.get_ssa(src.as_def())).unwrap().into()
+        self.get_ssa_ref(src).into()
     }
 
     fn get_io_addr_offset(
@@ -334,6 +338,15 @@ impl<'a> ShaderFromNir<'a> {
         }
     }
 
+    fn get_cbuf_addr_offset(&mut self, addr: &nir_src) -> (Src, u16) {
+        let (off, off_imm) = self.get_io_addr_offset(addr, 16);
+        if let Ok(off_imm_u16) = u16::try_from(off_imm) {
+            (off, off_imm_u16)
+        } else {
+            (self.get_src(addr), 0)
+        }
+    }
+
     fn set_dst(&mut self, def: &nir_def, ssa: SSARef) {
         self.set_ssa(def, (*ssa).into());
     }
@@ -2611,13 +2624,7 @@ impl<'a> ShaderFromNir<'a> {
                     (intrin.def.bit_size() / 8) * intrin.def.num_components();
 
                 let idx = &srcs[0];
-                let (off, off_imm) = self.get_io_addr_offset(&srcs[1], 16);
-                let (off, off_imm) =
-                    if let Ok(off_imm_u16) = u16::try_from(off_imm) {
-                        (off, off_imm_u16)
-                    } else {
-                        (self.get_src(&srcs[1]), 0)
-                    };
+                let (off, off_imm) = self.get_cbuf_addr_offset(&srcs[1]);
 
                 let dst = b.alloc_ssa(RegFile::GPR, size_B.div_ceil(4));
 
@@ -2666,6 +2673,35 @@ impl<'a> ShaderFromNir<'a> {
                 }
                 self.set_dst(&intrin.def, dst);
             }
+            nir_intrinsic_ldcx_nv => {
+                let size_B =
+                    (intrin.def.bit_size() / 8) * intrin.def.num_components();
+
+                let handle = self.get_ssa_ref(&srcs[0]);
+                let (off, off_imm) = self.get_cbuf_addr_offset(&srcs[1]);
+
+                let cb = CBufRef {
+                    buf: CBuf::BindlessSSA(handle),
+                    offset: off_imm,
+                };
+
+                let dst = b.alloc_ssa(RegFile::GPR, size_B.div_ceil(4));
+                if off.is_zero() {
+                    for (i, comp) in dst.iter().enumerate() {
+                        let i = u16::try_from(i).unwrap();
+                        b.copy_to((*comp).into(), cb.offset(i * 4).into());
+                    }
+                } else {
+                    b.push_op(OpLdc {
+                        dst: dst.into(),
+                        cb: cb.into(),
+                        offset: off,
+                        mode: LdcMode::Indexed,
+                        mem_type: MemType::from_size(size_B, false),
+                    });
+                }
+                self.set_dst(&intrin.def, dst);
+            }
             nir_intrinsic_pin_cx_handle_nv => {
                 let handle = self.get_ssa_ref(&srcs[0]);
                 b.push_op(OpPin {
diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c
index 4b226d1c7aa..ac5eb4d47e4 100644
--- a/src/nouveau/compiler/nak_nir.c
+++ b/src/nouveau/compiler/nak_nir.c
@@ -803,7 +803,8 @@ nak_mem_vectorize_cb(unsigned align_mul, unsigned align_offset,
    assert(util_is_power_of_two_nonzero(align_mul));
 
    unsigned max_bytes = 128u / 8u;
-   if (low->intrinsic == nir_intrinsic_ldc_nv)
+   if (low->intrinsic == nir_intrinsic_ldc_nv ||
+       low->intrinsic == nir_intrinsic_ldcx_nv)
       max_bytes = 64u / 8u;
 
    align_mul = MIN2(align_mul, max_bytes);
@@ -830,10 +831,12 @@ nak_mem_access_size_align(nir_intrinsic_op intrin,
    unsigned chunk_bytes = MIN3(bytes_pow2, align, 16);
    assert(util_is_power_of_two_nonzero(chunk_bytes));
 
-   if (intrin == nir_intrinsic_ldc_nv)
+   if (intrin == nir_intrinsic_ldc_nv ||
+       intrin == nir_intrinsic_ldcx_nv)
       chunk_bytes = MIN2(chunk_bytes, 8);
 
-   if (intrin == nir_intrinsic_ldc_nv && align < 4) {
+   if ((intrin == nir_intrinsic_ldc_nv ||
+        intrin == nir_intrinsic_ldcx_nv) && align < 4) {
       /* CBufs require 4B alignment unless we're doing a ldc.u8 or ldc.i8.
        * In particular, this applies to ldc.u16 which means we either have to
        * fall back to two ldc.u8 or use ldc.u32 and shift stuff around to get