ac/nir,radv: add 1 dword to ES/GS item size
This reduces LDS bank conflicts and aligns radv with radeonsi, so neither driver assumes that LDS accesses are 16-byte aligned. Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23314>
This commit is contained in:
@@ -166,8 +166,7 @@ lower_es_output_store(nir_builder *b,
|
||||
/* GFX9+: ES is merged into GS, data is passed through LDS. */
|
||||
nir_ssa_def *vertex_idx = nir_build_load_local_invocation_index(b);
|
||||
nir_ssa_def *off = nir_iadd(b, nir_imul_imm(b, vertex_idx, st->esgs_itemsize), io_off);
|
||||
nir_build_store_shared(b, intrin->src[0].ssa, off, .write_mask = write_mask,
|
||||
.align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u);
|
||||
nir_build_store_shared(b, intrin->src[0].ssa, off, .write_mask = write_mask);
|
||||
}
|
||||
|
||||
nir_instr_remove(instr);
|
||||
@@ -273,8 +272,7 @@ lower_gs_per_vertex_input_load(nir_builder *b,
|
||||
nir_ssa_def *off = gs_per_vertex_input_offset(b, st, intrin);
|
||||
|
||||
if (st->gfx_level >= GFX9)
|
||||
return nir_build_load_shared(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, off,
|
||||
.align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u);
|
||||
return nir_build_load_shared(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, off);
|
||||
|
||||
unsigned wave_size = 64u; /* GFX6-8 only support wave64 */
|
||||
nir_ssa_def *ring = nir_build_load_ring_esgs_amd(b);
|
||||
|
@@ -1496,6 +1496,12 @@ radv_link_shaders_info(struct radv_device *device,
|
||||
|
||||
/* Compute the ESGS item size for VS or TES as ES. */
|
||||
producer->info.esgs_itemsize = num_outputs_written * 16;
|
||||
|
||||
/* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
|
||||
* conflicts, i.e. each vertex will start on a different bank.
|
||||
*/
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9 && producer->info.esgs_itemsize)
|
||||
producer->info.esgs_itemsize += 4;
|
||||
}
|
||||
|
||||
/* Compute NGG info (GFX10+) or GS info. */
|
||||
|
Reference in New Issue
Block a user