ac/nir,radv: add 1 dword to ES/GS item size

This reduces LDS bank conflicts and aligns with radeonsi,
so we no longer assume that LDS accesses are 16-byte aligned
in either driver.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23314>
This commit is contained in:
Qiang Yu
2023-05-31 16:33:34 +08:00
committed by Marge Bot
parent 2e1092095a
commit b44bbe7daa
2 changed files with 8 additions and 4 deletions

View File

@@ -166,8 +166,7 @@ lower_es_output_store(nir_builder *b,
/* GFX9+: ES is merged into GS, data is passed through LDS. */
nir_ssa_def *vertex_idx = nir_build_load_local_invocation_index(b);
nir_ssa_def *off = nir_iadd(b, nir_imul_imm(b, vertex_idx, st->esgs_itemsize), io_off);
nir_build_store_shared(b, intrin->src[0].ssa, off, .write_mask = write_mask,
.align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u);
nir_build_store_shared(b, intrin->src[0].ssa, off, .write_mask = write_mask);
}
nir_instr_remove(instr);
@@ -273,8 +272,7 @@ lower_gs_per_vertex_input_load(nir_builder *b,
nir_ssa_def *off = gs_per_vertex_input_offset(b, st, intrin);
if (st->gfx_level >= GFX9)
return nir_build_load_shared(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, off,
.align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u);
return nir_build_load_shared(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, off);
unsigned wave_size = 64u; /* GFX6-8 only support wave64 */
nir_ssa_def *ring = nir_build_load_ring_esgs_amd(b);

View File

@@ -1496,6 +1496,12 @@ radv_link_shaders_info(struct radv_device *device,
/* Compute the ESGS item size for VS or TES as ES. */
producer->info.esgs_itemsize = num_outputs_written * 16;
/* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
* conflicts, i.e. each vertex will start on a different bank.
*/
if (device->physical_device->rad_info.gfx_level >= GFX9 && producer->info.esgs_itemsize)
producer->info.esgs_itemsize += 4;
}
/* Compute NGG info (GFX10+) or GS info. */