radeonsi: tweak LATE_ALLOC_GS numbers for faster NGG culling

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7172>
This commit is contained in:
Marek Olšák
2020-10-16 05:02:58 -04:00
committed by Marge Bot
parent b38b5aabee
commit bc3c74e2a2

View File

@@ -1168,15 +1168,18 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
late_alloc_wave64 = 0;
else if (num_cu_per_sh <= 6)
late_alloc_wave64 = num_cu_per_sh - 2; /* All CUs enabled */
else if (shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL)
late_alloc_wave64 = (num_cu_per_sh - 2) * 6;
else if (shader->key.opt.ngg_culling)
late_alloc_wave64 = num_cu_per_sh * 10;
else
late_alloc_wave64 = (num_cu_per_sh - 2) * 4;
late_alloc_wave64 = num_cu_per_sh * 4;
/* Limit LATE_ALLOC_GS to prevent a hang (hw bug). */
if (sscreen->info.chip_class == GFX10)
late_alloc_wave64 = MIN2(late_alloc_wave64, 64);
/* Max number that fits into the register field. */
late_alloc_wave64 = MIN2(late_alloc_wave64, 127);
si_pm4_set_reg(
pm4, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
S_00B204_CU_EN(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64));