radeonsi: increase gfx1100/gfx1101 physical vgprs

https://reviews.llvm.org/D134522

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18825>
This commit is contained in:
Rhys Perry
2022-10-13 18:40:15 +01:00
committed by Marge Bot
parent 50073d6135
commit 0004974467
2 changed files with 16 additions and 11 deletions

View File

@@ -118,16 +118,6 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wav
if (!conf->spi_ps_input_addr)
conf->spi_ps_input_addr = conf->spi_ps_input_ena;
/* GFX 10.3 internally:
* - aligns VGPRS to 16 for Wave32 and 8 for Wave64
* - aligns LDS to 1024
*
* For shader-db stats, set num_vgprs that the hw actually uses.
*/
if (info->gfx_level == GFX10_3) {
conf->num_vgprs = align(conf->num_vgprs, wave_size == 32 ? 16 : 8);
}
/* Enable 64-bit and 16-bit denormals, because there is no performance
* cost.
*

View File

@@ -1058,10 +1058,25 @@ static void si_calculate_max_simd_waves(struct si_shader *shader)
}
if (conf->num_vgprs) {
/* GFX 10.3 internally:
* - aligns VGPRS to 16 for Wave32 and 8 for Wave64
* - aligns LDS to 1024
*
* For shader-db stats, set num_vgprs that the hw actually uses.
*/
unsigned num_vgprs = conf->num_vgprs;
if (sscreen->info.family == CHIP_GFX1100 || sscreen->info.family == CHIP_GFX1101) {
num_vgprs = util_align_npot(num_vgprs, shader->wave_size == 32 ? 24 : 12);
} else if (sscreen->info.gfx_level == GFX10_3) {
num_vgprs = align(num_vgprs, shader->wave_size == 32 ? 16 : 8);
} else {
num_vgprs = align(num_vgprs, shader->wave_size == 32 ? 8 : 4);
}
/* Always print wave limits as Wave64, so that we can compare
* Wave32 and Wave64 with shader-db fairly. */
unsigned max_vgprs = sscreen->info.num_physical_wave64_vgprs_per_simd;
max_simd_waves = MIN2(max_simd_waves, max_vgprs / conf->num_vgprs);
max_simd_waves = MIN2(max_simd_waves, max_vgprs / num_vgprs);
}
unsigned max_lds_per_simd = sscreen->info.lds_size_per_workgroup / 4;