From 24a74b352c8f2c06e47ce32192df5cbe125e77d7 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Wed, 23 Sep 2020 09:04:39 +0200
Subject: [PATCH] radv: add a tweak for PS wave CU utilization for gfx10.3

Ported from RadeonSI.

Signed-off-by: Samuel Pitoiset
Reviewed-by: Bas Nieuwenhuizen
Part-of:
---
 src/amd/vulkan/si_cmd_buffer.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index 772f5c5928d..cd6cf23241a 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -286,6 +286,19 @@ si_emit_graphics(struct radv_device *device,
 		radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
 	}
 
+	unsigned cu_mask_ps = 0xffffffff;
+
+	/* It's wasteful to enable all CUs for PS if shader arrays have a
+	 * different number of CUs. The reason is that the hardware sends the
+	 * same number of PS waves to each shader array, so the slowest shader
+	 * array limits the performance. Disable the extra CUs for PS in
+	 * other shader arrays to save power and thus increase clocks for busy
+	 * CUs. In the future, we might disable or enable this tweak only for
+	 * certain apps.
+	 */
+	if (physical_device->rad_info.chip_class >= GFX10_3)
+		cu_mask_ps = u_bit_consecutive(0, physical_device->rad_info.min_good_cu_per_sa);
+
 	if (physical_device->rad_info.chip_class >= GFX7) {
 		if (physical_device->rad_info.chip_class >= GFX10) {
 			/* Logical CUs 16 - 31 */
@@ -294,7 +307,7 @@ si_emit_graphics(struct radv_device *device,
 			radeon_set_sh_reg_idx(physical_device, cs, R_00B104_SPI_SHADER_PGM_RSRC4_VS,
 					      3, S_00B104_CU_EN(0xffff));
 			radeon_set_sh_reg_idx(physical_device, cs, R_00B004_SPI_SHADER_PGM_RSRC4_PS,
-					      3, S_00B004_CU_EN(0xffff));
+					      3, S_00B004_CU_EN(cu_mask_ps >> 16));
 		}
 
 		if (physical_device->rad_info.chip_class >= GFX9) {
@@ -392,7 +405,7 @@ si_emit_graphics(struct radv_device *device,
 		}
 
 		radeon_set_sh_reg_idx(physical_device, cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
-				      3, S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F));
+				      3, S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F));
 	}
 
 	if (physical_device->rad_info.chip_class >= GFX10) {