radv: compact MRTs to save PS export memory space
If there are holes between color outputs (e.g. a shader exports MRT1 but not MRT0), we can remove the holes by moving the higher MRTs lower. The hardware will remap the MRTs to their correct locations if we remove the holes in SPI_SHADER_COL_FORMAT but not in CB_SHADER_MASK.

This is good for performance because the hardware will allocate less space for color MRTs. It also allows removing even more unused color exports, because we no longer need to force previous targets to be non-zero.

Only SotTR seems to be affected in our fossils db.

fossils-db (NAVI21):
Totals from 859 (0.64% of 134913) affected shaders:
VGPRs: 24328 -> 24216 (-0.46%)
CodeSize: 1433276 -> 1422576 (-0.75%)
Instrs: 255275 -> 253728 (-0.61%)
Latency: 1666836 -> 1661544 (-0.32%)
InvThroughput: 346038 -> 343406 (-0.76%)
Copies: 16520 -> 16506 (-0.08%)
PreSGPRs: 25934 -> 25920 (-0.05%)
PreVGPRs: 19903 -> 19662 (-1.21%)

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5786>
This commit is contained in:

committed by
Marge Bot

parent
49c7d28b0b
commit
8fcb4aa0eb
@@ -533,13 +533,41 @@ format_is_float32(VkFormat format)
|
||||
desc->channel[channel].type == UTIL_FORMAT_TYPE_FLOAT && desc->channel[channel].size == 32;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
radv_compact_spi_shader_col_format(const struct radv_shader *ps,
|
||||
const struct radv_blend_state *blend)
|
||||
{
|
||||
unsigned spi_shader_col_format = blend->spi_shader_col_format;
|
||||
unsigned value = 0, num_mrts = 0;
|
||||
unsigned i, num_targets;
|
||||
|
||||
/* Make sure to clear color attachments without exports because MRT holes are removed during
|
||||
* compilation for optimal performance.
|
||||
*/
|
||||
spi_shader_col_format &= ps->info.ps.colors_written;
|
||||
|
||||
/* Compute the number of MRTs. */
|
||||
num_targets = DIV_ROUND_UP(util_last_bit(spi_shader_col_format), 4);
|
||||
|
||||
/* Remove holes in spi_shader_col_format. */
|
||||
for (i = 0; i < num_targets; i++) {
|
||||
unsigned spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;
|
||||
|
||||
if (spi_format) {
|
||||
value |= spi_format << (num_mrts * 4);
|
||||
num_mrts++;
|
||||
}
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
static void
|
||||
radv_pipeline_compute_spi_color_formats(const struct radv_graphics_pipeline *pipeline,
|
||||
struct radv_blend_state *blend,
|
||||
const struct vk_graphics_pipeline_state *state)
|
||||
{
|
||||
unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0;
|
||||
unsigned num_targets;
|
||||
|
||||
for (unsigned i = 0; i < state->rp->color_attachment_count; ++i) {
|
||||
unsigned cf;
|
||||
@@ -572,16 +600,6 @@ radv_pipeline_compute_spi_color_formats(const struct radv_graphics_pipeline *pip
|
||||
col_format |= V_028714_SPI_SHADER_32_AR;
|
||||
}
|
||||
|
||||
/* If the i-th target format is set, all previous target formats must
|
||||
* be non-zero to avoid hangs.
|
||||
*/
|
||||
num_targets = (util_last_bit(col_format) + 3) / 4;
|
||||
for (unsigned i = 0; i < num_targets; i++) {
|
||||
if (!(col_format & (0xfu << (i * 4)))) {
|
||||
col_format |= V_028714_SPI_SHADER_32_R << (i * 4);
|
||||
}
|
||||
}
|
||||
|
||||
/* The output for dual source blending should have the same format as
|
||||
* the first output.
|
||||
*/
|
||||
@@ -6030,6 +6048,9 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX10_3)
|
||||
gfx103_pipeline_init_vrs_state(pipeline, &state);
|
||||
|
||||
struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
|
||||
blend.spi_shader_col_format = radv_compact_spi_shader_col_format(ps, &blend);
|
||||
|
||||
/* Ensure that some export memory is always allocated, for two reasons:
|
||||
*
|
||||
* 1) Correctness: The hardware ignores the EXEC mask if no export
|
||||
@@ -6045,7 +6066,6 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv
|
||||
* color and Z formats to SPI_SHADER_ZERO. The hw will skip export
|
||||
* instructions if any are present.
|
||||
*/
|
||||
struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
|
||||
if ((device->physical_device->rad_info.gfx_level <= GFX9 || ps->info.ps.can_discard) &&
|
||||
!blend.spi_shader_col_format) {
|
||||
if (!ps->info.ps.writes_z && !ps->info.ps.writes_stencil && !ps->info.ps.writes_sample_mask)
|
||||
|
Reference in New Issue
Block a user