radv: compact MRTs to save PS export memory space

If there are holes between color outputs (e.g. a shader exports MRT1,
but not MRT0), we can remove the holes by moving higher MRTs lower. The
hardware will remap the MRTs to their correct locations if we remove
holes in SPI_SHADER_COL_FORMAT but not CB_SHADER_MASK. This is good for
performance because the hardware will allocate less space for color
MRTs.

This also allows us to remove even more unused color exports because we
no longer need to force previous targets to be non-zero. Only SotTR seems
to be affected in our fossils-db.

fossils-db (NAVI21):
Totals from 859 (0.64% of 134913) affected shaders:
VGPRs: 24328 -> 24216 (-0.46%)
CodeSize: 1433276 -> 1422576 (-0.75%)
Instrs: 255275 -> 253728 (-0.61%)
Latency: 1666836 -> 1661544 (-0.32%)
InvThroughput: 346038 -> 343406 (-0.76%)
Copies: 16520 -> 16506 (-0.08%)
PreSGPRs: 25934 -> 25920 (-0.05%)
PreVGPRs: 19903 -> 19662 (-1.21%)

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5786>
This commit is contained in:
Samuel Pitoiset
2022-08-15 11:59:03 +02:00
committed by Marge Bot
parent 49c7d28b0b
commit 8fcb4aa0eb
3 changed files with 55 additions and 26 deletions

View File

@@ -578,7 +578,8 @@ scan_shader_output_decl(struct radv_shader_context *ctx, struct nir_variable *va
/* Initialize arguments for the shader export intrinsic */
static void
si_llvm_init_export_args(struct radv_shader_context *ctx, LLVMValueRef *values,
unsigned enabled_channels, unsigned target, struct ac_export_args *args)
unsigned enabled_channels, unsigned target, unsigned index,
struct ac_export_args *args)
{
/* Specify the channels that are enabled. */
args->enabled_channels = enabled_channels;
@@ -603,7 +604,6 @@ si_llvm_init_export_args(struct radv_shader_context *ctx, LLVMValueRef *values,
bool is_16bit = ac_get_type_size(LLVMTypeOf(values[0])) == 2;
if (ctx->stage == MESA_SHADER_FRAGMENT) {
unsigned index = target - V_008DFC_SQ_EXP_MRT;
unsigned col_format = (ctx->options->key.ps.col_format >> (4 * index)) & 0xf;
bool is_int8 = (ctx->options->key.ps.is_int8 >> index) & 1;
bool is_int10 = (ctx->options->key.ps.is_int10 >> index) & 1;
@@ -743,7 +743,7 @@ radv_export_param(struct radv_shader_context *ctx, unsigned index, LLVMValueRef
{
struct ac_export_args args;
si_llvm_init_export_args(ctx, values, enabled_channels, V_008DFC_SQ_EXP_PARAM + index, &args);
si_llvm_init_export_args(ctx, values, enabled_channels, V_008DFC_SQ_EXP_PARAM + index, 0, &args);
ac_build_export(&ctx->ac, &args);
}
@@ -915,7 +915,7 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v
for (i = 0; i < noutput; i++) {
switch (outputs[i].slot_name) {
case VARYING_SLOT_POS:
si_llvm_init_export_args(ctx, outputs[i].values, 0xf, V_008DFC_SQ_EXP_POS, &pos_args[0]);
si_llvm_init_export_args(ctx, outputs[i].values, 0xf, V_008DFC_SQ_EXP_POS, 0, &pos_args[0]);
break;
case VARYING_SLOT_PSIZ:
psize_value = outputs[i].values[0];
@@ -932,7 +932,7 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v
case VARYING_SLOT_CLIP_DIST0:
case VARYING_SLOT_CLIP_DIST1:
index = 2 + outputs[i].slot_index;
si_llvm_init_export_args(ctx, outputs[i].values, 0xf, V_008DFC_SQ_EXP_POS + index,
si_llvm_init_export_args(ctx, outputs[i].values, 0xf, V_008DFC_SQ_EXP_POS + index, 0,
&pos_args[index]);
break;
default:
@@ -1064,11 +1064,11 @@ handle_vs_outputs_post(struct radv_shader_context *ctx)
}
static bool
si_export_mrt_color(struct radv_shader_context *ctx, LLVMValueRef *color, unsigned index,
struct ac_export_args *args)
si_export_mrt_color(struct radv_shader_context *ctx, LLVMValueRef *color, unsigned target,
unsigned index, struct ac_export_args *args)
{
/* Export */
si_llvm_init_export_args(ctx, color, 0xf, V_008DFC_SQ_EXP_MRT + index, args);
si_llvm_init_export_args(ctx, color, 0xf, V_008DFC_SQ_EXP_MRT + target, index, args);
if (!args->enabled_channels)
return false; /* unnecessary NULL export */
@@ -1105,7 +1105,7 @@ handle_fs_outputs_post(struct radv_shader_context *ctx)
for (unsigned j = 0; j < 4; j++)
values[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j));
bool ret = si_export_mrt_color(ctx, values, i - FRAG_RESULT_DATA0, &color_args[index]);
bool ret = si_export_mrt_color(ctx, values, index, i - FRAG_RESULT_DATA0, &color_args[index]);
if (ret)
index++;
}