radeonsi: remove the gl_SampleMask FS output if MSAA is disabled

Removing the output from the shader is better than the previous approach,
which only cleared MASK_EXPORT_ENABLE in si_emit_db_render_state.

Reviewed-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23216>
This commit is contained in:
Marek Olšák
2023-05-14 21:22:36 -04:00
parent 7b95be3575
commit 038fb6573a
7 changed files with 40 additions and 17 deletions

View File

@@ -298,6 +298,7 @@ typedef struct {
/* OpenGL only */
bool clamp_color;
bool alpha_to_one;
bool kill_samplemask;
enum pipe_compare_func alpha_func;
unsigned broadcast_last_cbuf;

View File

@@ -348,6 +348,8 @@ emit_ps_color_clamp_and_alpha_test(nir_builder *b, lower_ps_state *s)
static void
emit_ps_mrtz_export(nir_builder *b, lower_ps_state *s)
{
uint64_t outputs_written = b->shader->info.outputs_written;
nir_ssa_def *mrtz_alpha = NULL;
if (s->options->alpha_to_coverage_via_mrtz) {
mrtz_alpha = s->outputs[FRAG_RESULT_COLOR][3] ?
@@ -359,11 +361,15 @@ emit_ps_mrtz_export(nir_builder *b, lower_ps_state *s)
nir_ssa_def *stencil = s->outputs[FRAG_RESULT_STENCIL][0];
nir_ssa_def *sample_mask = s->outputs[FRAG_RESULT_SAMPLE_MASK][0];
if (s->options->kill_samplemask) {
sample_mask = NULL;
outputs_written &= ~BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
}
/* skip mrtz export if no one has written to any of them */
if (!depth && !stencil && !sample_mask && !mrtz_alpha)
return;
uint64_t outputs_written = b->shader->info.outputs_written;
/* use outputs_written to determine export format as we use it to set
* R_028710_SPI_SHADER_Z_FORMAT instead of relying on the real store output,
* because store output may be optimized out.

View File

@@ -704,7 +704,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args)
num_returns =
num_return_sgprs + util_bitcount(shader->selector->info.colors_written) * 4 +
shader->selector->info.writes_z + shader->selector->info.writes_stencil +
shader->selector->info.writes_samplemask + 1 /* SampleMaskIn */;
shader->ps.writes_samplemask + 1 /* SampleMaskIn */;
for (i = 0; i < num_return_sgprs; i++)
ac_add_return(&args->ac, AC_ARG_SGPR);
@@ -1149,7 +1149,7 @@ void si_shader_dump_stats_for_shader_db(struct si_screen *screen, struct si_shad
num_outputs = util_bitcount(shader->selector->info.colors_written) +
(shader->selector->info.writes_z ||
shader->selector->info.writes_stencil ||
shader->selector->info.writes_samplemask);
shader->ps.writes_samplemask);
}
util_debug_message(debug, SHADER_INFO,
@@ -1394,6 +1394,8 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
fprintf(f, " epilog.alpha_to_coverage_via_mrtz = %u\n", key->ps.part.epilog.alpha_to_coverage_via_mrtz);
fprintf(f, " epilog.clamp_color = %u\n", key->ps.part.epilog.clamp_color);
fprintf(f, " epilog.dual_src_blend_swizzle = %u\n", key->ps.part.epilog.dual_src_blend_swizzle);
fprintf(f, " epilog.rbplus_depth_only_opt = %u\n", key->ps.part.epilog.rbplus_depth_only_opt);
fprintf(f, " epilog.kill_samplemask = %u\n", key->ps.part.epilog.kill_samplemask);
fprintf(f, " mono.poly_line_smoothing = %u\n", key->ps.mono.poly_line_smoothing);
fprintf(f, " mono.point_smoothing = %u\n", key->ps.mono.point_smoothing);
fprintf(f, " mono.interpolate_at_sample_force_center = %u\n",
@@ -2232,6 +2234,7 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader,
.alpha_to_one = key->ps.part.epilog.alpha_to_one,
.alpha_func = key->ps.part.epilog.alpha_func,
.broadcast_last_cbuf = key->ps.part.epilog.last_cbuf,
.kill_samplemask = key->ps.part.epilog.kill_samplemask,
.bc_optimize_for_persp = key->ps.part.prolog.bc_optimize_for_persp,
.bc_optimize_for_linear = key->ps.part.prolog.bc_optimize_for_linear,
@@ -3076,7 +3079,8 @@ void si_get_ps_epilog_key(struct si_shader *shader, union si_shader_part_key *ke
key->ps_epilog.color_types = info->output_color_types;
key->ps_epilog.writes_z = info->writes_z;
key->ps_epilog.writes_stencil = info->writes_stencil;
key->ps_epilog.writes_samplemask = info->writes_samplemask;
key->ps_epilog.writes_samplemask = info->writes_samplemask &&
!shader->key.ps.part.epilog.kill_samplemask;
key->ps_epilog.states = shader->key.ps.part.epilog;
}
@@ -3153,6 +3157,11 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
struct si_shader_selector *sel = shader->selector;
struct si_shader *mainp = *si_get_main_shader_part(sel, &shader->key);
if (sel->stage == MESA_SHADER_FRAGMENT) {
shader->ps.writes_samplemask = sel->info.writes_samplemask &&
!shader->key.ps.part.epilog.kill_samplemask;
}
/* LS, ES, VS are compiled on demand if the main part hasn't been
* compiled for that stage.
*

View File

@@ -606,6 +606,7 @@ struct si_ps_epilog_bits {
unsigned clamp_color : 1;
unsigned dual_src_blend_swizzle : 1; /* gfx11+ */
unsigned rbplus_depth_only_opt:1;
unsigned kill_samplemask:1;
};
union si_shader_part_key {
@@ -966,6 +967,7 @@ struct si_shader {
unsigned cb_shader_mask;
unsigned db_shader_control;
unsigned num_interp;
bool writes_samplemask;
} ps;
};

View File

@@ -1042,11 +1042,15 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
struct si_shader_selector *sel = shader->selector;
struct si_shader_context ctx;
enum ac_float_mode float_mode = nir->info.stage == MESA_SHADER_KERNEL ? AC_FLOAT_MODE_DEFAULT : AC_FLOAT_MODE_DEFAULT_OPENGL;
bool exports_color_null = false;
bool exports_mrtz = false;
bool exports_color_null = sel->info.colors_written;
bool exports_mrtz = sel->info.writes_z || sel->info.writes_stencil || sel->info.writes_samplemask;
if (!exports_mrtz && !exports_color_null)
exports_color_null = si_shader_uses_discard(shader) || sscreen->info.gfx_level < GFX10;
if (sel->stage == MESA_SHADER_FRAGMENT) {
exports_color_null = sel->info.colors_written;
exports_mrtz = sel->info.writes_z || sel->info.writes_stencil || shader->ps.writes_samplemask;
if (!exports_mrtz && !exports_color_null)
exports_color_null = si_shader_uses_discard(shader) || sscreen->info.gfx_level < GFX10;
}
si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size, exports_color_null, exports_mrtz,
float_mode);

View File

@@ -1150,7 +1150,6 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
rs = (struct si_state_rasterizer *)sctx->discard_rasterizer_state;
if (old_rs->multisample_enable != rs->multisample_enable) {
si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
/* Update the small primitive filter workaround if necessary. */
@@ -1507,7 +1506,6 @@ void si_restore_qbo_state(struct si_context *sctx, struct si_qbo_state *st)
static void si_emit_db_render_state(struct si_context *sctx)
{
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
unsigned db_shader_control, db_render_control, db_count_control, vrs_override_cntl = 0;
/* DB_RENDER_CONTROL */
@@ -1575,10 +1573,6 @@ static void si_emit_db_render_state(struct si_context *sctx)
db_shader_control = sctx->ps_db_shader_control;
/* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */
if (!rs->multisample_enable)
db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
if (sctx->screen->info.has_export_conflict_bug &&
sctx->queued.named.blend->blend_enable_4bit &&
si_get_num_coverage_samples(sctx) == 1) {

View File

@@ -1803,7 +1803,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
/* DB_SHADER_CONTROL */
shader->ps.db_shader_control = S_02880C_Z_EXPORT_ENABLE(info->writes_z) |
S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(info->writes_stencil) |
S_02880C_MASK_EXPORT_ENABLE(info->writes_samplemask) |
S_02880C_MASK_EXPORT_ENABLE(shader->ps.writes_samplemask) |
S_02880C_KILL_ENABLE(si_shader_uses_discard(shader));
switch (info->base.fs.depth_layout) {
@@ -1887,7 +1887,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
shader->ps.spi_ps_input_addr = shader->config.spi_ps_input_addr;
shader->ps.num_interp = si_get_ps_num_interp(shader);
shader->ps.spi_shader_z_format =
ac_get_spi_shader_z_format(info->writes_z, info->writes_stencil, info->writes_samplemask,
ac_get_spi_shader_z_format(info->writes_z, info->writes_stencil, shader->ps.writes_samplemask,
shader->key.ps.part.epilog.alpha_to_coverage_via_mrtz);
/* Ensure that some export memory is always allocated, for two reasons:
@@ -1907,7 +1907,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
*
* RB+ depth-only rendering requires SPI_SHADER_32_R.
*/
bool has_mrtz = info->writes_z || info->writes_stencil || info->writes_samplemask;
bool has_mrtz = info->writes_z || info->writes_stencil || shader->ps.writes_samplemask;
if (!shader->ps.spi_shader_col_format) {
if (shader->key.ps.part.epilog.rbplus_depth_only_opt) {
@@ -2217,6 +2217,13 @@ void si_ps_key_update_framebuffer_blend_rasterizer(struct si_context *sctx)
sctx->gfx_level >= GFX11 && alpha_to_coverage &&
(sel->info.writes_z || sel->info.writes_stencil || sel->info.writes_samplemask);
/* Remove the gl_SampleMask fragment shader output if MSAA is disabled.
* This is required for correctness and it's also an optimization.
*/
key->ps.part.epilog.kill_samplemask = sel->info.writes_samplemask &&
(sctx->framebuffer.nr_samples <= 1 ||
!rs->multisample_enable);
/* If alpha-to-coverage isn't exported via MRTZ, set that we need to export alpha
* through MRT0.
*/