ir3: remove unused outputs for binning pass in NIR

We used to remove the unused outputs for the binning pass in ir3. Even
though DCE would be able to clean up most of the mess, there could still
be some left: most notably, unused control flow would not be cleaned up
(leaving some weird branches to empty blocks). Instead of teaching ir3
how to clean this up, it makes more sense to do this at the NIR level,
where we can also benefit from other optimizations.

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30256>
This commit is contained in:
Job Noorman
2024-08-16 08:19:26 +02:00
committed by Marge Bot
parent bec7e79f33
commit 24b422dc3e
2 changed files with 39 additions and 66 deletions

View File

@@ -5290,14 +5290,6 @@ fixup_tg4(struct ir3_context *ctx)
}
}
/*
 * Tell whether a varying slot is one of the outputs kept for the binning
 * pass: position, point size, the two clip-distance slots, or the viewport
 * index. Every other slot yields false.
 */
static bool
output_slot_used_for_binning(gl_varying_slot slot)
{
   switch (slot) {
   case VARYING_SLOT_POS:
   case VARYING_SLOT_PSIZ:
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CLIP_DIST1:
   case VARYING_SLOT_VIEWPORT:
      return true;
   default:
      return false;
   }
}
static struct ir3_instruction *
find_end(struct ir3 *ir)
{
@@ -5310,48 +5302,6 @@ find_end(struct ir3 *ir)
unreachable("couldn't find end instruction");
}
/*
 * Drop every output that the binning pass does not need.
 *
 * Two tables are compacted in place:
 *  - the sources of the `end` instruction (together with the parallel
 *    end.outidxs array), and
 *  - the outputs table of the shader variant reached through ctx->so.
 *
 * While the outputs table is being compacted, the outidxs entries that
 * referred to a moved output are rewritten to its new position.
 */
static void
fixup_binning_pass(struct ir3_context *ctx, struct ir3_instruction *end)
{
   struct ir3_shader_variant *variant = ctx->so;

   /* Step 1: keep only the end-instruction sources whose output slot is
    * consumed by the binning pass.
    */
   unsigned kept_srcs = 0;
   for (unsigned src = 0; src < end->srcs_count; src++) {
      unsigned outidx = end->end.outidxs[src];

      if (!output_slot_used_for_binning(variant->outputs[outidx].slot))
         continue;

      end->srcs[kept_srcs] = end->srcs[src];
      end->end.outidxs[kept_srcs] = outidx;
      kept_srcs++;
   }
   end->srcs_count = kept_srcs;

   /* Step 2: squeeze the unused entries out of the variant's outputs
    * table.
    */
   unsigned kept_outputs = 0;
   for (unsigned out = 0; out < variant->outputs_count; out++) {
      if (!output_slot_used_for_binning(variant->outputs[out].slot))
         continue;

      variant->outputs[kept_outputs] = variant->outputs[out];

      /* Redirect the (first) source that pointed at the old index so it
       * refers to the entry's new position in the table.
       */
      for (unsigned src = 0; src < end->srcs_count; src++) {
         if (end->end.outidxs[src] == out) {
            end->end.outidxs[src] = kept_outputs;
            break;
         }
      }

      kept_outputs++;
   }
   variant->outputs_count = kept_outputs;
}
static void
collect_tex_prefetches(struct ir3_context *ctx, struct ir3 *ir)
{
@@ -5601,10 +5551,6 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
memcpy(end->end.outidxs, outidxs, sizeof(unsigned) * outputs_count);
array_insert(ctx->block, ctx->block->keeps, end);
/* at this point, for binning pass, throw away unneeded outputs: */
if (so->binning_pass && (ctx->compiler->gen < 6))
fixup_binning_pass(ctx, end);
}
if (so->type == MESA_SHADER_FRAGMENT &&
@@ -5637,18 +5583,6 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
progress |= IR3_PASS(ir, ir3_shared_fold);
} while (progress);
/* at this point, for binning pass, throw away unneeded outputs:
* Note that for a6xx and later, we do this after ir3_cp to ensure
* that the uniform/constant layout for BS and VS matches, so that
* we can re-use same VS_CONST state group.
*/
if (so->binning_pass && (ctx->compiler->gen >= 6)) {
fixup_binning_pass(ctx, find_end(ctx->so->ir));
/* cleanup the result of removing unneeded outputs: */
while (IR3_PASS(ir, ir3_dce, so)) {
}
}
IR3_PASS(ir, ir3_sched_add_deps);
/* At this point, all the dead code should be long gone: */

View File

@@ -810,6 +810,36 @@ lower_ucp_vs(struct ir3_shader_variant *so)
return so->type == last_geom_stage;
}
/*
 * Report whether an output written to `slot` must survive in the binning
 * variant. Only position, point size, clip distances (both slots) and the
 * viewport index qualify; all other slots return false.
 */
static bool
output_slot_used_for_binning(gl_varying_slot slot)
{
   switch (slot) {
   case VARYING_SLOT_POS:
   case VARYING_SLOT_PSIZ:
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CLIP_DIST1:
   case VARYING_SLOT_VIEWPORT:
      return true;
   default:
      return false;
   }
}
/*
 * Per-intrinsic callback (handed to nir_shader_intrinsics_pass by
 * lower_binning): deletes a store_output whose io-semantics location is not
 * a slot used for binning.
 *
 * `b` and `data` are part of the callback signature and are not used here.
 *
 * Returns true when the instruction was removed, false when it was left
 * untouched.
 */
static bool
remove_nonbinning_output(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   if (intr->intrinsic == nir_intrinsic_store_output) {
      /* Only read the io semantics once we know this is a store_output. */
      const nir_io_semantics sem = nir_intrinsic_io_semantics(intr);

      if (!output_slot_used_for_binning(sem.location)) {
         nir_instr_remove(&intr->instr);
         return true;
      }
   }

   return false;
}
/*
 * Run remove_nonbinning_output over every intrinsic of shader `s`, passing
 * nir_metadata_control_flow as the metadata argument and no user data.
 *
 * Returns whatever nir_shader_intrinsics_pass reports as progress.
 */
static bool
lower_binning(nir_shader *s)
{
   bool progress = nir_shader_intrinsics_pass(s, remove_nonbinning_output,
                                              nir_metadata_control_flow,
                                              NULL);

   return progress;
}
void
ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
{
@@ -856,6 +886,15 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
}
}
if (so->binning_pass) {
if (OPT(s, lower_binning)) {
progress = true;
/* outputs_written has changed. */
nir_shader_gather_info(s, nir_shader_get_entrypoint(s));
}
}
/* Note that it is intentional to use the VS lowering pass for GS, since we
* lower GS into something that looks more like a VS in ir3_nir_lower_gs():
*/