agx: Optimize out pointless else instructions

Now that they're in the right blocks, this is easy. Includes an informal proof
and the implementation itself is built around a finite state machine, which
together meant this code worked on its first try :~)

And hey, it's a pointless little instruction saving optimization I've wanted to
do for a while~

Major note is that this HAS to be done after register allocation, since it
doesn't update the control flow graph and would introduce critical edges
if it tried to actually deleted the else block. The intuitive reason for this is
simple: sometimes RA needs to insert instructions into the else block, even if
it was empty in the original NIR, so we always need an else block even if we can
delete it with this pass after RA.

   total instructions in shared programs: 1778390 -> 1776725 (-0.09%)
   instructions in affected programs: 268459 -> 266794 (-0.62%)
   helped: 1013
   HURT: 0
   Instructions are helped.

   total bytes in shared programs: 12185102 -> 12175112 (-0.08%)
   bytes in affected programs: 1927524 -> 1917534 (-0.52%)
   helped: 1013
   HURT: 0
   Bytes are helped.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24635>
This commit is contained in:
Alyssa Rosenzweig
2023-07-29 21:36:07 -04:00
committed by Marge Bot
parent 782055106f
commit e83b708676
4 changed files with 111 additions and 0 deletions

View File

@@ -2509,6 +2509,7 @@ agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl,
agx_lower_pseudo(ctx); agx_lower_pseudo(ctx);
agx_insert_waits(ctx); agx_insert_waits(ctx);
agx_opt_empty_else(ctx);
if (agx_should_dump(nir, AGX_DBG_SHADERS)) if (agx_should_dump(nir, AGX_DBG_SHADERS))
agx_print_shader(ctx, stdout); agx_print_shader(ctx, stdout);

View File

@@ -768,6 +768,7 @@ void agx_dce(agx_context *ctx, bool partial);
void agx_ra(agx_context *ctx); void agx_ra(agx_context *ctx);
void agx_lower_64bit_postra(agx_context *ctx); void agx_lower_64bit_postra(agx_context *ctx);
void agx_insert_waits(agx_context *ctx); void agx_insert_waits(agx_context *ctx);
void agx_opt_empty_else(agx_context *ctx);
void agx_pack_binary(agx_context *ctx, struct util_dynarray *emission); void agx_pack_binary(agx_context *ctx, struct util_dynarray *emission);
#ifndef NDEBUG #ifndef NDEBUG

View File

@@ -0,0 +1,108 @@
/*
* Copyright 2023 Valve Corporation
* SPDX-License-Identifier: MIT
*/
#include "util/list.h"
#include "agx_builder.h"
#include "agx_compiler.h"
#include "agx_opcodes.h"
/*
* Detect blocks with the sole contents:
*
* else n=1
* logical_end
* pop_exec n=1
*
* The else instruction is a no-op. To see that, consider the pseudocode for the
* sequence of operations "else n=1; pop_exec n=1":
*
* # else n=1
* if r0l == 0:
* r0l = 1
* elif r0l == 1:
* if [...]:
* r0l = 0
* else:
* r0l = 1
* exec_mask[thread] = (r0l == 0)
*
* # pop_exec n=1
* if r0l > 0:
* r0l -= 1
* exec_mask[thread] = (r0l == 0)
*
* That logic code simplifies to:
*
* if r0l > 0:
* r0l = r0l - 1
* exec_mask[thread] = (r0l == 0)
*
* which is just "pop_exec n=1".
*
* Therefore, this pass detects these blocks and deletes the else instruction.
* This has the effect of removing empty else blocks. Logically, that creates
* critical edges, so this pass can only run late (post-RA).
*
* The pass itself uses a simple state machine for pattern matching.
*/
enum block_state {
STATE_ELSE = 0,
STATE_LOGICAL_END,
STATE_POP_EXEC,
STATE_DONE,
/* Must be last */
STATE_NONE,
};
static enum block_state
state_for_instr(const agx_instr *I)
{
switch (I->op) {
case AGX_OPCODE_ELSE_ICMP:
case AGX_OPCODE_ELSE_FCMP:
return (I->nest == 1) ? STATE_ELSE : STATE_NONE;
case AGX_OPCODE_LOGICAL_END:
return STATE_LOGICAL_END;
case AGX_OPCODE_POP_EXEC:
return (I->nest == 1) ? STATE_POP_EXEC : STATE_NONE;
default:
return STATE_NONE;
}
}
static bool
match_block(agx_block *blk)
{
enum block_state state = STATE_ELSE;
agx_foreach_instr_in_block(blk, I) {
if (state_for_instr(I) == state)
state++;
else
return false;
}
return (state == STATE_DONE);
}
void
agx_opt_empty_else(agx_context *ctx)
{
agx_foreach_block(ctx, blk) {
if (match_block(blk)) {
agx_instr *else_instr =
list_first_entry(&blk->instructions, agx_instr, link);
assert(state_for_instr(else_instr) == STATE_ELSE && "block matched");
agx_remove_instruction(else_instr);
}
}
}

View File

@@ -27,6 +27,7 @@ libasahi_agx_files = files(
'agx_print.c', 'agx_print.c',
'agx_ir.c', 'agx_ir.c',
'agx_opt_cse.c', 'agx_opt_cse.c',
'agx_opt_empty_else.c',
'agx_optimizer.c', 'agx_optimizer.c',
'agx_register_allocate.c', 'agx_register_allocate.c',
'agx_validate.c', 'agx_validate.c',