aco: add struct and helpers for exec potentially empty
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30320>
This commit is contained in:
@@ -54,15 +54,11 @@ struct if_context {
|
||||
Temp cond;
|
||||
|
||||
bool divergent_old;
|
||||
bool exec_potentially_empty_discard_old;
|
||||
bool exec_potentially_empty_break_old;
|
||||
bool exec_potentially_empty_continue_old;
|
||||
bool had_divergent_discard_old;
|
||||
bool had_divergent_discard_then;
|
||||
bool has_divergent_continue_old;
|
||||
bool has_divergent_continue_then;
|
||||
uint16_t exec_potentially_empty_break_depth_old;
|
||||
uint16_t exec_potentially_empty_continue_depth_old;
|
||||
struct exec_info exec_old;
|
||||
|
||||
unsigned BB_if_idx;
|
||||
unsigned invert_idx;
|
||||
@@ -9099,7 +9095,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
bld.pseudo(aco_opcode::p_demote_to_helper, cond);
|
||||
|
||||
if (ctx->block->loop_nest_depth || ctx->cf_info.parent_if.is_divergent)
|
||||
ctx->cf_info.exec_potentially_empty_discard = true;
|
||||
ctx->cf_info.exec.potentially_empty_discard = true;
|
||||
|
||||
ctx->block->kind |= block_kind_uses_discard;
|
||||
ctx->program->needs_exact = true;
|
||||
@@ -9125,7 +9121,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
bld.pseudo(aco_opcode::p_discard_if, cond);
|
||||
|
||||
if (ctx->block->loop_nest_depth || ctx->cf_info.parent_if.is_divergent)
|
||||
ctx->cf_info.exec_potentially_empty_discard = true;
|
||||
ctx->cf_info.exec.potentially_empty_discard = true;
|
||||
ctx->cf_info.had_divergent_discard |= in_exec_divergent_or_in_loop(ctx);
|
||||
ctx->block->kind |= block_kind_uses_discard;
|
||||
ctx->program->needs_exact = true;
|
||||
@@ -10251,7 +10247,7 @@ visit_undef(isel_context* ctx, nir_undef_instr* instr)
|
||||
void
|
||||
begin_loop(isel_context* ctx, loop_context* lc)
|
||||
{
|
||||
// TODO: we might want to wrap the loop around a branch if exec_potentially_empty=true
|
||||
// TODO: we might want to wrap the loop around a branch if exec.potentially_empty=true
|
||||
append_logical_end(ctx->block);
|
||||
ctx->block->kind |= block_kind_loop_preheader | block_kind_uniform;
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
@@ -10276,6 +10272,32 @@ begin_loop(isel_context* ctx, loop_context* lc)
|
||||
lc->divergent_if_old = std::exchange(ctx->cf_info.parent_if.is_divergent, false);
|
||||
}
|
||||
|
||||
void
|
||||
update_exec_info(isel_context* ctx)
|
||||
{
|
||||
if (!ctx->block->loop_nest_depth && !ctx->cf_info.parent_if.is_divergent)
|
||||
ctx->cf_info.exec.potentially_empty_discard = false;
|
||||
|
||||
ctx->cf_info.exec.potentially_empty_break &=
|
||||
ctx->block->loop_nest_depth >= ctx->cf_info.exec.potentially_empty_break_depth;
|
||||
ctx->cf_info.exec.potentially_empty_continue &=
|
||||
ctx->block->loop_nest_depth >= ctx->cf_info.exec.potentially_empty_continue_depth;
|
||||
|
||||
if (ctx->block->loop_nest_depth == ctx->cf_info.exec.potentially_empty_break_depth &&
|
||||
!ctx->cf_info.parent_if.is_divergent && !ctx->cf_info.parent_loop.has_divergent_continue) {
|
||||
ctx->cf_info.exec.potentially_empty_break = false;
|
||||
}
|
||||
if (ctx->block->loop_nest_depth == ctx->cf_info.exec.potentially_empty_continue_depth &&
|
||||
!ctx->cf_info.parent_if.is_divergent) {
|
||||
ctx->cf_info.exec.potentially_empty_continue = false;
|
||||
}
|
||||
|
||||
if (!ctx->cf_info.exec.potentially_empty_break)
|
||||
ctx->cf_info.exec.potentially_empty_break_depth = UINT16_MAX;
|
||||
if (!ctx->cf_info.exec.potentially_empty_continue)
|
||||
ctx->cf_info.exec.potentially_empty_continue_depth = UINT16_MAX;
|
||||
}
|
||||
|
||||
void
|
||||
end_loop(isel_context* ctx, loop_context* lc)
|
||||
{
|
||||
@@ -10286,14 +10308,14 @@ end_loop(isel_context* ctx, loop_context* lc)
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
append_logical_end(ctx->block);
|
||||
|
||||
/* No need to check exec_potentially_empty_break/continue originating inside the loop. In the
|
||||
/* No need to check exec.potentially_empty_break/continue originating inside the loop. In the
|
||||
* only case where it's possible at this point (divergent break after divergent continue), we
|
||||
* should continue anyway. */
|
||||
if (ctx->cf_info.exec_potentially_empty_discard ||
|
||||
(ctx->cf_info.exec_potentially_empty_break &&
|
||||
ctx->cf_info.exec_potentially_empty_break_depth < ctx->block->loop_nest_depth) ||
|
||||
(ctx->cf_info.exec_potentially_empty_continue &&
|
||||
ctx->cf_info.exec_potentially_empty_continue_depth < ctx->block->loop_nest_depth)) {
|
||||
if (ctx->cf_info.exec.potentially_empty_discard ||
|
||||
(ctx->cf_info.exec.potentially_empty_break &&
|
||||
ctx->cf_info.exec.potentially_empty_break_depth < ctx->block->loop_nest_depth) ||
|
||||
(ctx->cf_info.exec.potentially_empty_continue &&
|
||||
ctx->cf_info.exec.potentially_empty_continue_depth < ctx->block->loop_nest_depth)) {
|
||||
/* Discards can result in code running with an empty exec mask.
|
||||
* This would result in divergent breaks not ever being taken. As a
|
||||
* workaround, break the loop when the loop mask is empty instead of
|
||||
@@ -10343,12 +10365,7 @@ end_loop(isel_context* ctx, loop_context* lc)
|
||||
ctx->cf_info.parent_loop.has_divergent_continue = lc->divergent_cont_old;
|
||||
ctx->cf_info.parent_loop.has_divergent_branch = lc->divergent_branch_old;
|
||||
ctx->cf_info.parent_if.is_divergent = lc->divergent_if_old;
|
||||
if (!ctx->block->loop_nest_depth && !ctx->cf_info.parent_if.is_divergent)
|
||||
ctx->cf_info.exec_potentially_empty_discard = false;
|
||||
if (ctx->block->loop_nest_depth < ctx->cf_info.exec_potentially_empty_break_depth) {
|
||||
ctx->cf_info.exec_potentially_empty_break = false;
|
||||
ctx->cf_info.exec_potentially_empty_break_depth = UINT16_MAX;
|
||||
}
|
||||
update_exec_info(ctx);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -10375,9 +10392,9 @@ emit_loop_jump(isel_context* ctx, bool is_break)
|
||||
}
|
||||
ctx->cf_info.parent_loop.has_divergent_branch = true;
|
||||
|
||||
if (!ctx->cf_info.exec_potentially_empty_break) {
|
||||
ctx->cf_info.exec_potentially_empty_break = true;
|
||||
ctx->cf_info.exec_potentially_empty_break_depth = ctx->block->loop_nest_depth;
|
||||
if (!ctx->cf_info.exec.potentially_empty_break) {
|
||||
ctx->cf_info.exec.potentially_empty_break = true;
|
||||
ctx->cf_info.exec.potentially_empty_break_depth = ctx->block->loop_nest_depth;
|
||||
}
|
||||
} else {
|
||||
logical_target = &ctx->program->blocks[ctx->cf_info.parent_loop.header_idx];
|
||||
@@ -10398,9 +10415,9 @@ emit_loop_jump(isel_context* ctx, bool is_break)
|
||||
ctx->cf_info.parent_loop.has_divergent_continue = true;
|
||||
ctx->cf_info.parent_loop.has_divergent_branch = true;
|
||||
|
||||
if (!ctx->cf_info.exec_potentially_empty_continue) {
|
||||
ctx->cf_info.exec_potentially_empty_continue = true;
|
||||
ctx->cf_info.exec_potentially_empty_continue_depth = ctx->block->loop_nest_depth;
|
||||
if (!ctx->cf_info.exec.potentially_empty_continue) {
|
||||
ctx->cf_info.exec.potentially_empty_continue = true;
|
||||
ctx->cf_info.exec.potentially_empty_continue_depth = ctx->block->loop_nest_depth;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10610,22 +10627,13 @@ begin_divergent_if_then(isel_context* ctx, if_context* ic, Temp cond,
|
||||
ic->BB_endif = Block();
|
||||
ic->BB_endif.kind |= (block_kind_merge | (ctx->block->kind & block_kind_top_level));
|
||||
|
||||
ic->exec_potentially_empty_discard_old = ctx->cf_info.exec_potentially_empty_discard;
|
||||
ic->exec_potentially_empty_break_old = ctx->cf_info.exec_potentially_empty_break;
|
||||
ic->exec_potentially_empty_continue_old = ctx->cf_info.exec_potentially_empty_continue;
|
||||
ic->exec_potentially_empty_break_depth_old = ctx->cf_info.exec_potentially_empty_break_depth;
|
||||
ic->exec_potentially_empty_continue_depth_old =
|
||||
ctx->cf_info.exec_potentially_empty_continue_depth;
|
||||
ic->exec_old = ctx->cf_info.exec;
|
||||
ic->divergent_old = ctx->cf_info.parent_if.is_divergent;
|
||||
ic->had_divergent_discard_old = ctx->cf_info.had_divergent_discard;
|
||||
ctx->cf_info.parent_if.is_divergent = true;
|
||||
|
||||
/* divergent branches use cbranch_execz */
|
||||
ctx->cf_info.exec_potentially_empty_discard = false;
|
||||
ctx->cf_info.exec_potentially_empty_break = false;
|
||||
ctx->cf_info.exec_potentially_empty_break_depth = UINT16_MAX;
|
||||
ctx->cf_info.exec_potentially_empty_continue = false;
|
||||
ctx->cf_info.exec_potentially_empty_continue_depth = UINT16_MAX;
|
||||
ctx->cf_info.exec = exec_info();
|
||||
|
||||
/** emit logical then block */
|
||||
ctx->program->next_divergent_if_logical_depth++;
|
||||
@@ -10676,20 +10684,9 @@ begin_divergent_if_else(isel_context* ctx, if_context* ic,
|
||||
sel_ctrl == nir_selection_control_divergent_always_taken;
|
||||
ctx->block->instructions.push_back(std::move(branch));
|
||||
|
||||
ic->exec_potentially_empty_discard_old |= ctx->cf_info.exec_potentially_empty_discard;
|
||||
ic->exec_potentially_empty_break_old |= ctx->cf_info.exec_potentially_empty_break;
|
||||
ic->exec_potentially_empty_break_depth_old = std::min(
|
||||
ic->exec_potentially_empty_break_depth_old, ctx->cf_info.exec_potentially_empty_break_depth);
|
||||
ic->exec_potentially_empty_continue_old |= ctx->cf_info.exec_potentially_empty_continue;
|
||||
ic->exec_potentially_empty_continue_depth_old =
|
||||
std::min(ic->exec_potentially_empty_continue_depth_old,
|
||||
ctx->cf_info.exec_potentially_empty_continue_depth);
|
||||
ic->exec_old.combine(ctx->cf_info.exec);
|
||||
/* divergent branches use cbranch_execz */
|
||||
ctx->cf_info.exec_potentially_empty_discard = false;
|
||||
ctx->cf_info.exec_potentially_empty_break = false;
|
||||
ctx->cf_info.exec_potentially_empty_break_depth = UINT16_MAX;
|
||||
ctx->cf_info.exec_potentially_empty_continue = false;
|
||||
ctx->cf_info.exec_potentially_empty_continue_depth = UINT16_MAX;
|
||||
ctx->cf_info.exec = exec_info();
|
||||
|
||||
ic->had_divergent_discard_then = ctx->cf_info.had_divergent_discard;
|
||||
ctx->cf_info.had_divergent_discard = ic->had_divergent_discard_old;
|
||||
@@ -10739,32 +10736,8 @@ end_divergent_if(isel_context* ctx, if_context* ic)
|
||||
append_logical_start(ctx->block);
|
||||
|
||||
ctx->cf_info.parent_if.is_divergent = ic->divergent_old;
|
||||
ctx->cf_info.exec_potentially_empty_discard |= ic->exec_potentially_empty_discard_old;
|
||||
ctx->cf_info.exec_potentially_empty_break |= ic->exec_potentially_empty_break_old;
|
||||
ctx->cf_info.exec_potentially_empty_break_depth = std::min(
|
||||
ic->exec_potentially_empty_break_depth_old, ctx->cf_info.exec_potentially_empty_break_depth);
|
||||
ctx->cf_info.exec_potentially_empty_continue |= ic->exec_potentially_empty_continue_old;
|
||||
ctx->cf_info.exec_potentially_empty_continue_depth =
|
||||
std::min(ic->exec_potentially_empty_continue_depth_old,
|
||||
ctx->cf_info.exec_potentially_empty_continue_depth);
|
||||
if (ctx->block->loop_nest_depth == ctx->cf_info.exec_potentially_empty_break_depth &&
|
||||
!ctx->cf_info.parent_if.is_divergent && !ctx->cf_info.parent_loop.has_divergent_continue) {
|
||||
ctx->cf_info.exec_potentially_empty_break = false;
|
||||
ctx->cf_info.exec_potentially_empty_break_depth = UINT16_MAX;
|
||||
}
|
||||
if (ctx->block->loop_nest_depth == ctx->cf_info.exec_potentially_empty_continue_depth &&
|
||||
!ctx->cf_info.parent_if.is_divergent) {
|
||||
ctx->cf_info.exec_potentially_empty_continue = false;
|
||||
ctx->cf_info.exec_potentially_empty_continue_depth = UINT16_MAX;
|
||||
}
|
||||
/* uniform control flow never has an empty exec-mask */
|
||||
if (!ctx->block->loop_nest_depth && !ctx->cf_info.parent_if.is_divergent) {
|
||||
ctx->cf_info.exec_potentially_empty_discard = false;
|
||||
ctx->cf_info.exec_potentially_empty_break = false;
|
||||
ctx->cf_info.exec_potentially_empty_break_depth = UINT16_MAX;
|
||||
ctx->cf_info.exec_potentially_empty_continue = false;
|
||||
ctx->cf_info.exec_potentially_empty_continue_depth = UINT16_MAX;
|
||||
}
|
||||
ctx->cf_info.exec.combine(ic->exec_old);
|
||||
update_exec_info(ctx);
|
||||
ctx->cf_info.had_divergent_discard |= ic->had_divergent_discard_then;
|
||||
|
||||
/* We shouldn't create unreachable blocks. */
|
||||
|
@@ -35,6 +35,31 @@ struct shader_io_state {
|
||||
}
|
||||
};
|
||||
|
||||
struct exec_info {
|
||||
/* Set to false when loop_nest_depth==0 && parent_if.is_divergent==false */
|
||||
bool potentially_empty_discard = false;
|
||||
uint16_t potentially_empty_break_depth = UINT16_MAX;
|
||||
/* Set to false when loop_nest_depth==exec_potentially_empty_break_depth,
|
||||
* parent_if.is_divergent==false and parent_loop.has_divergent_continue==false. Also set to
|
||||
* false if loop_nest_depth<exec_potentially_empty_break_depth. */
|
||||
bool potentially_empty_break = false;
|
||||
uint16_t potentially_empty_continue_depth = UINT16_MAX;
|
||||
/* Set to false when loop_nest_depth==exec_potentially_empty_break_depth
|
||||
* and parent_if.is_divergent==false. */
|
||||
bool potentially_empty_continue = false;
|
||||
|
||||
void combine(struct exec_info& other)
|
||||
{
|
||||
potentially_empty_discard |= other.potentially_empty_discard;
|
||||
potentially_empty_break_depth =
|
||||
std::min(potentially_empty_break_depth, other.potentially_empty_break_depth);
|
||||
potentially_empty_break |= other.potentially_empty_break;
|
||||
potentially_empty_continue_depth =
|
||||
std::min(potentially_empty_continue_depth, other.potentially_empty_continue_depth);
|
||||
potentially_empty_continue |= other.potentially_empty_continue;
|
||||
}
|
||||
};
|
||||
|
||||
struct isel_context {
|
||||
const struct aco_compiler_options* options;
|
||||
const struct ac_shader_args* args;
|
||||
@@ -58,17 +83,8 @@ struct isel_context {
|
||||
bool is_divergent = false;
|
||||
} parent_if;
|
||||
bool had_divergent_discard = false;
|
||||
bool exec_potentially_empty_discard =
|
||||
false; /* set to false when loop_nest_depth==0 && parent_if.is_divergent==false */
|
||||
uint16_t exec_potentially_empty_break_depth = UINT16_MAX;
|
||||
/* Set to false when loop_nest_depth==exec_potentially_empty_break_depth,
|
||||
* parent_if.is_divergent==false and parent_loop.has_divergent_continue==false. Also set to
|
||||
* false if loop_nest_depth<exec_potentially_empty_break_depth */
|
||||
bool exec_potentially_empty_break = false;
|
||||
uint16_t exec_potentially_empty_continue_depth = UINT16_MAX;
|
||||
/* Set to false when loop_nest_depth==exec_potentially_empty_break_depth
|
||||
* and parent_if.is_divergent==false. */
|
||||
bool exec_potentially_empty_continue = false;
|
||||
|
||||
struct exec_info exec;
|
||||
} cf_info;
|
||||
|
||||
/* NIR range analysis. */
|
||||
|
Reference in New Issue
Block a user