nir/opt_preamble: Move phis for movable if's
Add infrastructure to reconstruct if's. Later in the series, this will let us hoist loads from inside uniform if's without speculating. For now, it lets us handle phi's in nir_opt_preamble in a straightforward way. Results on AGX are good: total instructions in shared programs: 1504730 -> 1499674 (-0.34%) instructions in affected programs: 153673 -> 148617 (-3.29%) helped: 496 HURT: 0 Instructions are helped. total bytes in shared programs: 10287768 -> 10238284 (-0.48%) bytes in affected programs: 1113724 -> 1064240 (-4.44%) helped: 496 HURT: 0 Bytes are helped. total halfregs in shared programs: 452669 -> 452049 (-0.14%) halfregs in affected programs: 14825 -> 14205 (-4.18%) helped: 152 HURT: 99 Halfregs are helped. total threads in shared programs: 16469504 -> 16470784 (<.01%) threads in affected programs: 8960 -> 10240 (14.29%) helped: 10 HURT: 0 Threads are helped. Results on ir3 is a bit more of a wash but still should be a win overall: The regression in moves seems scary, but the cost model already accounts for them as evidenced by instruction count coming out ahead. total instructions in shared programs: 3108750 -> 3105993 (-0.09%) instructions in affected programs: 317367 -> 314610 (-0.87%) helped: 675 HURT: 242 Instructions are helped. total nops in shared programs: 673152 -> 675048 (0.28%) nops in affected programs: 74551 -> 76447 (2.54%) helped: 353 HURT: 347 Inconclusive result (%-change mean confidence interval includes 0). total non-nops in shared programs: 2435598 -> 2430945 (-0.19%) non-nops in affected programs: 232664 -> 228011 (-2.00%) helped: 816 HURT: 38 Non-nops are helped. total mov in shared programs: 78201 -> 84011 (7.43%) mov in affected programs: 10726 -> 16536 (54.17%) helped: 60 HURT: 781 Mov are HURT. total cov in shared programs: 74964 -> 74906 (-0.08%) cov in affected programs: 273 -> 215 (-21.25%) helped: 17 HURT: 0 Cov are helped. total dwords in shared programs: 6716814 -> 6748726 (0.48%) dwords in affected programs: 879778 -> 911690 (3.63%) helped: 12 HURT: 948 Dwords are HURT. total full in shared programs: 193210 -> 193212 (<.01%) full in affected programs: 278 -> 280 (0.72%) helped: 12 HURT: 22 Inconclusive result (value mean confidence interval includes 0). total constlen in shared programs: 493632 -> 494816 (0.24%) constlen in affected programs: 19904 -> 21088 (5.95%) helped: 9 HURT: 306 Constlen are HURT. total cat0 in shared programs: 742476 -> 745046 (0.35%) cat0 in affected programs: 84455 -> 87025 (3.04%) helped: 277 HURT: 489 Cat0 are HURT. total cat1 in shared programs: 153303 -> 159059 (3.75%) cat1 in affected programs: 17810 -> 23566 (32.32%) helped: 69 HURT: 780 Cat1 are HURT. total cat2 in shared programs: 1144508 -> 1140731 (-0.33%) cat2 in affected programs: 121284 -> 117507 (-3.11%) helped: 841 HURT: 0 Cat2 are helped. total cat3 in shared programs: 942098 -> 934804 (-0.77%) cat3 in affected programs: 87140 -> 79846 (-8.37%) helped: 855 HURT: 1 Cat3 are helped. total cat4 in shared programs: 65261 -> 65249 (-0.02%) cat4 in affected programs: 42 -> 30 (-28.57%) helped: 12 HURT: 0 Cat4 are helped. total sstall in shared programs: 237311 -> 241281 (1.67%) sstall in affected programs: 33755 -> 37725 (11.76%) helped: 179 HURT: 493 Sstall are HURT. total (ss) in shared programs: 58166 -> 58795 (1.08%) (ss) in affected programs: 4535 -> 5164 (13.87%) helped: 35 HURT: 664 (ss) are HURT. total systall in shared programs: 503784 -> 503805 (<.01%) systall in affected programs: 3170 -> 3191 (0.66%) helped: 16 HURT: 13 Inconclusive result (value mean confidence interval includes 0). total (sy) in shared programs: 27261 -> 27259 (<.01%) (sy) in affected programs: 76 -> 74 (-2.63%) helped: 8 HURT: 5 Inconclusive result (value mean confidence interval includes 0). total waves in shared programs: 439848 -> 439872 (<.01%) waves in affected programs: 160 -> 184 (15.00%) helped: 12 HURT: 0 Waves are helped. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Reviewed-by: Connor Abbott <cwabbott0@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24011>
This commit is contained in:

committed by
Marge Bot

parent
802fb8f7f3
commit
8d037d943d
@@ -21,6 +21,7 @@
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "util/set.h"
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
|
||||
@@ -74,6 +75,24 @@ typedef struct {
|
||||
/* Per-definition array of states */
|
||||
def_state *states;
|
||||
|
||||
/* Number of levels of non-uniform control flow we're in. We don't
|
||||
* reconstruct loops, so loops count as non-uniform conservatively. If-else
|
||||
* is counted if the condition is not marked can_move.
|
||||
*/
|
||||
unsigned nonuniform_cf_nesting;
|
||||
|
||||
/* Set of nir_if's that must be reconstructed in the preamble. Note an if may
|
||||
* need reconstruction even when not entirely moved. This does not account
|
||||
* for nesting: the parent CF nodes of ifs in this set must be reconstructed
|
||||
* but may not be in this set, even if the parent is another if.
|
||||
*/
|
||||
struct set *reconstructed_ifs;
|
||||
|
||||
/* Set of definitions that must be reconstructed in the preamble. This is a
|
||||
* subset of can_move instructions, determined after replacement.
|
||||
*/
|
||||
BITSET_WORD *reconstructed_defs;
|
||||
|
||||
nir_def *def;
|
||||
|
||||
const nir_opt_preamble_options *options;
|
||||
@@ -261,11 +280,25 @@ can_move_instr(nir_instr *instr, opt_preamble_ctx *ctx)
|
||||
}
|
||||
}
|
||||
|
||||
case nir_instr_type_phi:
|
||||
/* TODO: we could move an if-statement if everything inside it is
|
||||
* moveable.
|
||||
*/
|
||||
return false;
|
||||
/* We can only move phis if all of their sources are movable, and it is a phi
|
||||
* for an if-else that is itself movable.
|
||||
*/
|
||||
case nir_instr_type_phi: {
|
||||
nir_cf_node *prev_node = nir_cf_node_prev(&instr->block->cf_node);
|
||||
if (!prev_node)
|
||||
return false;
|
||||
|
||||
if (prev_node->type != nir_cf_node_if) {
|
||||
assert(prev_node->type == nir_cf_node_loop);
|
||||
return false;
|
||||
}
|
||||
|
||||
nir_if *nif = nir_cf_node_as_if(prev_node);
|
||||
if (!can_move_src(&nif->condition, ctx))
|
||||
return false;
|
||||
|
||||
return can_move_srcs(instr, ctx);
|
||||
}
|
||||
|
||||
default:
|
||||
return false;
|
||||
@@ -332,9 +365,11 @@ candidate_sort(const void *data1, const void *data2)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
static bool
|
||||
calculate_can_move_for_block(opt_preamble_ctx *ctx, nir_block *block)
|
||||
{
|
||||
bool all_can_move = true;
|
||||
|
||||
nir_foreach_instr(instr, block) {
|
||||
nir_def *def = nir_instr_def(instr);
|
||||
if (!def)
|
||||
@@ -342,29 +377,53 @@ calculate_can_move_for_block(opt_preamble_ctx *ctx, nir_block *block)
|
||||
|
||||
def_state *state = &ctx->states[def->index];
|
||||
state->can_move = can_move_instr(instr, ctx);
|
||||
all_can_move &= state->can_move;
|
||||
}
|
||||
|
||||
return all_can_move;
|
||||
}
|
||||
|
||||
static void
|
||||
static bool
|
||||
calculate_can_move_for_cf_list(opt_preamble_ctx *ctx, struct exec_list *list)
|
||||
{
|
||||
bool all_can_move = true;
|
||||
|
||||
foreach_list_typed(nir_cf_node, node, node, list) {
|
||||
switch (node->type) {
|
||||
case nir_cf_node_block: {
|
||||
calculate_can_move_for_block(ctx, nir_cf_node_as_block(node));
|
||||
case nir_cf_node_block:
|
||||
all_can_move &=
|
||||
calculate_can_move_for_block(ctx, nir_cf_node_as_block(node));
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_cf_node_if: {
|
||||
nir_if *nif = nir_cf_node_as_if(node);
|
||||
calculate_can_move_for_cf_list(ctx, &nif->then_list);
|
||||
calculate_can_move_for_cf_list(ctx, &nif->else_list);
|
||||
bool uniform = can_move_src(&nif->condition, ctx);
|
||||
|
||||
if (!uniform)
|
||||
ctx->nonuniform_cf_nesting++;
|
||||
|
||||
bool if_can_move = uniform;
|
||||
if_can_move &= calculate_can_move_for_cf_list(ctx, &nif->then_list);
|
||||
if_can_move &= calculate_can_move_for_cf_list(ctx, &nif->else_list);
|
||||
|
||||
if (!uniform)
|
||||
ctx->nonuniform_cf_nesting--;
|
||||
|
||||
all_can_move &= if_can_move;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_cf_node_loop: {
|
||||
nir_loop *loop = nir_cf_node_as_loop(node);
|
||||
|
||||
/* Conservatively treat loops like conditional control flow, since an
|
||||
* instruction might be conditionally unreachabled due to an earlier
|
||||
* break in a loop that executes only one iteration.
|
||||
*/
|
||||
ctx->nonuniform_cf_nesting++;
|
||||
calculate_can_move_for_cf_list(ctx, &loop->body);
|
||||
ctx->nonuniform_cf_nesting--;
|
||||
all_can_move = false;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -372,6 +431,229 @@ calculate_can_move_for_cf_list(opt_preamble_ctx *ctx, struct exec_list *list)
|
||||
unreachable("Unexpected CF node type");
|
||||
}
|
||||
}
|
||||
|
||||
return all_can_move;
|
||||
}
|
||||
|
||||
static void
|
||||
replace_for_block(nir_builder *b, opt_preamble_ctx *ctx,
|
||||
struct hash_table *remap_table, nir_block *block)
|
||||
{
|
||||
nir_foreach_instr(instr, block) {
|
||||
nir_def *def = nir_instr_def(instr);
|
||||
if (!def)
|
||||
continue;
|
||||
|
||||
/* Only replace what we actually need. This is a micro-optimization for
|
||||
* compile-time performance of regular instructions, but it's required for
|
||||
* correctness with phi nodes, since we might not reconstruct the
|
||||
* corresponding if.
|
||||
*/
|
||||
if (!BITSET_TEST(ctx->reconstructed_defs, def->index))
|
||||
continue;
|
||||
|
||||
def_state *state = &ctx->states[def->index];
|
||||
assert(state->can_move && "reconstructed => can_move");
|
||||
|
||||
nir_instr *clone;
|
||||
|
||||
if (instr->type == nir_instr_type_phi) {
|
||||
/* Phis are special since they can't be cloned with nir_instr_clone */
|
||||
nir_phi_instr *phi = nir_instr_as_phi(instr);
|
||||
|
||||
nir_cf_node *nif_cf = nir_cf_node_prev(&block->cf_node);
|
||||
assert(nif_cf->type == nir_cf_node_if && "only if's are moveable");
|
||||
nir_if *nif = nir_cf_node_as_if(nif_cf);
|
||||
|
||||
nir_block *then_block = nir_if_last_then_block(nif);
|
||||
nir_block *else_block = nir_if_last_else_block(nif);
|
||||
|
||||
nir_def *then_def = NULL, *else_def = NULL;
|
||||
|
||||
nir_foreach_phi_src(phi_src, phi) {
|
||||
if (phi_src->pred == then_block) {
|
||||
assert(then_def == NULL);
|
||||
then_def = phi_src->src.ssa;
|
||||
} else if (phi_src->pred == else_block) {
|
||||
assert(else_def == NULL);
|
||||
else_def = phi_src->src.ssa;
|
||||
} else {
|
||||
unreachable("Invalid predecessor for phi of if");
|
||||
}
|
||||
}
|
||||
|
||||
assert(exec_list_length(&phi->srcs) == 2 && "only if's are movable");
|
||||
assert(then_def && else_def && "all sources seen");
|
||||
|
||||
/* Remap */
|
||||
then_def = _mesa_hash_table_search(remap_table, then_def)->data;
|
||||
else_def = _mesa_hash_table_search(remap_table, else_def)->data;
|
||||
|
||||
b->cursor =
|
||||
nir_before_block_after_phis(nir_cursor_current_block(b->cursor));
|
||||
|
||||
nir_def *repl = nir_if_phi(b, then_def, else_def);
|
||||
clone = repl->parent_instr;
|
||||
|
||||
_mesa_hash_table_insert(remap_table, &phi->def, repl);
|
||||
} else {
|
||||
clone = nir_instr_clone_deep(b->shader, instr, remap_table);
|
||||
nir_builder_instr_insert(b, clone);
|
||||
}
|
||||
|
||||
if (clone->type == nir_instr_type_tex) {
|
||||
nir_tex_instr *tex = nir_instr_as_tex(clone);
|
||||
if (tex->op == nir_texop_tex) {
|
||||
/* For maximum compatibility, replace normal textures with
|
||||
* textureGrad with a gradient of 0.
|
||||
* TODO: Handle txb somehow.
|
||||
*/
|
||||
b->cursor = nir_before_instr(clone);
|
||||
|
||||
nir_def *zero =
|
||||
nir_imm_zero(b, tex->coord_components - tex->is_array, 32);
|
||||
nir_tex_instr_add_src(tex, nir_tex_src_ddx, zero);
|
||||
nir_tex_instr_add_src(tex, nir_tex_src_ddy, zero);
|
||||
tex->op = nir_texop_txd;
|
||||
|
||||
b->cursor = nir_after_instr(clone);
|
||||
}
|
||||
}
|
||||
|
||||
if (state->replace) {
|
||||
nir_def *clone_def = nir_instr_def(clone);
|
||||
nir_store_preamble(b, clone_def, .base = state->offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
replace_for_cf_list(nir_builder *b, opt_preamble_ctx *ctx,
|
||||
struct hash_table *remap_table, struct exec_list *list)
|
||||
{
|
||||
foreach_list_typed(nir_cf_node, node, node, list) {
|
||||
switch (node->type) {
|
||||
case nir_cf_node_block: {
|
||||
replace_for_block(b, ctx, remap_table, nir_cf_node_as_block(node));
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_cf_node_if: {
|
||||
nir_if *nif = nir_cf_node_as_if(node);
|
||||
|
||||
/* If we moved something that requires reconstructing the if, do so */
|
||||
if (_mesa_set_search(ctx->reconstructed_ifs, nif)) {
|
||||
assert(can_move_src(&nif->condition, ctx));
|
||||
|
||||
struct hash_entry *entry =
|
||||
_mesa_hash_table_search(remap_table, nif->condition.ssa);
|
||||
assert(entry != NULL && "can_move condition, def dominates use");
|
||||
nir_def *remap_cond = entry->data;
|
||||
|
||||
nir_if *reconstructed_nif = NULL;
|
||||
reconstructed_nif = nir_push_if(b, remap_cond);
|
||||
|
||||
b->cursor = nir_before_cf_list(&reconstructed_nif->then_list);
|
||||
replace_for_cf_list(b, ctx, remap_table, &nif->then_list);
|
||||
|
||||
b->cursor = nir_before_cf_list(&reconstructed_nif->else_list);
|
||||
replace_for_cf_list(b, ctx, remap_table, &nif->else_list);
|
||||
|
||||
nir_pop_if(b, reconstructed_nif);
|
||||
b->cursor = nir_after_cf_node(&reconstructed_nif->cf_node);
|
||||
} else {
|
||||
replace_for_cf_list(b, ctx, remap_table, &nif->then_list);
|
||||
replace_for_cf_list(b, ctx, remap_table, &nif->else_list);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_cf_node_loop: {
|
||||
/* We don't try to reconstruct loops */
|
||||
nir_loop *loop = nir_cf_node_as_loop(node);
|
||||
replace_for_cf_list(b, ctx, remap_table, &loop->body);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("Unexpected CF node type");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
mark_reconstructed(nir_src *src, void *state)
|
||||
{
|
||||
BITSET_WORD *reconstructed_defs = state;
|
||||
BITSET_SET(reconstructed_defs, src->ssa->index);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* If a phi is moved into the preamble, then the if it depends on must also be
|
||||
* moved. However, it is not necessary to consider any nested control flow. As
|
||||
* an example, if we have a shader:
|
||||
*
|
||||
* if (not moveable condition) {
|
||||
* if (moveable condition) {
|
||||
* x = moveable
|
||||
* }
|
||||
* y = phi x, moveable
|
||||
* z = floor y
|
||||
* }
|
||||
*
|
||||
* Then if 'z' is in the replace set, we need to reconstruct the inner if, but
|
||||
* not the outer if, unless there's also speculation to worry about.
|
||||
*
|
||||
* We do this by marking defs that need to be reconstructed, with a backwards
|
||||
* sweep of the program (compatible with reverse dominance), and marking the
|
||||
* if's preceding reconstructed phis.
|
||||
*/
|
||||
static void
|
||||
analyze_reconstructed(opt_preamble_ctx *ctx, nir_function_impl *impl)
|
||||
{
|
||||
nir_foreach_block_reverse(block, impl) {
|
||||
/* If an if-statement is reconstructed, its condition must be as well */
|
||||
nir_if *nif = nir_block_get_following_if(block);
|
||||
if (nif && _mesa_set_search(ctx->reconstructed_ifs, nif))
|
||||
BITSET_SET(ctx->reconstructed_defs, nif->condition.ssa->index);
|
||||
|
||||
nir_foreach_instr_reverse(instr, block) {
|
||||
nir_def *def = nir_instr_def(instr);
|
||||
if (!def)
|
||||
continue;
|
||||
|
||||
def_state *state = &ctx->states[def->index];
|
||||
|
||||
/* Anything that's replaced must be reconstructed */
|
||||
if (state->replace)
|
||||
BITSET_SET(ctx->reconstructed_defs, def->index);
|
||||
else if (!BITSET_TEST(ctx->reconstructed_defs, def->index))
|
||||
continue;
|
||||
|
||||
/* If it must be reconstructed, it better be moveable */
|
||||
assert(state->can_move);
|
||||
|
||||
/* Anything that depends on something reconstructed is reconstructed */
|
||||
nir_foreach_src(instr, mark_reconstructed, ctx->reconstructed_defs);
|
||||
|
||||
/* Reconstructed phis need their ifs reconstructed */
|
||||
if (instr->type == nir_instr_type_phi) {
|
||||
nir_cf_node *prev_node = nir_cf_node_prev(&instr->block->cf_node);
|
||||
|
||||
/* Invariants guaranteed by can_move_instr */
|
||||
assert(prev_node != NULL);
|
||||
assert(prev_node->type == nir_cf_node_if);
|
||||
|
||||
nir_if *nif = nir_cf_node_as_if(prev_node);
|
||||
assert(can_move_src(&nif->condition, ctx));
|
||||
|
||||
/* Mark the if for reconstruction */
|
||||
_mesa_set_add(ctx->reconstructed_ifs, nif);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
@@ -541,6 +823,14 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
|
||||
|
||||
free(candidates);
|
||||
|
||||
/* Determine which if's need to be reconstructed, based on the replacements
|
||||
* we did.
|
||||
*/
|
||||
ctx.reconstructed_ifs = _mesa_pointer_set_create(NULL);
|
||||
ctx.reconstructed_defs = calloc(BITSET_WORDS(impl->ssa_alloc),
|
||||
sizeof(BITSET_WORD));
|
||||
analyze_reconstructed(&ctx, impl);
|
||||
|
||||
/* Step 5: Actually do the replacement. */
|
||||
struct hash_table *remap_table =
|
||||
_mesa_pointer_hash_table_create(NULL);
|
||||
@@ -549,46 +839,7 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
|
||||
nir_builder preamble_builder = nir_builder_at(nir_before_impl(preamble));
|
||||
nir_builder *b = &preamble_builder;
|
||||
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
nir_def *def = nir_instr_def(instr);
|
||||
if (!def)
|
||||
continue;
|
||||
|
||||
def_state *state = &ctx.states[def->index];
|
||||
if (!state->can_move)
|
||||
continue;
|
||||
|
||||
nir_instr *clone = nir_instr_clone_deep(impl->function->shader,
|
||||
instr, remap_table);
|
||||
|
||||
nir_builder_instr_insert(b, clone);
|
||||
|
||||
if (clone->type == nir_instr_type_tex) {
|
||||
nir_tex_instr *tex = nir_instr_as_tex(clone);
|
||||
if (tex->op == nir_texop_tex) {
|
||||
/* For maximum compatibility, replace normal textures with
|
||||
* textureGrad with a gradient of 0.
|
||||
* TODO: Handle txb somehow.
|
||||
*/
|
||||
b->cursor = nir_before_instr(clone);
|
||||
|
||||
nir_def *zero =
|
||||
nir_imm_zero(b, tex->coord_components - tex->is_array, 32);
|
||||
nir_tex_instr_add_src(tex, nir_tex_src_ddx, zero);
|
||||
nir_tex_instr_add_src(tex, nir_tex_src_ddy, zero);
|
||||
tex->op = nir_texop_txd;
|
||||
|
||||
b->cursor = nir_after_instr(clone);
|
||||
}
|
||||
}
|
||||
|
||||
if (state->replace) {
|
||||
nir_def *clone_def = nir_instr_def(clone);
|
||||
nir_store_preamble(b, clone_def, .base = state->offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
replace_for_cf_list(b, &ctx, remap_table, &impl->body);
|
||||
|
||||
nir_builder builder = nir_builder_create(impl);
|
||||
b = &builder;
|
||||
@@ -625,5 +876,7 @@ nir_opt_preamble(nir_shader *shader, const nir_opt_preamble_options *options,
|
||||
|
||||
ralloc_free(remap_table);
|
||||
free(ctx.states);
|
||||
free(ctx.reconstructed_defs);
|
||||
_mesa_set_destroy(ctx.reconstructed_ifs, NULL);
|
||||
return true;
|
||||
}
|
||||
|
Reference in New Issue
Block a user