nir: Free instructions more often
Soon we'll be allocating instructions out of a per-shader pool, which
means that if we don't free enough instructions during the main
optimization loop, the final nir_sweep() call will create holes that
can't be filled. By freeing instructions more aggressively, we can
allocate more of them from the freelist, which reduces final memory
usage.

Modified from Connor Abbott's original patch to rebase it on top of the
refactored DCE pass and to fix the use-after-free in
nir_algebraic_impl().

Co-authored-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@collabora.com>
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12910>
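All of the hunks below follow the same deferred-free pattern: a pass keeps a
local exec_list of the instructions it removes, and releases them in a single
nir_instr_free_list() call once nothing (e.g. a worklist) can still reference
them. A minimal sketch of that pattern, using only helpers that appear in the
diff (exec_list_make_empty, exec_list_push_tail, nir_instr_remove,
nir_instr_free_list) and assuming a nir_block *block is in scope:

   struct exec_list dead_instrs;
   exec_list_make_empty(&dead_instrs);

   nir_foreach_instr_safe(instr, block) {
      if (!instr->pass_flags) {                           /* instruction is dead */
         nir_instr_remove(instr);                         /* unlink from its block */
         exec_list_push_tail(&dead_instrs, &instr->node); /* defer the free */
      }
   }

   nir_instr_free_list(&dead_instrs);   /* bulk-free, feeding the allocator's freelist */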
src/compiler/nir/nir_opt_dce.c

@@ -107,7 +107,8 @@ struct loop_state {
 };
 
 static bool
-dce_block(nir_block *block, BITSET_WORD *defs_live, struct loop_state *loop)
+dce_block(nir_block *block, BITSET_WORD *defs_live, struct loop_state *loop,
+          struct exec_list *dead_instrs)
 {
    bool progress = false;
    bool phis_changed = false;
@@ -131,6 +132,7 @@ dce_block(nir_block *block, BITSET_WORD *defs_live, struct loop_state *loop)
          instr->pass_flags = live;
       } else if (!live) {
          nir_instr_remove(instr);
+         exec_list_push_tail(dead_instrs, &instr->node);
          progress = true;
       }
    }
@@ -146,20 +148,20 @@ dce_block(nir_block *block, BITSET_WORD *defs_live, struct loop_state *loop)
 
 static bool
 dce_cf_list(struct exec_list *cf_list, BITSET_WORD *defs_live,
-            struct loop_state *parent_loop)
+            struct loop_state *parent_loop, struct exec_list *dead_instrs)
 {
    bool progress = false;
    foreach_list_typed_reverse(nir_cf_node, cf_node, node, cf_list) {
       switch (cf_node->type) {
       case nir_cf_node_block: {
          nir_block *block = nir_cf_node_as_block(cf_node);
-         progress |= dce_block(block, defs_live, parent_loop);
+         progress |= dce_block(block, defs_live, parent_loop, dead_instrs);
          break;
       }
       case nir_cf_node_if: {
          nir_if *nif = nir_cf_node_as_if(cf_node);
-         progress |= dce_cf_list(&nif->else_list, defs_live, parent_loop);
-         progress |= dce_cf_list(&nif->then_list, defs_live, parent_loop);
+         progress |= dce_cf_list(&nif->else_list, defs_live, parent_loop, dead_instrs);
+         progress |= dce_cf_list(&nif->then_list, defs_live, parent_loop, dead_instrs);
          mark_src_live(&nif->condition, defs_live);
          break;
       }
@@ -176,7 +178,7 @@ dce_cf_list(struct exec_list *cf_list, BITSET_WORD *defs_live,
          struct set *predecessors = nir_loop_first_block(loop)->predecessors;
          if (predecessors->entries == 1 &&
              _mesa_set_next_entry(predecessors, NULL)->key == inner_state.preheader) {
-            progress |= dce_cf_list(&loop->body, defs_live, parent_loop);
+            progress |= dce_cf_list(&loop->body, defs_live, parent_loop, dead_instrs);
             break;
          }
 
@@ -185,7 +187,7 @@ dce_cf_list(struct exec_list *cf_list, BITSET_WORD *defs_live,
             /* dce_cf_list() resets inner_state.header_phis_changed itself, so
              * it doesn't have to be done here.
              */
-            dce_cf_list(&loop->body, defs_live, &inner_state);
+            dce_cf_list(&loop->body, defs_live, &inner_state, dead_instrs);
          } while (inner_state.header_phis_changed);
 
          /* We don't know how many times mark_cf_list() will repeat, so
@@ -199,6 +201,7 @@ dce_cf_list(struct exec_list *cf_list, BITSET_WORD *defs_live,
          nir_foreach_instr_safe(instr, block) {
             if (!instr->pass_flags) {
                nir_instr_remove(instr);
+               exec_list_push_tail(dead_instrs, &instr->node);
                progress = true;
             }
          }
@@ -222,12 +225,17 @@ nir_opt_dce_impl(nir_function_impl *impl)
    BITSET_WORD *defs_live = rzalloc_array(NULL, BITSET_WORD,
                                           BITSET_WORDS(impl->ssa_alloc));
 
+   struct exec_list dead_instrs;
+   exec_list_make_empty(&dead_instrs);
+
    struct loop_state loop;
    loop.preheader = NULL;
-   bool progress = dce_cf_list(&impl->body, defs_live, &loop);
+   bool progress = dce_cf_list(&impl->body, defs_live, &loop, &dead_instrs);
 
    ralloc_free(defs_live);
 
+   nir_instr_free_list(&dead_instrs);
+
    if (progress) {
       nir_metadata_preserve(impl, nir_metadata_block_index |
                                   nir_metadata_dominance);
src/compiler/nir/nir_search.c

@@ -685,14 +685,15 @@ nir_algebraic_update_automaton(nir_instr *new_instr,
    nir_instr_worklist_destroy(automaton_worklist);
 }
 
-nir_ssa_def *
+static nir_ssa_def *
 nir_replace_instr(nir_builder *build, nir_alu_instr *instr,
                   struct hash_table *range_ht,
                   struct util_dynarray *states,
                   const nir_algebraic_table *table,
                   const nir_search_expression *search,
                   const nir_search_value *replace,
-                  nir_instr_worklist *algebraic_worklist)
+                  nir_instr_worklist *algebraic_worklist,
+                  struct exec_list *dead_instrs)
 {
    uint8_t swizzle[NIR_MAX_VEC_COMPONENTS] = { 0 };
 
@@ -803,7 +804,10 @@ nir_replace_instr(nir_builder *build, nir_alu_instr *instr,
     * that the instr may be in the worklist still, so we can't free it
     * directly.
     */
+   assert(instr->instr.pass_flags == 0);
+   instr->instr.pass_flags = 1;
    nir_instr_remove(&instr->instr);
+   exec_list_push_tail(dead_instrs, &instr->instr.node);
 
    return ssa_val;
 }
@@ -865,7 +869,8 @@ nir_algebraic_instr(nir_builder *build, nir_instr *instr,
                     const bool *condition_flags,
                     const nir_algebraic_table *table,
                     struct util_dynarray *states,
-                    nir_instr_worklist *worklist)
+                    nir_instr_worklist *worklist,
+                    struct exec_list *dead_instrs)
 {
 
    if (instr->type != nir_instr_type_alu)
@@ -891,7 +896,7 @@ nir_algebraic_instr(nir_builder *build, nir_instr *instr,
           !(table->values[xform->search].expression.inexact && ignore_inexact) &&
           nir_replace_instr(build, alu, range_ht, states, table,
                             &table->values[xform->search].expression,
-                            &table->values[xform->replace].value, worklist)) {
+                            &table->values[xform->replace].value, worklist, dead_instrs)) {
          _mesa_hash_table_clear(range_ht, NULL);
          return true;
       }
@@ -938,25 +943,31 @@ nir_algebraic_impl(nir_function_impl *impl,
     */
    nir_foreach_block_reverse(block, impl) {
       nir_foreach_instr_reverse(instr, block) {
+         instr->pass_flags = 0;
          if (instr->type == nir_instr_type_alu)
            nir_instr_worklist_push_tail(worklist, instr);
       }
    }
 
+   struct exec_list dead_instrs;
+   exec_list_make_empty(&dead_instrs);
+
    nir_instr *instr;
    while ((instr = nir_instr_worklist_pop_head(worklist))) {
       /* The worklist can have an instr pushed to it multiple times if it was
        * the src of multiple instrs that also got optimized, so make sure that
        * we don't try to re-optimize an instr we already handled.
        */
-      if (exec_node_is_tail_sentinel(&instr->node))
+      if (instr->pass_flags)
          continue;
 
       progress |= nir_algebraic_instr(&build, instr,
                                       range_ht, condition_flags,
-                                      table, &states, worklist);
+                                      table, &states, worklist, &dead_instrs);
    }
 
+   nir_instr_free_list(&dead_instrs);
+
    nir_instr_worklist_destroy(worklist);
    ralloc_free(range_ht);
    util_dynarray_fini(&states);
src/compiler/nir/nir_search.h

@@ -237,14 +237,6 @@ NIR_DEFINE_CAST(nir_search_value_as_expression, nir_search_value,
                 nir_search_expression, value,
                 type, nir_search_value_expression)
 
-nir_ssa_def *
-nir_replace_instr(struct nir_builder *b, nir_alu_instr *instr,
-                  struct hash_table *range_ht,
-                  struct util_dynarray *states,
-                  const nir_algebraic_table *table,
-                  const nir_search_expression *search,
-                  const nir_search_value *replace,
-                  nir_instr_worklist *algebraic_worklist);
 bool
 nir_algebraic_impl(nir_function_impl *impl,
                    const bool *condition_flags,