nir: Factor out most of the algebraic passes C code to .c/.h.
Working on the algebraic implementation, I was being driven nuts by my editor not highlighting and handling indentation for the C code. It turns out that it's basically not pass-specific code, and we can move it over to the relevant .c file. Replaces 30KB of code with 34KB of data on my i965 build. No perf diff on shader-db (n=3) Reviewed-by: Ian Romanick <ian.d.romainck@intel.com> Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
This commit is contained in:
@@ -1050,30 +1050,6 @@ _algebraic_pass_template = mako.template.Template("""
|
||||
% endfor
|
||||
*/
|
||||
|
||||
#ifndef NIR_OPT_ALGEBRAIC_STRUCT_DEFS
|
||||
#define NIR_OPT_ALGEBRAIC_STRUCT_DEFS
|
||||
|
||||
struct transform {
|
||||
const nir_search_expression *search;
|
||||
const nir_search_value *replace;
|
||||
unsigned condition_offset;
|
||||
};
|
||||
|
||||
struct per_op_table {
|
||||
const uint16_t *filter;
|
||||
unsigned num_filtered_states;
|
||||
const uint16_t *table;
|
||||
};
|
||||
|
||||
/* Note: these must match the start states created in
|
||||
* TreeAutomaton._build_table()
|
||||
*/
|
||||
|
||||
/* WILDCARD_STATE = 0 is set by zeroing the state array */
|
||||
static const uint16_t CONST_STATE = 1;
|
||||
|
||||
#endif
|
||||
|
||||
<% cache = {} %>
|
||||
% for xform in xforms:
|
||||
${xform.search.render(cache)}
|
||||
@@ -1114,129 +1090,25 @@ static const struct per_op_table ${pass_name}_table[nir_num_search_ops] = {
|
||||
% endfor
|
||||
};
|
||||
|
||||
static void
|
||||
${pass_name}_pre_block(nir_block *block, uint16_t *states)
|
||||
{
|
||||
nir_foreach_instr(instr, block) {
|
||||
switch (instr->type) {
|
||||
case nir_instr_type_alu: {
|
||||
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
||||
nir_op op = alu->op;
|
||||
uint16_t search_op = nir_search_op_for_nir_op(op);
|
||||
const struct per_op_table *tbl = &${pass_name}_table[search_op];
|
||||
if (tbl->num_filtered_states == 0)
|
||||
continue;
|
||||
|
||||
/* Calculate the index into the transition table. Note the index
|
||||
* calculated must match the iteration order of Python's
|
||||
* itertools.product(), which was used to emit the transition
|
||||
* table.
|
||||
*/
|
||||
uint16_t index = 0;
|
||||
for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) {
|
||||
index *= tbl->num_filtered_states;
|
||||
index += tbl->filter[states[alu->src[i].src.ssa->index]];
|
||||
}
|
||||
states[alu->dest.dest.ssa.index] = tbl->table[index];
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_instr_type_load_const: {
|
||||
nir_load_const_instr *load_const = nir_instr_as_load_const(instr);
|
||||
states[load_const->def.index] = CONST_STATE;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
${pass_name}_block(nir_builder *build, nir_block *block,
|
||||
struct hash_table *range_ht,
|
||||
const uint16_t *states, const bool *condition_flags)
|
||||
{
|
||||
bool progress = false;
|
||||
const unsigned execution_mode = build->shader->info.float_controls_execution_mode;
|
||||
|
||||
nir_foreach_instr_reverse_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_alu)
|
||||
continue;
|
||||
|
||||
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
||||
if (!alu->dest.dest.is_ssa)
|
||||
continue;
|
||||
|
||||
unsigned bit_size = alu->dest.dest.ssa.bit_size;
|
||||
const bool ignore_inexact =
|
||||
nir_is_float_control_signed_zero_inf_nan_preserve(execution_mode, bit_size) ||
|
||||
nir_is_denorm_flush_to_zero(execution_mode, bit_size);
|
||||
|
||||
switch (states[alu->dest.dest.ssa.index]) {
|
||||
const struct transform *${pass_name}_transforms[] = {
|
||||
% for i in range(len(automaton.state_patterns)):
|
||||
case ${i}:
|
||||
% if automaton.state_patterns[i]:
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(${pass_name}_state${i}_xforms); i++) {
|
||||
const struct transform *xform = &${pass_name}_state${i}_xforms[i];
|
||||
if (condition_flags[xform->condition_offset] &&
|
||||
!(xform->search->inexact && ignore_inexact) &&
|
||||
nir_replace_instr(build, alu, range_ht, xform->search, xform->replace)) {
|
||||
_mesa_hash_table_clear(range_ht, NULL);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
% endif
|
||||
break;
|
||||
% if automaton.state_patterns[i]:
|
||||
${pass_name}_state${i}_xforms,
|
||||
% else:
|
||||
NULL,
|
||||
% endif
|
||||
% endfor
|
||||
default: assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
static bool
|
||||
${pass_name}_impl(nir_function_impl *impl, const bool *condition_flags)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
nir_builder build;
|
||||
nir_builder_init(&build, impl);
|
||||
|
||||
/* Note: it's important here that we're allocating a zeroed array, since
|
||||
* state 0 is the default state, which means we don't have to visit
|
||||
* anything other than constants and ALU instructions.
|
||||
*/
|
||||
uint16_t *states = calloc(impl->ssa_alloc, sizeof(*states));
|
||||
|
||||
struct hash_table *range_ht = _mesa_pointer_hash_table_create(NULL);
|
||||
|
||||
nir_foreach_block(block, impl) {
|
||||
${pass_name}_pre_block(block, states);
|
||||
}
|
||||
|
||||
nir_foreach_block_reverse(block, impl) {
|
||||
progress |= ${pass_name}_block(&build, block, range_ht, states, condition_flags);
|
||||
}
|
||||
|
||||
ralloc_free(range_ht);
|
||||
free(states);
|
||||
|
||||
if (progress) {
|
||||
nir_metadata_preserve(impl, nir_metadata_block_index |
|
||||
nir_metadata_dominance);
|
||||
} else {
|
||||
#ifndef NDEBUG
|
||||
impl->valid_metadata &= ~nir_metadata_not_properly_reset;
|
||||
#endif
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
};
|
||||
|
||||
const uint16_t ${pass_name}_transform_counts[] = {
|
||||
% for i in range(len(automaton.state_patterns)):
|
||||
% if automaton.state_patterns[i]:
|
||||
(uint16_t)ARRAY_SIZE(${pass_name}_state${i}_xforms),
|
||||
% else:
|
||||
0,
|
||||
% endif
|
||||
% endfor
|
||||
};
|
||||
|
||||
bool
|
||||
${pass_name}(nir_shader *shader)
|
||||
@@ -1253,8 +1125,12 @@ ${pass_name}(nir_shader *shader)
|
||||
% endfor
|
||||
|
||||
nir_foreach_function(function, shader) {
|
||||
if (function->impl)
|
||||
progress |= ${pass_name}_impl(function->impl, condition_flags);
|
||||
if (function->impl) {
|
||||
progress |= nir_algebraic_impl(function->impl, condition_flags,
|
||||
${pass_name}_transforms,
|
||||
${pass_name}_transform_counts,
|
||||
${pass_name}_table);
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
|
@@ -692,3 +692,129 @@ nir_replace_instr(nir_builder *build, nir_alu_instr *instr,
|
||||
|
||||
return ssa_val;
|
||||
}
|
||||
|
||||
static void
|
||||
nir_algebraic_automaton(nir_block *block, uint16_t *states,
|
||||
const struct per_op_table *pass_op_table)
|
||||
{
|
||||
nir_foreach_instr(instr, block) {
|
||||
switch (instr->type) {
|
||||
case nir_instr_type_alu: {
|
||||
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
||||
nir_op op = alu->op;
|
||||
uint16_t search_op = nir_search_op_for_nir_op(op);
|
||||
const struct per_op_table *tbl = &pass_op_table[search_op];
|
||||
if (tbl->num_filtered_states == 0)
|
||||
continue;
|
||||
|
||||
/* Calculate the index into the transition table. Note the index
|
||||
* calculated must match the iteration order of Python's
|
||||
* itertools.product(), which was used to emit the transition
|
||||
* table.
|
||||
*/
|
||||
uint16_t index = 0;
|
||||
for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) {
|
||||
index *= tbl->num_filtered_states;
|
||||
index += tbl->filter[states[alu->src[i].src.ssa->index]];
|
||||
}
|
||||
states[alu->dest.dest.ssa.index] = tbl->table[index];
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_instr_type_load_const: {
|
||||
nir_load_const_instr *load_const = nir_instr_as_load_const(instr);
|
||||
states[load_const->def.index] = CONST_STATE;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
nir_algebraic_block(nir_builder *build, nir_block *block,
|
||||
struct hash_table *range_ht,
|
||||
const bool *condition_flags,
|
||||
const struct transform **transforms,
|
||||
const uint16_t *transform_counts,
|
||||
const uint16_t *states)
|
||||
{
|
||||
bool progress = false;
|
||||
const unsigned execution_mode = build->shader->info.float_controls_execution_mode;
|
||||
|
||||
nir_foreach_instr_reverse_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_alu)
|
||||
continue;
|
||||
|
||||
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
||||
if (!alu->dest.dest.is_ssa)
|
||||
continue;
|
||||
|
||||
unsigned bit_size = alu->dest.dest.ssa.bit_size;
|
||||
const bool ignore_inexact =
|
||||
nir_is_float_control_signed_zero_inf_nan_preserve(execution_mode, bit_size) ||
|
||||
nir_is_denorm_flush_to_zero(execution_mode, bit_size);
|
||||
|
||||
int xform_idx = states[alu->dest.dest.ssa.index];
|
||||
for (uint16_t i = 0; i < transform_counts[xform_idx]; i++) {
|
||||
const struct transform *xform = &transforms[xform_idx][i];
|
||||
if (condition_flags[xform->condition_offset] &&
|
||||
!(xform->search->inexact && ignore_inexact) &&
|
||||
nir_replace_instr(build, alu, range_ht,
|
||||
xform->search, xform->replace)) {
|
||||
_mesa_hash_table_clear(range_ht, NULL);
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
bool
|
||||
nir_algebraic_impl(nir_function_impl *impl,
|
||||
const bool *condition_flags,
|
||||
const struct transform **transforms,
|
||||
const uint16_t *transform_counts,
|
||||
const struct per_op_table *pass_op_table)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
nir_builder build;
|
||||
nir_builder_init(&build, impl);
|
||||
|
||||
/* Note: it's important here that we're allocating a zeroed array, since
|
||||
* state 0 is the default state, which means we don't have to visit
|
||||
* anything other than constants and ALU instructions.
|
||||
*/
|
||||
uint16_t *states = calloc(impl->ssa_alloc, sizeof(*states));
|
||||
|
||||
struct hash_table *range_ht = _mesa_pointer_hash_table_create(NULL);
|
||||
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_algebraic_automaton(block, states, pass_op_table);
|
||||
}
|
||||
|
||||
nir_foreach_block_reverse(block, impl) {
|
||||
progress |= nir_algebraic_block(&build, block, range_ht, condition_flags,
|
||||
transforms, transform_counts,
|
||||
states);
|
||||
}
|
||||
|
||||
ralloc_free(range_ht);
|
||||
free(states);
|
||||
|
||||
if (progress) {
|
||||
nir_metadata_preserve(impl, nir_metadata_block_index |
|
||||
nir_metadata_dominance);
|
||||
} else {
|
||||
#ifndef NDEBUG
|
||||
impl->valid_metadata &= ~nir_metadata_not_properly_reset;
|
||||
#endif
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
@@ -163,6 +163,25 @@ typedef struct {
|
||||
bool (*cond)(nir_alu_instr *instr);
|
||||
} nir_search_expression;
|
||||
|
||||
struct per_op_table {
|
||||
const uint16_t *filter;
|
||||
unsigned num_filtered_states;
|
||||
const uint16_t *table;
|
||||
};
|
||||
|
||||
struct transform {
|
||||
const nir_search_expression *search;
|
||||
const nir_search_value *replace;
|
||||
unsigned condition_offset;
|
||||
};
|
||||
|
||||
/* Note: these must match the start states created in
|
||||
* TreeAutomaton._build_table()
|
||||
*/
|
||||
|
||||
/* WILDCARD_STATE = 0 is set by zeroing the state array */
|
||||
static const uint16_t CONST_STATE = 1;
|
||||
|
||||
NIR_DEFINE_CAST(nir_search_value_as_variable, nir_search_value,
|
||||
nir_search_variable, value,
|
||||
type, nir_search_value_variable)
|
||||
@@ -178,5 +197,11 @@ nir_replace_instr(struct nir_builder *b, nir_alu_instr *instr,
|
||||
struct hash_table *range_ht,
|
||||
const nir_search_expression *search,
|
||||
const nir_search_value *replace);
|
||||
bool
|
||||
nir_algebraic_impl(nir_function_impl *impl,
|
||||
const bool *condition_flags,
|
||||
const struct transform **transforms,
|
||||
const uint16_t *transform_counts,
|
||||
const struct per_op_table *pass_op_table);
|
||||
|
||||
#endif /* _NIR_SEARCH_ */
|
||||
|
Reference in New Issue
Block a user