nir/opt_vectorize: Add a callback for filtering of vectorizing.

For NIR-to-TGSI, we don't want to revectorize 64-bit ops that we split to
scalar beyond vec2 width.  We even have some ops that we would rather
retain as scalar due to TGSI opcodes being scalar, or having more unusual
requirements.

This could be used to do the vectorize_vec2_16bit filtering, but that
shader compiler option is also used in algebraic so leave it in place for
now.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6567>
This commit is contained in:
Eric Anholt
2020-08-27 12:49:13 -07:00
parent 479d9c97eb
commit f25e169897
7 changed files with 36 additions and 22 deletions

View File

@@ -4780,7 +4780,10 @@ bool nir_opt_trivial_continues(nir_shader *shader);
bool nir_opt_undef(nir_shader *shader);
bool nir_opt_vectorize(nir_shader *shader);
typedef bool (*nir_opt_vectorize_cb)(const nir_instr *a, const nir_instr *b,
void *data);
bool nir_opt_vectorize(nir_shader *shader, nir_opt_vectorize_cb filter,
void *data);
bool nir_opt_conditional_discard(nir_shader *shader);

View File

@@ -162,7 +162,8 @@ instr_can_rewrite(nir_instr *instr)
*/
static nir_instr *
instr_try_combine(struct nir_shader *nir, nir_instr *instr1, nir_instr *instr2)
instr_try_combine(struct nir_shader *nir, nir_instr *instr1, nir_instr *instr2,
nir_opt_vectorize_cb filter, void *data)
{
assert(instr1->type == nir_instr_type_alu);
assert(instr2->type == nir_instr_type_alu);
@@ -181,6 +182,9 @@ instr_try_combine(struct nir_shader *nir, nir_instr *instr1, nir_instr *instr2)
(total_components > 2 || alu1->dest.dest.ssa.bit_size != 16))
return NULL;
if (filter && !filter(&alu1->instr, &alu2->instr, data))
return NULL;
nir_builder b;
nir_builder_init(&b, nir_cf_node_get_function(&instr1->block->cf_node));
b.cursor = nir_after_instr(instr1);
@@ -320,13 +324,15 @@ vec_instr_stack_create(void *mem_ctx)
static bool
vec_instr_stack_push(struct nir_shader *nir, struct util_dynarray *stack,
nir_instr *instr)
nir_instr *instr,
nir_opt_vectorize_cb filter, void *data)
{
/* Walk the stack from child to parent to make live ranges shorter by
* matching the closest thing we can
*/
util_dynarray_foreach_reverse(stack, nir_instr *, stack_instr) {
nir_instr *new_instr = instr_try_combine(nir, *stack_instr, instr);
nir_instr *new_instr = instr_try_combine(nir, *stack_instr, instr,
filter, data);
if (new_instr) {
*stack_instr = new_instr;
return true;
@@ -378,20 +384,21 @@ vec_instr_set_destroy(struct set *instr_set)
static bool
vec_instr_set_add_or_rewrite(struct nir_shader *nir, struct set *instr_set,
nir_instr *instr)
nir_instr *instr,
nir_opt_vectorize_cb filter, void *data)
{
if (!instr_can_rewrite(instr))
return false;
struct util_dynarray *new_stack = vec_instr_stack_create(instr_set);
vec_instr_stack_push(nir, new_stack, instr);
vec_instr_stack_push(nir, new_stack, instr, filter, data);
struct set_entry *entry = _mesa_set_search(instr_set, new_stack);
if (entry) {
ralloc_free(new_stack);
struct util_dynarray *stack = (struct util_dynarray *) entry->key;
return vec_instr_stack_push(nir, stack, instr);
return vec_instr_stack_push(nir, stack, instr, filter, data);
}
_mesa_set_add(instr_set, new_stack);
@@ -400,7 +407,7 @@ vec_instr_set_add_or_rewrite(struct nir_shader *nir, struct set *instr_set,
static void
vec_instr_set_remove(struct nir_shader *nir, struct set *instr_set,
nir_instr *instr)
nir_instr *instr, nir_opt_vectorize_cb filter, void *data)
{
if (!instr_can_rewrite(instr))
return;
@@ -417,7 +424,7 @@ vec_instr_set_remove(struct nir_shader *nir, struct set *instr_set,
* comparison function as well.
*/
struct util_dynarray *temp = vec_instr_stack_create(instr_set);
vec_instr_stack_push(nir, temp, instr);
vec_instr_stack_push(nir, temp, instr, filter, data);
struct set_entry *entry = _mesa_set_search(instr_set, temp);
ralloc_free(temp);
@@ -433,34 +440,37 @@ vec_instr_set_remove(struct nir_shader *nir, struct set *instr_set,
static bool
vectorize_block(struct nir_shader *nir, nir_block *block,
struct set *instr_set)
struct set *instr_set,
nir_opt_vectorize_cb filter, void *data)
{
bool progress = false;
nir_foreach_instr_safe(instr, block) {
if (vec_instr_set_add_or_rewrite(nir, instr_set, instr))
if (vec_instr_set_add_or_rewrite(nir, instr_set, instr, filter, data))
progress = true;
}
for (unsigned i = 0; i < block->num_dom_children; i++) {
nir_block *child = block->dom_children[i];
progress |= vectorize_block(nir, child, instr_set);
progress |= vectorize_block(nir, child, instr_set, filter, data);
}
nir_foreach_instr_reverse(instr, block)
vec_instr_set_remove(nir, instr_set, instr);
vec_instr_set_remove(nir, instr_set, instr, filter, data);
return progress;
}
static bool
nir_opt_vectorize_impl(struct nir_shader *nir, nir_function_impl *impl)
nir_opt_vectorize_impl(struct nir_shader *nir, nir_function_impl *impl,
nir_opt_vectorize_cb filter, void *data)
{
struct set *instr_set = vec_instr_set_create();
nir_metadata_require(impl, nir_metadata_dominance);
bool progress = vectorize_block(nir, nir_start_block(impl), instr_set);
bool progress = vectorize_block(nir, nir_start_block(impl), instr_set,
filter, data);
if (progress)
nir_metadata_preserve(impl, nir_metadata_block_index |
@@ -471,13 +481,14 @@ nir_opt_vectorize_impl(struct nir_shader *nir, nir_function_impl *impl)
}
bool
nir_opt_vectorize(nir_shader *shader)
nir_opt_vectorize(nir_shader *shader, nir_opt_vectorize_cb filter,
void *data)
{
bool progress = false;
nir_foreach_function(function, shader) {
if (function->impl)
progress |= nir_opt_vectorize_impl(shader, function->impl);
progress |= nir_opt_vectorize_impl(shader, function->impl, filter, data);
}
return progress;

View File

@@ -1122,7 +1122,7 @@ etna_compile_shader_nir(struct etna_shader_variant *v)
if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS))
nir_print_shader(s, stdout);
while( OPT(s, nir_opt_vectorize) );
while( OPT(s, nir_opt_vectorize, NULL, NULL) );
NIR_PASS_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, specs);
NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);

View File

@@ -201,7 +201,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
do {
progress = false;
NIR_PASS(progress, s, nir_opt_vectorize);
NIR_PASS(progress, s, nir_opt_vectorize, NULL, NULL);
} while (progress);
do {

View File

@@ -201,7 +201,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, NULL, NULL);
NIR_PASS_V(sel->nir, nir_lower_int64);
NIR_PASS_V(sel->nir, nir_opt_vectorize);
NIR_PASS_V(sel->nir, nir_opt_vectorize, NULL, NULL);
}
NIR_PASS_V(sel->nir, nir_lower_flrp, ~0, false, false);
}

View File

@@ -768,7 +768,7 @@ optimize_once(nir_shader *shader)
NIR_PASS(progress, shader, nir_opt_algebraic);
NIR_PASS(progress, shader, nir_opt_constant_folding);
NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
NIR_PASS(progress, shader, nir_opt_vectorize);
NIR_PASS(progress, shader, nir_opt_vectorize, NULL, NULL);
NIR_PASS(progress, shader, nir_opt_remove_phis);

View File

@@ -558,7 +558,7 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
nir_var_shader_out |
nir_var_function_temp);
NIR_PASS(progress, nir, nir_opt_vectorize);
NIR_PASS(progress, nir, nir_opt_vectorize, NULL, NULL);
} while (progress);
NIR_PASS_V(nir, nir_lower_alu_to_scalar, mdg_is_64, NULL);