nir/lower_vec_to_movs: don't vectorize unsupported ops

If the instruction being coalesced would be vectorized but the target
doesn't support vectorizing that op, skip coalescing.
Reuse the callbacks from alu_to_scalar to describe which ops should not
be vectorized.

Signed-off-by: Erico Nunes <nunes.erico@gmail.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Eric Anholt <eric@anholt.net>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6506>
Author:    Erico Nunes
Date:      2020-08-30 15:07:23 +02:00
Committed: Marge Bot
Parent:    b75d8052a7
Commit:    faaba0d6af
7 changed files with 57 additions and 12 deletions
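
For reference, a driver that wants to keep certain ALU ops scalar would hook into the
new pass signature roughly as below. This is a minimal sketch in the spirit of the lima
change further down; the callback name, the fsin/fcos example ops, and the call site
are illustrative and not part of this commit.

#include "nir.h"
#include "util/bitscan.h"

/* Hypothetical filter: a single-component write never vectorizes anything,
 * so always allow it; otherwise refuse to coalesce ops this backend can only
 * execute per-component (fsin/fcos are just placeholders here). Returning
 * false makes try_coalesce() bail, so the op stays scalar. */
static bool
my_vec_to_movs_filter_cb(const nir_instr *instr, unsigned writemask,
                         const void *data)
{
   if (util_bitcount(writemask) == 1)
      return true;

   if (instr->type != nir_instr_type_alu)
      return true;

   switch (nir_instr_as_alu(instr)->op) {
   case nir_op_fsin:
   case nir_op_fcos:
      return false;
   default:
      return true;
   }
}

/* Call site; drivers with no restrictions pass NULL, NULL as before. */
NIR_PASS_V(s, nir_lower_vec_to_movs, my_vec_to_movs_filter_cb, NULL);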

src/compiler/nir/nir.h

@@ -4088,6 +4088,14 @@ static inline bool should_print_nir(nir_shader *shader) { return false; }
  */
 typedef bool (*nir_instr_filter_cb)(const nir_instr *, const void *);
 
+/** An instruction filtering callback with writemask
+ *
+ * Returns true if the instruction should be processed with the associated
+ * writemask and false otherwise.
+ */
+typedef bool (*nir_instr_writemask_filter_cb)(const nir_instr *,
+                                              unsigned writemask, const void *);
+
 /** A simple instruction lowering callback
  *
  * Many instruction lowering passes can be written as a simple function which
@@ -4457,7 +4465,8 @@ bool nir_lower_variable_initializers(nir_shader *shader,
                                      nir_variable_mode modes);
 
 bool nir_move_vec_src_uses_to_dest(nir_shader *shader);
-bool nir_lower_vec_to_movs(nir_shader *shader);
+bool nir_lower_vec_to_movs(nir_shader *shader, nir_instr_writemask_filter_cb cb,
+                           const void *_data);
 void nir_lower_alpha_test(nir_shader *shader, enum compare_func func,
                           bool alpha_to_one,
                           const gl_state_index16 *alpha_ref_state_tokens);

src/compiler/nir/nir_lower_vec_to_movs.c

@@ -28,6 +28,11 @@
 #include "nir.h"
 #include "nir_builder.h"
 
+struct vec_to_movs_data {
+   nir_instr_writemask_filter_cb cb;
+   const void *data;
+};
+
 /*
  * Implements a simple pass that lowers vecN instructions to a series of
  * moves with partial writes.
@@ -119,8 +124,10 @@ has_replicated_dest(nir_alu_instr *alu)
  * can then call insert_mov as normal.
  */
 static unsigned
-try_coalesce(nir_alu_instr *vec, unsigned start_idx)
+try_coalesce(nir_alu_instr *vec, unsigned start_idx, void *_data)
 {
+   struct vec_to_movs_data *data = _data;
+
    assert(start_idx < nir_op_infos[vec->op].num_inputs);
 
    /* We will only even try if the source is SSA */
@@ -178,6 +185,7 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx)
       for (unsigned i = 0; i < 4; i++)
          swizzles[j][i] = src_alu->src[j].swizzle[i];
 
+   /* Generate the final write mask */
    unsigned write_mask = 0;
    for (unsigned i = start_idx; i < 4; i++) {
       if (!(vec->dest.write_mask & (1 << i)))
@@ -187,10 +195,21 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx)
           vec->src[i].src.ssa != &src_alu->dest.dest.ssa)
          continue;
 
-      /* At this point, the give vec source matchese up with the ALU
+      write_mask |= 1 << i;
+   }
+
+   /* If the instruction would be vectorized but the backend
+    * doesn't support vectorizing this op, abort. */
+   if (data->cb && !data->cb(&src_alu->instr, write_mask, data->data))
+      return 0;
+
+   for (unsigned i = start_idx; i < 4; i++) {
+      if (!(write_mask & (1 << i)))
+         continue;
+
+      /* At this point, the given vec source matches up with the ALU
        * instruction so we can re-swizzle that component to match.
        */
-      write_mask |= 1 << i;
       if (has_replicated_dest(src_alu)) {
          /* Since the destination is a single replicated value, we don't need
           * to do any reswizzling
@@ -266,7 +285,7 @@ nir_lower_vec_to_movs_instr(nir_builder *b, nir_instr *instr, void *data)
        * vecN had an SSA destination.
        */
       if (vec_had_ssa_dest && !(finished_write_mask & (1 << i)))
-         finished_write_mask |= try_coalesce(vec, i);
+         finished_write_mask |= try_coalesce(vec, i, data);
 
       if (!(finished_write_mask & (1 << i)))
         finished_write_mask |= insert_mov(vec, i, b->shader);
@@ -279,11 +298,17 @@ nir_lower_vec_to_movs_instr(nir_builder *b, nir_instr *instr, void *data)
 }
 
 bool
-nir_lower_vec_to_movs(nir_shader *shader)
+nir_lower_vec_to_movs(nir_shader *shader, nir_instr_writemask_filter_cb cb,
+                      const void *_data)
 {
+   struct vec_to_movs_data data = {
+      .cb = cb,
+      .data = _data,
+   };
+
    return nir_shader_instructions_pass(shader,
                                        nir_lower_vec_to_movs_instr,
                                        nir_metadata_block_index |
                                        nir_metadata_dominance,
-                                       NULL);
+                                       &data);
 }

src/gallium/auxiliary/nir/nir_to_tgsi.c

@@ -2622,7 +2622,7 @@ nir_to_tgsi(struct nir_shader *s,
              nir_lower_float_source_mods |
              nir_lower_int_source_mods); /* no doubles */
    NIR_PASS_V(s, nir_convert_from_ssa, true);
-   NIR_PASS_V(s, nir_lower_vec_to_movs);
+   NIR_PASS_V(s, nir_lower_vec_to_movs, NULL, NULL);
 
    /* locals_to_regs will leave dead derefs that are good to clean up. */
    NIR_PASS_V(s, nir_lower_locals_to_regs);

src/gallium/drivers/freedreno/a2xx/ir2_nir.c

@@ -1111,7 +1111,7 @@ ir2_nir_compile(struct ir2_context *ctx, bool binning)
 
    OPT_V(ctx->nir, nir_convert_from_ssa, true);
    OPT_V(ctx->nir, nir_move_vec_src_uses_to_dest);
-   OPT_V(ctx->nir, nir_lower_vec_to_movs);
+   OPT_V(ctx->nir, nir_lower_vec_to_movs, NULL, NULL);
 
    OPT_V(ctx->nir, nir_opt_dce);

src/gallium/drivers/lima/lima_program.c

@@ -191,6 +191,17 @@ lima_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data)
    return false;
 }
 
+static bool
+lima_vec_to_movs_filter_cb(const nir_instr *instr, unsigned writemask,
+                           const void *data)
+{
+   assert(writemask > 0);
+
+   if (util_bitcount(writemask) == 1)
+      return true;
+
+   return !lima_alu_to_scalar_filter_cb(instr, data);
+}
+
 void
 lima_program_optimize_fs_nir(struct nir_shader *s,
                              struct nir_lower_tex_options *tex_options)
@@ -252,7 +263,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
    NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
 
    NIR_PASS_V(s, nir_move_vec_src_uses_to_dest);
-   NIR_PASS_V(s, nir_lower_vec_to_movs);
+   NIR_PASS_V(s, nir_lower_vec_to_movs, lima_vec_to_movs_filter_cb, NULL);
 
    NIR_PASS_V(s, lima_nir_duplicate_load_uniforms);
    NIR_PASS_V(s, lima_nir_duplicate_load_inputs);

src/intel/compiler/brw_nir.c

@@ -1183,7 +1183,7 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
 
    if (!is_scalar) {
       OPT(nir_move_vec_src_uses_to_dest);
-      OPT(nir_lower_vec_to_movs);
+      OPT(nir_lower_vec_to_movs, NULL, NULL);
    }
 
    OPT(nir_opt_dce);

src/panfrost/midgard/midgard_compile.c

@@ -348,7 +348,7 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
 
    /* We are a vector architecture; write combine where possible */
    NIR_PASS(progress, nir, nir_move_vec_src_uses_to_dest);
-   NIR_PASS(progress, nir, nir_lower_vec_to_movs);
+   NIR_PASS(progress, nir, nir_lower_vec_to_movs, NULL, NULL);
 
    NIR_PASS(progress, nir, nir_opt_dce);
 }