nir/lower_vec_to_movs: don't vectorize unsupported ops
If the instruction being coalesced would be vectorized but the target
doesn't support vectorizing that op, skip coalescing. Reuse the callbacks
from alu_to_scalar to describe which ops should not be vectorized.

Signed-off-by: Erico Nunes <nunes.erico@gmail.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Eric Anholt <eric@anholt.net>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6506>
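To make the new hook concrete, here is a minimal sketch of how a vector backend with a scalar-only transcendental unit could use the new signature. Everything here is illustrative: example_vec_to_movs_filter_cb and its op list are hypothetical and not part of this commit; the real lima callback appears in the diff below.

#include "nir.h"
#include "util/u_math.h"

/* Hypothetical filter (not from this commit): permit coalescing when the
 * combined write stays scalar, and refuse to vectorize ops this imaginary
 * hardware can only execute one component at a time. */
static bool
example_vec_to_movs_filter_cb(const nir_instr *instr, unsigned writemask,
                              const void *data)
{
   /* A one-bit writemask is a scalar write; coalescing it cannot produce
    * a vectorized op, so it is always allowed. */
   if (util_bitcount(writemask) == 1)
      return true;

   if (instr->type != nir_instr_type_alu)
      return true;

   /* Refuse multi-component writes for ops the scalar unit must handle. */
   switch (nir_instr_as_alu((nir_instr *)instr)->op) {
   case nir_op_frcp:
   case nir_op_fsin:
   case nir_op_fcos:
      return false;
   default:
      return true;
   }
}

A driver would then run the pass as NIR_PASS_V(s, nir_lower_vec_to_movs, example_vec_to_movs_filter_cb, NULL); passing NULL, NULL instead keeps the old unconditional coalescing, which is what the unaffected backends in this commit do.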
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -4088,6 +4088,14 @@ static inline bool should_print_nir(nir_shader *shader) { return false; }
  */
 typedef bool (*nir_instr_filter_cb)(const nir_instr *, const void *);
 
+/** An instruction filtering callback with writemask
+ *
+ * Returns true if the instruction should be processed with the associated
+ * writemask and false otherwise.
+ */
+typedef bool (*nir_instr_writemask_filter_cb)(const nir_instr *,
+                                              unsigned writemask, const void *);
+
 /** A simple instruction lowering callback
  *
  * Many instruction lowering passes can be written as a simple function which
@@ -4457,7 +4465,8 @@ bool nir_lower_variable_initializers(nir_shader *shader,
                                      nir_variable_mode modes);
 
 bool nir_move_vec_src_uses_to_dest(nir_shader *shader);
-bool nir_lower_vec_to_movs(nir_shader *shader);
+bool nir_lower_vec_to_movs(nir_shader *shader, nir_instr_writemask_filter_cb cb,
+                           const void *_data);
 void nir_lower_alpha_test(nir_shader *shader, enum compare_func func,
                           bool alpha_to_one,
                           const gl_state_index16 *alpha_ref_state_tokens);
--- a/src/compiler/nir/nir_lower_vec_to_movs.c
+++ b/src/compiler/nir/nir_lower_vec_to_movs.c
@@ -28,6 +28,11 @@
 #include "nir.h"
 #include "nir_builder.h"
 
+struct vec_to_movs_data {
+   nir_instr_writemask_filter_cb cb;
+   const void *data;
+};
+
 /*
  * Implements a simple pass that lowers vecN instructions to a series of
  * moves with partial writes.
@@ -119,8 +124,10 @@ has_replicated_dest(nir_alu_instr *alu)
  * can then call insert_mov as normal.
  */
 static unsigned
-try_coalesce(nir_alu_instr *vec, unsigned start_idx)
+try_coalesce(nir_alu_instr *vec, unsigned start_idx, void *_data)
 {
+   struct vec_to_movs_data *data = _data;
+
    assert(start_idx < nir_op_infos[vec->op].num_inputs);
 
    /* We will only even try if the source is SSA */
@@ -178,6 +185,7 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx)
       for (unsigned i = 0; i < 4; i++)
          swizzles[j][i] = src_alu->src[j].swizzle[i];
 
+   /* Generate the final write mask */
    unsigned write_mask = 0;
    for (unsigned i = start_idx; i < 4; i++) {
       if (!(vec->dest.write_mask & (1 << i)))
@@ -187,10 +195,21 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx)
           vec->src[i].src.ssa != &src_alu->dest.dest.ssa)
          continue;
 
-      /* At this point, the give vec source matchese up with the ALU
-       * instruction so we can re-swizzle that component to match.
-       */
       write_mask |= 1 << i;
+   }
+
+   /* If the instruction would be vectorized but the backend
+    * doesn't support vectorizing this op, abort. */
+   if (data->cb && !data->cb(&src_alu->instr, write_mask, data->data))
+      return 0;
+
+   for (unsigned i = start_idx; i < 4; i++) {
+      if (!(write_mask & (1 << i)))
+         continue;
+
+      /* At this point, the given vec source matches up with the ALU
+       * instruction so we can re-swizzle that component to match.
+       */
       if (has_replicated_dest(src_alu)) {
          /* Since the destination is a single replicated value, we don't need
           * to do any reswizzling
@@ -266,7 +285,7 @@ nir_lower_vec_to_movs_instr(nir_builder *b, nir_instr *instr, void *data)
        * vecN had an SSA destination.
        */
       if (vec_had_ssa_dest && !(finished_write_mask & (1 << i)))
-         finished_write_mask |= try_coalesce(vec, i);
+         finished_write_mask |= try_coalesce(vec, i, data);
 
       if (!(finished_write_mask & (1 << i)))
          finished_write_mask |= insert_mov(vec, i, b->shader);
@@ -279,11 +298,17 @@ nir_lower_vec_to_movs_instr(nir_builder *b, nir_instr *instr, void *data)
 }
 
 bool
-nir_lower_vec_to_movs(nir_shader *shader)
+nir_lower_vec_to_movs(nir_shader *shader, nir_instr_writemask_filter_cb cb,
+                      const void *_data)
 {
+   struct vec_to_movs_data data = {
+      .cb = cb,
+      .data = _data,
+   };
+
    return nir_shader_instructions_pass(shader,
                                        nir_lower_vec_to_movs_instr,
                                        nir_metadata_block_index |
                                        nir_metadata_dominance,
-                                       NULL);
+                                       &data);
 }
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@@ -2622,7 +2622,7 @@ nir_to_tgsi(struct nir_shader *s,
               nir_lower_float_source_mods |
               nir_lower_int_source_mods); /* no doubles */
    NIR_PASS_V(s, nir_convert_from_ssa, true);
-   NIR_PASS_V(s, nir_lower_vec_to_movs);
+   NIR_PASS_V(s, nir_lower_vec_to_movs, NULL, NULL);
 
    /* locals_to_regs will leave dead derefs that are good to clean up. */
    NIR_PASS_V(s, nir_lower_locals_to_regs);
--- a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
+++ b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
@@ -1111,7 +1111,7 @@ ir2_nir_compile(struct ir2_context *ctx, bool binning)
    OPT_V(ctx->nir, nir_convert_from_ssa, true);
 
    OPT_V(ctx->nir, nir_move_vec_src_uses_to_dest);
-   OPT_V(ctx->nir, nir_lower_vec_to_movs);
+   OPT_V(ctx->nir, nir_lower_vec_to_movs, NULL, NULL);
 
    OPT_V(ctx->nir, nir_opt_dce);
 
--- a/src/gallium/drivers/lima/lima_program.c
+++ b/src/gallium/drivers/lima/lima_program.c
@@ -191,6 +191,17 @@ lima_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data)
    return false;
 }
 
+static bool
+lima_vec_to_movs_filter_cb(const nir_instr *instr, unsigned writemask,
+                           const void *data)
+{
+   assert(writemask > 0);
+   if (util_bitcount(writemask) == 1)
+      return true;
+
+   return !lima_alu_to_scalar_filter_cb(instr, data);
+}
+
 void
 lima_program_optimize_fs_nir(struct nir_shader *s,
                              struct nir_lower_tex_options *tex_options)
@@ -252,7 +263,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
    NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
 
    NIR_PASS_V(s, nir_move_vec_src_uses_to_dest);
-   NIR_PASS_V(s, nir_lower_vec_to_movs);
+   NIR_PASS_V(s, nir_lower_vec_to_movs, lima_vec_to_movs_filter_cb, NULL);
 
    NIR_PASS_V(s, lima_nir_duplicate_load_uniforms);
    NIR_PASS_V(s, lima_nir_duplicate_load_inputs);
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -1183,7 +1183,7 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
 
    if (!is_scalar) {
       OPT(nir_move_vec_src_uses_to_dest);
-      OPT(nir_lower_vec_to_movs);
+      OPT(nir_lower_vec_to_movs, NULL, NULL);
    }
 
    OPT(nir_opt_dce);
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -348,7 +348,7 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
 
         /* We are a vector architecture; write combine where possible */
         NIR_PASS(progress, nir, nir_move_vec_src_uses_to_dest);
-        NIR_PASS(progress, nir, nir_lower_vec_to_movs);
+        NIR_PASS(progress, nir, nir_lower_vec_to_movs, NULL, NULL);
 
         NIR_PASS(progress, nir, nir_opt_dce);
 }