broadcom/compiler: add more lowerings/optimizations on v3d_optimize_nir

These are optimizations that we are already calling in the Vulkan
driver. This is preparation for the Vulkan frontend to use
v3d_optimize_nir too.

We need to add a new parameter to v3d_optimize_nir in order to know if
we can call nir_opt_find_array_copies. We don't track whether
nir_lower_var_copies has already been called; instead, we explicitly
call it when we create the uncompiled shader, and assume that each
driver (v3d/v3dv) does the same when the shader is created. So when
v3d_optimize_nir is called as part of the compile process in the
compiler, we call it with allow_copies as false.

We exclude nir_opt_gcm on purpose, as it is a case of an optimization
that could help performance even if it hurts shader-db stats.

shaderdb stats:
  total instructions in shared programs: 11705923 -> 11705034 (<.01%)
  instructions in affected programs: 88350 -> 87461 (-1.01%)
  helped: 201
  HURT: 80
  Instructions are helped.

  total threads in shared programs: 375552 -> 375558 (<.01%)
  threads in affected programs: 6 -> 12 (100.00%)
  helped: 3
  HURT: 0

  total uniforms in shared programs: 3486108 -> 3485789 (<.01%)
  uniforms in affected programs: 7473 -> 7154 (-4.27%)
  helped: 90
  HURT: 1
  Uniforms are helped.

  total max-temps in shared programs: 2021860 -> 2021802 (<.01%)
  max-temps in affected programs: 800 -> 742 (-7.25%)
  helped: 21
  HURT: 3
  Max-temps are helped.

  total sfu-stalls in shared programs: 19299 -> 19296 (-0.02%)
  sfu-stalls in affected programs: 18 -> 15 (-16.67%)
  helped: 10
  HURT: 7
  Inconclusive result (value mean confidence interval includes 0).

  total inst-and-stalls in shared programs: 11725222 -> 11724330 (<.01%)
  inst-and-stalls in affected programs: 88402 -> 87510 (-1.01%)
  helped: 201
  HURT: 80
  Inst-and-stalls are helped.

  total nops in shared programs: 269674 -> 269386 (-0.11%)
  nops in affected programs: 3641 -> 3353 (-7.91%)
  helped: 103
  HURT: 29
  Nops are helped.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17185>
This commit is contained in:
Alejandro Piñeiro
2022-06-13 12:43:12 +02:00
committed by Marge Bot
parent 9cbc3ab239
commit 0bf31b0710
4 changed files with 50 additions and 6 deletions

View File

@@ -2126,7 +2126,7 @@ mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
} }
void void
v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s) v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s, bool allow_copies)
{ {
bool progress; bool progress;
unsigned lower_flrp = unsigned lower_flrp =
@@ -2137,7 +2137,29 @@ v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s)
do { do {
progress = false; progress = false;
NIR_PASS(progress, s, nir_split_array_vars, nir_var_function_temp);
NIR_PASS(progress, s, nir_shrink_vec_array_vars, nir_var_function_temp);
NIR_PASS(progress, s, nir_opt_deref);
NIR_PASS(progress, s, nir_lower_vars_to_ssa); NIR_PASS(progress, s, nir_lower_vars_to_ssa);
if (allow_copies) {
/* Only run this pass if nir_lower_var_copies was not called
* yet. That would lower away any copy_deref instructions and we
* don't want to introduce any more.
*/
NIR_PASS(progress, s, nir_opt_find_array_copies);
}
NIR_PASS(progress, s, nir_opt_copy_prop_vars);
NIR_PASS(progress, s, nir_opt_dead_write_vars);
NIR_PASS(progress, s, nir_opt_combine_stores, nir_var_all);
NIR_PASS(progress, s, nir_remove_dead_variables,
(nir_variable_mode)(nir_var_function_temp |
nir_var_shader_temp |
nir_var_mem_shared),
NULL);
NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL); NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
NIR_PASS(progress, s, nir_lower_phis_to_scalar, false); NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
NIR_PASS(progress, s, nir_copy_prop); NIR_PASS(progress, s, nir_copy_prop);
@@ -2145,10 +2167,27 @@ v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s)
NIR_PASS(progress, s, nir_opt_dce); NIR_PASS(progress, s, nir_opt_dce);
NIR_PASS(progress, s, nir_opt_dead_cf); NIR_PASS(progress, s, nir_opt_dead_cf);
NIR_PASS(progress, s, nir_opt_cse); NIR_PASS(progress, s, nir_opt_cse);
NIR_PASS(progress, s, nir_opt_peephole_select, 0, false, false);
NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true); NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
NIR_PASS(progress, s, nir_opt_algebraic); NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_constant_folding);
NIR_PASS(progress, s, nir_opt_intrinsics);
NIR_PASS(progress, s, nir_opt_idiv_const, 32);
NIR_PASS(progress, s, nir_lower_alu);
if (nir_opt_trivial_continues(s)) {
progress = true;
NIR_PASS(progress, s, nir_copy_prop);
NIR_PASS(progress, s, nir_opt_dce);
}
NIR_PASS(progress, s, nir_opt_conditional_discard);
NIR_PASS(progress, s, nir_opt_remove_phis);
NIR_PASS(progress, s, nir_opt_if, false);
NIR_PASS(progress, s, nir_opt_undef);
/* Note that vectorization may undo the load/store scalarization /* Note that vectorization may undo the load/store scalarization
* pass we run for non 32-bit TMU general load/store by * pass we run for non 32-bit TMU general load/store by
* converting, for example, 2 consecutive 16-bit loads into a * converting, for example, 2 consecutive 16-bit loads into a

View File

@@ -1071,7 +1071,7 @@ vir_has_uniform(struct qinst *inst)
const struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devinfo, const struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devinfo,
uint32_t max_inline_uniform_buffers); uint32_t max_inline_uniform_buffers);
void v3d_compiler_free(const struct v3d_compiler *compiler); void v3d_compiler_free(const struct v3d_compiler *compiler);
void v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s); void v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s, bool allow_copies);
uint64_t *v3d_compile(const struct v3d_compiler *compiler, uint64_t *v3d_compile(const struct v3d_compiler *compiler,
struct v3d_key *key, struct v3d_key *key,

View File

@@ -930,7 +930,7 @@ v3d_nir_lower_vs_early(struct v3d_compile *c)
NIR_PASS(_, c->s, nir_remove_unused_io_vars, NIR_PASS(_, c->s, nir_remove_unused_io_vars,
nir_var_shader_out, used_outputs, NULL); /* demotes to globals */ nir_var_shader_out, used_outputs, NULL); /* demotes to globals */
NIR_PASS(_, c->s, nir_lower_global_vars_to_local); NIR_PASS(_, c->s, nir_lower_global_vars_to_local);
v3d_optimize_nir(c, c->s); v3d_optimize_nir(c, c->s, false);
NIR_PASS(_, c->s, nir_remove_dead_variables, nir_var_shader_in, NULL); NIR_PASS(_, c->s, nir_remove_dead_variables, nir_var_shader_in, NULL);
/* This must go before nir_lower_io */ /* This must go before nir_lower_io */
@@ -964,7 +964,7 @@ v3d_nir_lower_gs_early(struct v3d_compile *c)
NIR_PASS(_, c->s, nir_remove_unused_io_vars, NIR_PASS(_, c->s, nir_remove_unused_io_vars,
nir_var_shader_out, used_outputs, NULL); /* demotes to globals */ nir_var_shader_out, used_outputs, NULL); /* demotes to globals */
NIR_PASS(_, c->s, nir_lower_global_vars_to_local); NIR_PASS(_, c->s, nir_lower_global_vars_to_local);
v3d_optimize_nir(c, c->s); v3d_optimize_nir(c, c->s, false);
NIR_PASS(_, c->s, nir_remove_dead_variables, nir_var_shader_in, NULL); NIR_PASS(_, c->s, nir_remove_dead_variables, nir_var_shader_in, NULL);
/* This must go before nir_lower_io */ /* This must go before nir_lower_io */
@@ -1607,7 +1607,7 @@ v3d_attempt_compile(struct v3d_compile *c)
NIR_PASS(_, c->s, v3d_nir_lower_subgroup_intrinsics, c); NIR_PASS(_, c->s, v3d_nir_lower_subgroup_intrinsics, c);
v3d_optimize_nir(c, c->s); v3d_optimize_nir(c, c->s, false);
/* Do late algebraic optimization to turn add(a, neg(b)) back into /* Do late algebraic optimization to turn add(a, neg(b)) back into
* subs, then the mandatory cleanup after algebraic. Note that it may * subs, then the mandatory cleanup after algebraic. Note that it may

View File

@@ -318,7 +318,12 @@ v3d_uncompiled_shader_create(struct pipe_context *pctx,
NIR_PASS(_, s, nir_lower_load_const_to_scalar); NIR_PASS(_, s, nir_lower_load_const_to_scalar);
v3d_optimize_nir(NULL, s); v3d_optimize_nir(NULL, s, true);
NIR_PASS(_, s, nir_lower_var_copies);
/* Get rid of split copies */
v3d_optimize_nir(NULL, s, false);
NIR_PASS(_, s, nir_remove_dead_variables, nir_var_function_temp, NULL); NIR_PASS(_, s, nir_remove_dead_variables, nir_var_function_temp, NULL);