broadcom/compiler: add more lowerings/optimizations on v3d_optimize_nir
These are optimizations that we are already calling in the Vulkan driver. This is in preparation for the Vulkan frontend to use v3d_optimize_nir too.

We need to add a new parameter to v3d_optimize_nir in order to know whether we can call nir_opt_find_array_copies. As we don't track whether nir_lower_var_copies has already been called, we explicitly call it when we create the uncompiled shader. So instead of tracking, we assume that each driver (v3d/v3dv) calls it when the shader is created. Consequently, when v3d_optimize_nir is called as part of the compile process in the compiler, we call it with allow_copies set to false.

We purposely exclude nir_opt_gcm, as it is a case of an optimization that could help performance even if it hurts shader-db stats.

shader-db stats:

total instructions in shared programs: 11705923 -> 11705034 (<.01%)
instructions in affected programs: 88350 -> 87461 (-1.01%)
helped: 201
HURT: 80
Instructions are helped.

total threads in shared programs: 375552 -> 375558 (<.01%)
threads in affected programs: 6 -> 12 (100.00%)
helped: 3
HURT: 0

total uniforms in shared programs: 3486108 -> 3485789 (<.01%)
uniforms in affected programs: 7473 -> 7154 (-4.27%)
helped: 90
HURT: 1
Uniforms are helped.

total max-temps in shared programs: 2021860 -> 2021802 (<.01%)
max-temps in affected programs: 800 -> 742 (-7.25%)
helped: 21
HURT: 3
Max-temps are helped.

total sfu-stalls in shared programs: 19299 -> 19296 (-0.02%)
sfu-stalls in affected programs: 18 -> 15 (-16.67%)
helped: 10
HURT: 7
Inconclusive result (value mean confidence interval includes 0).

total inst-and-stalls in shared programs: 11725222 -> 11724330 (<.01%)
inst-and-stalls in affected programs: 88402 -> 87510 (-1.01%)
helped: 201
HURT: 80
Inst-and-stalls are helped.

total nops in shared programs: 269674 -> 269386 (-0.11%)
nops in affected programs: 3641 -> 3353 (-7.91%)
helped: 103
HURT: 29
Nops are helped.
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17185>
This commit is contained in:

committed by
Marge Bot

parent
9cbc3ab239
commit
0bf31b0710
@@ -2126,7 +2126,7 @@ mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
|
||||
}
|
||||
|
||||
void
|
||||
v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s)
|
||||
v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s, bool allow_copies)
|
||||
{
|
||||
bool progress;
|
||||
unsigned lower_flrp =
|
||||
@@ -2137,7 +2137,29 @@ v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s)
|
||||
do {
|
||||
progress = false;
|
||||
|
||||
NIR_PASS(progress, s, nir_split_array_vars, nir_var_function_temp);
|
||||
NIR_PASS(progress, s, nir_shrink_vec_array_vars, nir_var_function_temp);
|
||||
NIR_PASS(progress, s, nir_opt_deref);
|
||||
|
||||
NIR_PASS(progress, s, nir_lower_vars_to_ssa);
|
||||
if (allow_copies) {
|
||||
/* Only run this pass if nir_lower_var_copies was not called
|
||||
* yet. That would lower away any copy_deref instructions and we
|
||||
* don't want to introduce any more.
|
||||
*/
|
||||
NIR_PASS(progress, s, nir_opt_find_array_copies);
|
||||
}
|
||||
|
||||
NIR_PASS(progress, s, nir_opt_copy_prop_vars);
|
||||
NIR_PASS(progress, s, nir_opt_dead_write_vars);
|
||||
NIR_PASS(progress, s, nir_opt_combine_stores, nir_var_all);
|
||||
|
||||
NIR_PASS(progress, s, nir_remove_dead_variables,
|
||||
(nir_variable_mode)(nir_var_function_temp |
|
||||
nir_var_shader_temp |
|
||||
nir_var_mem_shared),
|
||||
NULL);
|
||||
|
||||
NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
|
||||
NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
|
||||
NIR_PASS(progress, s, nir_copy_prop);
|
||||
@@ -2145,10 +2167,27 @@ v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s)
|
||||
NIR_PASS(progress, s, nir_opt_dce);
|
||||
NIR_PASS(progress, s, nir_opt_dead_cf);
|
||||
NIR_PASS(progress, s, nir_opt_cse);
|
||||
NIR_PASS(progress, s, nir_opt_peephole_select, 0, false, false);
|
||||
NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
|
||||
NIR_PASS(progress, s, nir_opt_algebraic);
|
||||
NIR_PASS(progress, s, nir_opt_constant_folding);
|
||||
|
||||
NIR_PASS(progress, s, nir_opt_intrinsics);
|
||||
NIR_PASS(progress, s, nir_opt_idiv_const, 32);
|
||||
NIR_PASS(progress, s, nir_lower_alu);
|
||||
|
||||
if (nir_opt_trivial_continues(s)) {
|
||||
progress = true;
|
||||
NIR_PASS(progress, s, nir_copy_prop);
|
||||
NIR_PASS(progress, s, nir_opt_dce);
|
||||
}
|
||||
|
||||
NIR_PASS(progress, s, nir_opt_conditional_discard);
|
||||
|
||||
NIR_PASS(progress, s, nir_opt_remove_phis);
|
||||
NIR_PASS(progress, s, nir_opt_if, false);
|
||||
NIR_PASS(progress, s, nir_opt_undef);
|
||||
|
||||
/* Note that vectorization may undo the load/store scalarization
|
||||
* pass we run for non 32-bit TMU general load/store by
|
||||
* converting, for example, 2 consecutive 16-bit loads into a
|
||||
|
@@ -1071,7 +1071,7 @@ vir_has_uniform(struct qinst *inst)
|
||||
const struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devinfo,
|
||||
uint32_t max_inline_uniform_buffers);
|
||||
void v3d_compiler_free(const struct v3d_compiler *compiler);
|
||||
void v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s);
|
||||
void v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s, bool allow_copies);
|
||||
|
||||
uint64_t *v3d_compile(const struct v3d_compiler *compiler,
|
||||
struct v3d_key *key,
|
||||
|
@@ -930,7 +930,7 @@ v3d_nir_lower_vs_early(struct v3d_compile *c)
|
||||
NIR_PASS(_, c->s, nir_remove_unused_io_vars,
|
||||
nir_var_shader_out, used_outputs, NULL); /* demotes to globals */
|
||||
NIR_PASS(_, c->s, nir_lower_global_vars_to_local);
|
||||
v3d_optimize_nir(c, c->s);
|
||||
v3d_optimize_nir(c, c->s, false);
|
||||
NIR_PASS(_, c->s, nir_remove_dead_variables, nir_var_shader_in, NULL);
|
||||
|
||||
/* This must go before nir_lower_io */
|
||||
@@ -964,7 +964,7 @@ v3d_nir_lower_gs_early(struct v3d_compile *c)
|
||||
NIR_PASS(_, c->s, nir_remove_unused_io_vars,
|
||||
nir_var_shader_out, used_outputs, NULL); /* demotes to globals */
|
||||
NIR_PASS(_, c->s, nir_lower_global_vars_to_local);
|
||||
v3d_optimize_nir(c, c->s);
|
||||
v3d_optimize_nir(c, c->s, false);
|
||||
NIR_PASS(_, c->s, nir_remove_dead_variables, nir_var_shader_in, NULL);
|
||||
|
||||
/* This must go before nir_lower_io */
|
||||
@@ -1607,7 +1607,7 @@ v3d_attempt_compile(struct v3d_compile *c)
|
||||
|
||||
NIR_PASS(_, c->s, v3d_nir_lower_subgroup_intrinsics, c);
|
||||
|
||||
v3d_optimize_nir(c, c->s);
|
||||
v3d_optimize_nir(c, c->s, false);
|
||||
|
||||
/* Do late algebraic optimization to turn add(a, neg(b)) back into
|
||||
* subs, then the mandatory cleanup after algebraic. Note that it may
|
||||
|
@@ -318,7 +318,12 @@ v3d_uncompiled_shader_create(struct pipe_context *pctx,
|
||||
|
||||
NIR_PASS(_, s, nir_lower_load_const_to_scalar);
|
||||
|
||||
v3d_optimize_nir(NULL, s);
|
||||
v3d_optimize_nir(NULL, s, true);
|
||||
|
||||
NIR_PASS(_, s, nir_lower_var_copies);
|
||||
|
||||
/* Get rid of split copies */
|
||||
v3d_optimize_nir(NULL, s, false);
|
||||
|
||||
NIR_PASS(_, s, nir_remove_dead_variables, nir_var_function_temp, NULL);
|
||||
|
||||
|
Reference in New Issue
Block a user