broadcom/compiler: add a compiler strategy to disable loop unrolling

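Loop unrolling can increase register pressure significantly, leading to
lower thread counts and spilling. The new strategy is tried right after the
default one and keeps the same minimum of 4 threads for register allocation.
Note that v3d_optimize_nir() now only unrolls loops when it is given a
compile context, so callers that pass NULL (v3d_uncompiled_shader_create)
skip loop unrolling in this pass.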

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10647>
This commit is contained in:
Iago Toral Quiroga
2021-05-03 10:14:12 +02:00
parent 4742300e6b
commit 296fe4daa6
4 changed files with 18 additions and 10 deletions

View File

@@ -1774,7 +1774,7 @@ mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
} }
void void
v3d_optimize_nir(struct nir_shader *s) v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s)
{ {
bool progress; bool progress;
unsigned lower_flrp = unsigned lower_flrp =
@@ -1826,7 +1826,8 @@ v3d_optimize_nir(struct nir_shader *s)
NIR_PASS(progress, s, nir_opt_undef); NIR_PASS(progress, s, nir_opt_undef);
NIR_PASS(progress, s, nir_lower_undef_to_zero); NIR_PASS(progress, s, nir_lower_undef_to_zero);
if (s->options->max_unroll_iterations > 0) { if (c && !c->disable_loop_unrolling &&
s->options->max_unroll_iterations > 0) {
NIR_PASS(progress, s, nir_opt_loop_unroll, NIR_PASS(progress, s, nir_opt_loop_unroll,
nir_var_shader_in | nir_var_shader_in |
nir_var_shader_out | nir_var_shader_out |

View File

@@ -660,6 +660,9 @@ struct v3d_compile {
*/ */
bool disable_ldunif_opt; bool disable_ldunif_opt;
/* Disables loop unrolling to reduce register pressure. */
bool disable_loop_unrolling;
/* Minimum number of threads we are willing to use to register allocate /* Minimum number of threads we are willing to use to register allocate
* a shader with the current compilation strategy. This only prevents * a shader with the current compilation strategy. This only prevents
* us from lowering the thread count to register allocate successfully, * us from lowering the thread count to register allocate successfully,
@@ -939,7 +942,7 @@ vir_has_uniform(struct qinst *inst)
const struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devinfo); const struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devinfo);
void v3d_compiler_free(const struct v3d_compiler *compiler); void v3d_compiler_free(const struct v3d_compiler *compiler);
void v3d_optimize_nir(struct nir_shader *s); void v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s);
uint64_t *v3d_compile(const struct v3d_compiler *compiler, uint64_t *v3d_compile(const struct v3d_compiler *compiler,
struct v3d_key *key, struct v3d_key *key,

View File

@@ -526,6 +526,7 @@ vir_compile_init(const struct v3d_compiler *compiler,
void *debug_output_data, void *debug_output_data,
int program_id, int variant_id, int program_id, int variant_id,
uint32_t min_threads_for_reg_alloc, uint32_t min_threads_for_reg_alloc,
bool disable_loop_unrolling,
bool disable_constant_ubo_load_sorting, bool disable_constant_ubo_load_sorting,
bool disable_tmu_pipelining, bool disable_tmu_pipelining,
bool fallback_scheduler) bool fallback_scheduler)
@@ -545,6 +546,7 @@ vir_compile_init(const struct v3d_compiler *compiler,
c->fallback_scheduler = fallback_scheduler; c->fallback_scheduler = fallback_scheduler;
c->disable_tmu_pipelining = disable_tmu_pipelining; c->disable_tmu_pipelining = disable_tmu_pipelining;
c->disable_constant_ubo_load_sorting = disable_constant_ubo_load_sorting; c->disable_constant_ubo_load_sorting = disable_constant_ubo_load_sorting;
c->disable_loop_unrolling = disable_loop_unrolling;
s = nir_shader_clone(c, s); s = nir_shader_clone(c, s);
c->s = s; c->s = s;
@@ -867,7 +869,7 @@ v3d_nir_lower_vs_early(struct v3d_compile *c)
NIR_PASS_V(c->s, nir_remove_unused_io_vars, NIR_PASS_V(c->s, nir_remove_unused_io_vars,
nir_var_shader_out, used_outputs, NULL); /* demotes to globals */ nir_var_shader_out, used_outputs, NULL); /* demotes to globals */
NIR_PASS_V(c->s, nir_lower_global_vars_to_local); NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
v3d_optimize_nir(c->s); v3d_optimize_nir(c, c->s);
NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in, NULL); NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in, NULL);
/* This must go before nir_lower_io */ /* This must go before nir_lower_io */
@@ -901,7 +903,7 @@ v3d_nir_lower_gs_early(struct v3d_compile *c)
NIR_PASS_V(c->s, nir_remove_unused_io_vars, NIR_PASS_V(c->s, nir_remove_unused_io_vars,
nir_var_shader_out, used_outputs, NULL); /* demotes to globals */ nir_var_shader_out, used_outputs, NULL); /* demotes to globals */
NIR_PASS_V(c->s, nir_lower_global_vars_to_local); NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
v3d_optimize_nir(c->s); v3d_optimize_nir(c, c->s);
NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in, NULL); NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in, NULL);
/* This must go before nir_lower_io */ /* This must go before nir_lower_io */
@@ -1417,7 +1419,7 @@ v3d_attempt_compile(struct v3d_compile *c)
NIR_PASS_V(c->s, nir_lower_wrmasks, should_split_wrmask, c->s); NIR_PASS_V(c->s, nir_lower_wrmasks, should_split_wrmask, c->s);
v3d_optimize_nir(c->s); v3d_optimize_nir(c, c->s);
/* Do late algebraic optimization to turn add(a, neg(b)) back into /* Do late algebraic optimization to turn add(a, neg(b)) back into
* subs, then the mandatory cleanup after algebraic. Note that it may * subs, then the mandatory cleanup after algebraic. Note that it may
@@ -1537,6 +1539,7 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
uint32_t min_threads_for_reg_alloc; uint32_t min_threads_for_reg_alloc;
} static const strategies[] = { } static const strategies[] = {
{ "default", 4 }, { "default", 4 },
{ "disable loop unrolling", 4 },
{ "disable UBO load sorting", 1 }, { "disable UBO load sorting", 1 },
{ "disable TMU pipelining", 1 }, { "disable TMU pipelining", 1 },
{ "fallback scheduler", 1 } { "fallback scheduler", 1 }
@@ -1547,9 +1550,10 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
debug_output, debug_output_data, debug_output, debug_output_data,
program_id, variant_id, program_id, variant_id,
strategies[i].min_threads_for_reg_alloc, strategies[i].min_threads_for_reg_alloc,
i > 0, /* Disable UBO load sorting */ i > 0, /* Disable loop unrolling */
i > 1, /* Disable TMU pipelining */ i > 1, /* Disable UBO load sorting */
i > 2 /* Fallback_scheduler */); i > 2, /* Disable TMU pipelining */
i > 3 /* Fallback_scheduler */);
v3d_attempt_compile(c); v3d_attempt_compile(c);

View File

@@ -318,7 +318,7 @@ v3d_uncompiled_shader_create(struct pipe_context *pctx,
NIR_PASS_V(s, nir_lower_load_const_to_scalar); NIR_PASS_V(s, nir_lower_load_const_to_scalar);
v3d_optimize_nir(s); v3d_optimize_nir(NULL, s);
NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL); NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);