broadcom/compiler: add a compiler strategy to disable loop unrolling
Loop unrolling can increase register pressure significantly, leading to lower thread counts and spilling. Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10647>
This commit is contained in:
@@ -1774,7 +1774,7 @@ mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
|
||||
}
|
||||
|
||||
void
|
||||
v3d_optimize_nir(struct nir_shader *s)
|
||||
v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s)
|
||||
{
|
||||
bool progress;
|
||||
unsigned lower_flrp =
|
||||
@@ -1826,7 +1826,8 @@ v3d_optimize_nir(struct nir_shader *s)
|
||||
NIR_PASS(progress, s, nir_opt_undef);
|
||||
NIR_PASS(progress, s, nir_lower_undef_to_zero);
|
||||
|
||||
if (s->options->max_unroll_iterations > 0) {
|
||||
if (c && !c->disable_loop_unrolling &&
|
||||
s->options->max_unroll_iterations > 0) {
|
||||
NIR_PASS(progress, s, nir_opt_loop_unroll,
|
||||
nir_var_shader_in |
|
||||
nir_var_shader_out |
|
||||
|
@@ -660,6 +660,9 @@ struct v3d_compile {
|
||||
*/
|
||||
bool disable_ldunif_opt;
|
||||
|
||||
/* Disables loop unrolling to reduce register pressure. */
|
||||
bool disable_loop_unrolling;
|
||||
|
||||
/* Minimum number of threads we are willing to use to register allocate
|
||||
* a shader with the current compilation strategy. This only prevents
|
||||
* us from lowering the thread count to register allocate successfully,
|
||||
@@ -939,7 +942,7 @@ vir_has_uniform(struct qinst *inst)
|
||||
|
||||
const struct v3d_compiler *v3d_compiler_init(const struct v3d_device_info *devinfo);
|
||||
void v3d_compiler_free(const struct v3d_compiler *compiler);
|
||||
void v3d_optimize_nir(struct nir_shader *s);
|
||||
void v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s);
|
||||
|
||||
uint64_t *v3d_compile(const struct v3d_compiler *compiler,
|
||||
struct v3d_key *key,
|
||||
|
@@ -526,6 +526,7 @@ vir_compile_init(const struct v3d_compiler *compiler,
|
||||
void *debug_output_data,
|
||||
int program_id, int variant_id,
|
||||
uint32_t min_threads_for_reg_alloc,
|
||||
bool disable_loop_unrolling,
|
||||
bool disable_constant_ubo_load_sorting,
|
||||
bool disable_tmu_pipelining,
|
||||
bool fallback_scheduler)
|
||||
@@ -545,6 +546,7 @@ vir_compile_init(const struct v3d_compiler *compiler,
|
||||
c->fallback_scheduler = fallback_scheduler;
|
||||
c->disable_tmu_pipelining = disable_tmu_pipelining;
|
||||
c->disable_constant_ubo_load_sorting = disable_constant_ubo_load_sorting;
|
||||
c->disable_loop_unrolling = disable_loop_unrolling;
|
||||
|
||||
s = nir_shader_clone(c, s);
|
||||
c->s = s;
|
||||
@@ -867,7 +869,7 @@ v3d_nir_lower_vs_early(struct v3d_compile *c)
|
||||
NIR_PASS_V(c->s, nir_remove_unused_io_vars,
|
||||
nir_var_shader_out, used_outputs, NULL); /* demotes to globals */
|
||||
NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
|
||||
v3d_optimize_nir(c->s);
|
||||
v3d_optimize_nir(c, c->s);
|
||||
NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in, NULL);
|
||||
|
||||
/* This must go before nir_lower_io */
|
||||
@@ -901,7 +903,7 @@ v3d_nir_lower_gs_early(struct v3d_compile *c)
|
||||
NIR_PASS_V(c->s, nir_remove_unused_io_vars,
|
||||
nir_var_shader_out, used_outputs, NULL); /* demotes to globals */
|
||||
NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
|
||||
v3d_optimize_nir(c->s);
|
||||
v3d_optimize_nir(c, c->s);
|
||||
NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in, NULL);
|
||||
|
||||
/* This must go before nir_lower_io */
|
||||
@@ -1417,7 +1419,7 @@ v3d_attempt_compile(struct v3d_compile *c)
|
||||
|
||||
NIR_PASS_V(c->s, nir_lower_wrmasks, should_split_wrmask, c->s);
|
||||
|
||||
v3d_optimize_nir(c->s);
|
||||
v3d_optimize_nir(c, c->s);
|
||||
|
||||
/* Do late algebraic optimization to turn add(a, neg(b)) back into
|
||||
* subs, then the mandatory cleanup after algebraic. Note that it may
|
||||
@@ -1537,6 +1539,7 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
|
||||
uint32_t min_threads_for_reg_alloc;
|
||||
} static const strategies[] = {
|
||||
{ "default", 4 },
|
||||
{ "disable loop unrolling", 4 },
|
||||
{ "disable UBO load sorting", 1 },
|
||||
{ "disable TMU pipelining", 1 },
|
||||
{ "fallback scheduler", 1 }
|
||||
@@ -1547,9 +1550,10 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
|
||||
debug_output, debug_output_data,
|
||||
program_id, variant_id,
|
||||
strategies[i].min_threads_for_reg_alloc,
|
||||
i > 0, /* Disable UBO load sorting */
|
||||
i > 1, /* Disable TMU pipelining */
|
||||
i > 2 /* Fallback_scheduler */);
|
||||
i > 0, /* Disable loop unrolling */
|
||||
i > 1, /* Disable UBO load sorting */
|
||||
i > 2, /* Disable TMU pipelining */
|
||||
i > 3 /* Fallback_scheduler */);
|
||||
|
||||
v3d_attempt_compile(c);
|
||||
|
||||
|
@@ -318,7 +318,7 @@ v3d_uncompiled_shader_create(struct pipe_context *pctx,
|
||||
|
||||
NIR_PASS_V(s, nir_lower_load_const_to_scalar);
|
||||
|
||||
v3d_optimize_nir(s);
|
||||
v3d_optimize_nir(NULL, s);
|
||||
|
||||
NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
|
||||
|
||||
|
Reference in New Issue
Block a user