broadcom/compiler: call nir_opt_gcm with a custom strategy

nir_opt_gcm gets us worse shader-db stats, but that is expected. However,
we want to avoid getting worse spill/fill values. Analyzing the
outcome with shader-db, this mostly happens with shaders that are
already complex and already spilling/filling.

So the best option here is adding a new strategy that falls back if
we get spills/fills using nir_opt_gcm.

It is not clear in which order we should disable gcm. For now we
disable it before loop unrolling.

We get a slight performance gain (on average) using nir_opt_gcm.

We don't show the shader-db stats, as they are worse, but as mentioned,
this is expected.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17185>
This commit is contained in:
Alejandro Piñeiro
2022-10-18 14:25:14 +02:00
committed by Marge Bot
parent afc6de356a
commit 019529aa11
3 changed files with 42 additions and 20 deletions

View File

@@ -2187,6 +2187,12 @@ v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s, bool allow_copies)
NIR_PASS(progress, s, nir_opt_remove_phis); NIR_PASS(progress, s, nir_opt_remove_phis);
NIR_PASS(progress, s, nir_opt_if, false); NIR_PASS(progress, s, nir_opt_if, false);
NIR_PASS(progress, s, nir_opt_undef); NIR_PASS(progress, s, nir_opt_undef);
if (c && !c->disable_gcm) {
bool local_progress = false;
NIR_PASS(local_progress, s, nir_opt_gcm, false);
c->gcm_progress |= local_progress;
progress |= local_progress;
}
/* Note that vectorization may undo the load/store scalarization /* Note that vectorization may undo the load/store scalarization
* pass we run for non 32-bit TMU general load/store by * pass we run for non 32-bit TMU general load/store by

View File

@@ -721,6 +721,14 @@ struct v3d_compile {
bool disable_loop_unrolling; bool disable_loop_unrolling;
bool unrolled_any_loops; bool unrolled_any_loops;
/* Disables nir_opt_gcm to reduce register pressure. */
bool disable_gcm;
/* If calling nir_opt_gcm made any progress. Used to skip new rebuilds
* if possible
*/
bool gcm_progress;
/* Disables scheduling of general TMU loads (and unfiltered image load). /* Disables scheduling of general TMU loads (and unfiltered image load).
*/ */
bool disable_general_tmu_sched; bool disable_general_tmu_sched;

View File

@@ -546,6 +546,7 @@ struct v3d_compiler_strategy {
uint32_t max_threads; uint32_t max_threads;
uint32_t min_threads; uint32_t min_threads;
bool disable_general_tmu_sched; bool disable_general_tmu_sched;
bool disable_gcm;
bool disable_loop_unrolling; bool disable_loop_unrolling;
bool disable_ubo_load_sorting; bool disable_ubo_load_sorting;
bool disable_tmu_pipelining; bool disable_tmu_pipelining;
@@ -582,6 +583,7 @@ vir_compile_init(const struct v3d_compiler *compiler,
c->disable_general_tmu_sched = strategy->disable_general_tmu_sched; c->disable_general_tmu_sched = strategy->disable_general_tmu_sched;
c->disable_tmu_pipelining = strategy->disable_tmu_pipelining; c->disable_tmu_pipelining = strategy->disable_tmu_pipelining;
c->disable_constant_ubo_load_sorting = strategy->disable_ubo_load_sorting; c->disable_constant_ubo_load_sorting = strategy->disable_ubo_load_sorting;
c->disable_gcm = strategy->disable_gcm;
c->disable_loop_unrolling = V3D_DBG(NO_LOOP_UNROLL) c->disable_loop_unrolling = V3D_DBG(NO_LOOP_UNROLL)
? true : strategy->disable_loop_unrolling; ? true : strategy->disable_loop_unrolling;
@@ -1723,17 +1725,19 @@ int v3d_shaderdb_dump(struct v3d_compile *c,
* because v3d_nir_to_vir will cap this to the actual minimum. * because v3d_nir_to_vir will cap this to the actual minimum.
*/ */
static const struct v3d_compiler_strategy strategies[] = { static const struct v3d_compiler_strategy strategies[] = {
/*0*/ { "default", 4, 4, false, false, false, false, 0 }, /*0*/ { "default", 4, 4, false, false, false, false, false, 0 },
/*1*/ { "disable general TMU sched", 4, 4, true, false, false, false, 0 }, /*1*/ { "disable general TMU sched", 4, 4, true, false, false, false, false, 0 },
/*2*/ { "disable loop unrolling", 4, 4, true, true, false, false, 0 }, /*2*/ { "disable gcm", 4, 4, true, true, false, false, false, 0 },
/*3*/ { "disable UBO load sorting", 4, 4, true, true, true, false, 0 }, /*3*/ { "disable loop unrolling", 4, 4, true, true, true, false, false, 0 },
/*4*/ { "disable TMU pipelining", 4, 4, true, true, true, true, 0 }, /*4*/ { "disable UBO load sorting", 4, 4, true, true, true, true, false, 0 },
/*5*/ { "lower thread count", 2, 1, false, false, false, false, -1 }, /*5*/ { "disable TMU pipelining", 4, 4, true, true, true, true, true, 0 },
/*6*/ { "disable general TMU sched (2t)", 2, 1, true, false, false, false, -1 }, /*6*/ { "lower thread count", 2, 1, false, false, false, false, false, -1 },
/*7*/ { "disable loop unrolling (2t)", 2, 1, true, true, false, false, -1 }, /*7*/ { "disable general TMU sched (2t)", 2, 1, true, false, false, false, false, -1 },
/*8*/ { "disable UBO load sorting (2t)", 2, 1, true, true, true, false, -1 }, /*8*/ { "disable gcm (2t)", 2, 1, true, true, false, false, false, -1 },
/*9*/ { "disable TMU pipelining (2t)", 2, 1, true, true, true, true, -1 }, /*9*/ { "disable loop unrolling (2t)", 2, 1, true, true, true, false, false, -1 },
/*10*/ { "fallback scheduler", 2, 1, true, true, true, true, -1 } /*10*/ { "disable UBO load sorting (2t)", 2, 1, true, true, true, true, false, -1 },
/*11*/ { "disable TMU pipelining (2t)", 2, 1, true, true, true, true, true, -1 },
/*12*/ { "fallback scheduler", 2, 1, true, true, true, true, true, -1 }
}; };
/** /**
@@ -1762,22 +1766,26 @@ skip_compile_strategy(struct v3d_compile *c, uint32_t idx)
switch (idx) { switch (idx) {
/* General TMU sched.: skip if we didn't emit any TMU loads */ /* General TMU sched.: skip if we didn't emit any TMU loads */
case 1: case 1:
case 6:
return !c->has_general_tmu_load;
/* Loop unrolling: skip if we didn't unroll any loops */
case 2:
case 7: case 7:
return !c->has_general_tmu_load;
/* Global code motion: skip if nir_opt_gcm didn't make any progress */
case 2:
case 8:
return !c->gcm_progress;
/* Loop unrolling: skip if we didn't unroll any loops */
case 3:
case 9:
return !c->unrolled_any_loops; return !c->unrolled_any_loops;
/* UBO load sorting: skip if we didn't sort any loads */ /* UBO load sorting: skip if we didn't sort any loads */
case 3: case 4:
case 8: case 10:
return !c->sorted_any_ubo_loads; return !c->sorted_any_ubo_loads;
/* TMU pipelining: skip if we didn't pipeline any TMU ops */ /* TMU pipelining: skip if we didn't pipeline any TMU ops */
case 4: case 5:
case 9: case 11:
return !c->pipelined_any_tmu; return !c->pipelined_any_tmu;
/* Lower thread count: skip if we already tried less that 4 threads */ /* Lower thread count: skip if we already tried less that 4 threads */
case 5: case 6:
return c->threads < 4; return c->threads < 4;
default: default:
return false; return false;