nir: add heuristic for instructions in loops with GCM
Moving instructions out of large loops tends to cause excessive spilling. This appears to be a good limit. In future it might make sense to make this a NIR options so other drivers can set their own limits. Tiger Lake total instructions in shared programs: 20930180 -> 20926952 (-0.02%) instructions in affected programs: 280768 -> 277540 (-1.15%) helped: 734 HURT: 192 helped stats (abs) min: 1 max: 61 x̄: 5.16 x̃: 4 helped stats (rel) min: 0.04% max: 10.64% x̄: 3.23% x̃: 3.14% HURT stats (abs) min: 1 max: 52 x̄: 2.90 x̃: 1 HURT stats (rel) min: 0.03% max: 9.76% x̄: 1.13% x̃: 0.61% 95% mean confidence interval for instructions value: -3.89 -3.08 95% mean confidence interval for instructions %-change: -2.49% -2.16% Instructions are helped. total cycles in shared programs: 841825217 -> 838817552 (-0.36%) cycles in affected programs: 122088078 -> 119080413 (-2.46%) helped: 941 HURT: 100 helped stats (abs) min: 1 max: 160080 x̄: 3274.31 x̃: 2660 helped stats (rel) min: <.01% max: 41.64% x̄: 5.50% x̃: 4.80% HURT stats (abs) min: 1 max: 41856 x̄: 734.62 x̃: 26 HURT stats (rel) min: <.01% max: 7.29% x̄: 0.44% x̃: 0.27% 95% mean confidence interval for cycles value: -3236.56 -2541.85 95% mean confidence interval for cycles %-change: -5.26% -4.60% Cycles are helped. total sends in shared programs: 977905 -> 977782 (-0.01%) sends in affected programs: 2279 -> 2156 (-5.40%) helped: 119 HURT: 0 helped stats (abs) min: 1 max: 4 x̄: 1.03 x̃: 1 helped stats (rel) min: 0.60% max: 14.29% x̄: 6.93% x̃: 6.67% 95% mean confidence interval for sends value: -1.09 -0.98 95% mean confidence interval for sends %-change: -7.42% -6.45% Sends are helped. 
LOST: 2 GAINED: 0 Ice Lake total instructions in shared programs: 19865361 -> 19861747 (-0.02%) instructions in affected programs: 185789 -> 182175 (-1.95%) helped: 593 HURT: 47 helped stats (abs) min: 1 max: 27 x̄: 6.17 x̃: 4 helped stats (rel) min: 0.19% max: 8.65% x̄: 4.53% x̃: 4.60% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 0.03% max: 0.23% x̄: 0.11% x̃: 0.04% 95% mean confidence interval for instructions value: -5.93 -5.37 95% mean confidence interval for instructions %-change: -4.32% -4.06% Instructions are helped. total loops in shared programs: 6120 -> 6117 (-0.05%) loops in affected programs: 6 -> 3 (-50.00%) helped: 3 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 50.00% max: 50.00% x̄: 50.00% x̃: 50.00% total cycles in shared programs: 961777176 -> 959404350 (-0.25%) cycles in affected programs: 172224180 -> 169851354 (-1.38%) helped: 936 HURT: 80 helped stats (abs) min: 1 max: 9566 x̄: 2621.08 x̃: 2550 helped stats (rel) min: <.01% max: 41.77% x̄: 4.22% x̃: 3.84% HURT stats (abs) min: 1 max: 59146 x̄: 1006.34 x̃: 24 HURT stats (rel) min: <.01% max: 3.78% x̄: 0.44% x̃: 0.25% 95% mean confidence interval for cycles value: -2513.72 -2157.20 95% mean confidence interval for cycles %-change: -4.13% -3.57% Cycles are helped. total sends in shared programs: 1019995 -> 1019872 (-0.01%) sends in affected programs: 2283 -> 2160 (-5.39%) helped: 119 HURT: 0 helped stats (abs) min: 1 max: 4 x̄: 1.03 x̃: 1 helped stats (rel) min: 0.60% max: 14.29% x̄: 6.91% x̃: 6.67% 95% mean confidence interval for sends value: -1.09 -0.98 95% mean confidence interval for sends %-change: -7.39% -6.42% Sends are helped. 
LOST: 4 GAINED: 0 Skylake total instructions in shared programs: 17994337 -> 17993846 (<.01%) instructions in affected programs: 146294 -> 145803 (-0.34%) helped: 190 HURT: 47 helped stats (abs) min: 1 max: 12 x̄: 2.83 x̃: 3 helped stats (rel) min: 0.14% max: 4.29% x̄: 1.08% x̃: 0.90% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 0.03% max: 0.22% x̄: 0.11% x̃: 0.04% 95% mean confidence interval for instructions value: -2.30 -1.84 95% mean confidence interval for instructions %-change: -0.95% -0.74% Instructions are helped. total loops in shared programs: 6029 -> 6023 (-0.10%) loops in affected programs: 12 -> 6 (-50.00%) helped: 6 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 50.00% max: 50.00% x̄: 50.00% x̃: 50.00% 95% mean confidence interval for loops value: -1.00 -1.00 95% mean confidence interval for loops %-change: -50.00% -50.00% Loops are helped. total cycles in shared programs: 939062940 -> 938023548 (-0.11%) cycles in affected programs: 169671482 -> 168632090 (-0.61%) helped: 980 HURT: 134 helped stats (abs) min: 1 max: 25000 x̄: 1075.57 x̃: 1052 helped stats (rel) min: <.01% max: 42.75% x̄: 2.51% x̃: 1.32% HURT stats (abs) min: 1 max: 837 x̄: 109.45 x̃: 20 HURT stats (rel) min: <.01% max: 5.71% x̄: 0.73% x̃: 0.21% 95% mean confidence interval for cycles value: -1005.89 -860.17 95% mean confidence interval for cycles %-change: -2.39% -1.84% Cycles are helped. total sends in shared programs: 1026848 -> 1026724 (-0.01%) sends in affected programs: 2302 -> 2178 (-5.39%) helped: 120 HURT: 0 helped stats (abs) min: 1 max: 4 x̄: 1.03 x̃: 1 helped stats (rel) min: 0.60% max: 14.29% x̄: 6.91% x̃: 6.67% 95% mean confidence interval for sends value: -1.09 -0.98 95% mean confidence interval for sends %-change: -7.40% -6.43% Sends are helped. 
LOST: 1 GAINED: 1 Broadwell total instructions in shared programs: 17605621 -> 17605154 (<.01%) instructions in affected programs: 145691 -> 145224 (-0.32%) helped: 184 HURT: 48 helped stats (abs) min: 1 max: 12 x̄: 2.83 x̃: 3 helped stats (rel) min: 0.13% max: 4.29% x̄: 1.09% x̃: 0.93% HURT stats (abs) min: 1 max: 7 x̄: 1.12 x̃: 1 HURT stats (rel) min: 0.03% max: 0.48% x̄: 0.12% x̃: 0.04% 95% mean confidence interval for instructions value: -2.26 -1.77 95% mean confidence interval for instructions %-change: -0.95% -0.73% Instructions are helped. total loops in shared programs: 5968 -> 5963 (-0.08%) loops in affected programs: 10 -> 5 (-50.00%) helped: 5 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 50.00% max: 50.00% x̄: 50.00% x̃: 50.00% 95% mean confidence interval for loops value: -1.00 -1.00 95% mean confidence interval for loops %-change: -50.00% -50.00% Loops are helped. total cycles in shared programs: 1000679489 -> 998592756 (-0.21%) cycles in affected programs: 173421234 -> 171334501 (-1.20%) helped: 993 HURT: 153 helped stats (abs) min: 1 max: 766608 x̄: 2118.49 x̃: 1080 helped stats (rel) min: <.01% max: 54.61% x̄: 2.61% x̃: 1.73% HURT stats (abs) min: 1 max: 2200 x̄: 110.61 x̃: 11 HURT stats (rel) min: <.01% max: 5.68% x̄: 0.63% x̃: 0.06% 95% mean confidence interval for cycles value: -3191.23 -450.54 95% mean confidence interval for cycles %-change: -2.47% -1.89% Cycles are helped. total sends in shared programs: 996341 -> 996222 (-0.01%) sends in affected programs: 2151 -> 2032 (-5.53%) helped: 115 HURT: 0 helped stats (abs) min: 1 max: 4 x̄: 1.03 x̃: 1 helped stats (rel) min: 0.60% max: 14.29% x̄: 7.07% x̃: 6.67% 95% mean confidence interval for sends value: -1.09 -0.98 95% mean confidence interval for sends %-change: -7.55% -6.58% Sends are helped. 
Haswell total instructions in shared programs: 16038375 -> 16038121 (<.01%) instructions in affected programs: 216797 -> 216543 (-0.12%) helped: 185 HURT: 217 helped stats (abs) min: 1 max: 12 x̄: 2.84 x̃: 3 helped stats (rel) min: 0.13% max: 4.23% x̄: 1.30% x̃: 1.20% HURT stats (abs) min: 1 max: 6 x̄: 1.25 x̃: 1 HURT stats (rel) min: 0.03% max: 5.66% x̄: 0.61% x̃: 0.40% 95% mean confidence interval for instructions value: -0.85 -0.41 95% mean confidence interval for instructions %-change: -0.40% -0.14% Instructions are helped. total loops in shared programs: 5947 -> 5942 (-0.08%) loops in affected programs: 10 -> 5 (-50.00%) helped: 5 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 50.00% max: 50.00% x̄: 50.00% x̃: 50.00% 95% mean confidence interval for loops value: -1.00 -1.00 95% mean confidence interval for loops %-change: -50.00% -50.00% Loops are helped. total cycles in shared programs: 967655093 -> 965746713 (-0.20%) cycles in affected programs: 197288924 -> 195380544 (-0.97%) helped: 950 HURT: 195 helped stats (abs) min: 1 max: 782820 x̄: 2274.79 x̃: 1260 helped stats (rel) min: <.01% max: 54.26% x̄: 3.02% x̃: 1.71% HURT stats (abs) min: 1 max: 15790 x̄: 1295.73 x̃: 21 HURT stats (rel) min: <.01% max: 119.85% x̄: 7.76% x̃: 0.11% 95% mean confidence interval for cycles value: -3014.22 -319.19 95% mean confidence interval for cycles %-change: -1.83% -0.55% Cycles are helped. total sends in shared programs: 934894 -> 934765 (-0.01%) sends in affected programs: 2192 -> 2063 (-5.89%) helped: 115 HURT: 2 helped stats (abs) min: 1 max: 4 x̄: 1.14 x̃: 1 helped stats (rel) min: 0.60% max: 28.57% x̄: 7.68% x̃: 6.67% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 16.67% max: 16.67% x̄: 16.67% x̃: 16.67% 95% mean confidence interval for sends value: -1.23 -0.98 95% mean confidence interval for sends %-change: -8.28% -6.24% Sends are helped. 
LOST: 1 GAINED: 18 Ivy Bridge total instructions in shared programs: 15269357 -> 15269398 (<.01%) instructions in affected programs: 190484 -> 190525 (0.02%) helped: 77 HURT: 206 helped stats (abs) min: 1 max: 6 x̄: 2.47 x̃: 3 helped stats (rel) min: 0.14% max: 5.31% x̄: 1.46% x̃: 1.65% HURT stats (abs) min: 1 max: 3 x̄: 1.12 x̃: 1 HURT stats (rel) min: 0.03% max: 2.38% x̄: 0.42% x̃: 0.40% 95% mean confidence interval for instructions value: -0.06 0.35 95% mean confidence interval for instructions %-change: -0.21% 0.03% Inconclusive result (value mean confidence interval includes 0). total loops in shared programs: 4001 -> 3996 (-0.12%) loops in affected programs: 10 -> 5 (-50.00%) helped: 5 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 50.00% max: 50.00% x̄: 50.00% x̃: 50.00% 95% mean confidence interval for loops value: -1.00 -1.00 95% mean confidence interval for loops %-change: -50.00% -50.00% Loops are helped. total cycles in shared programs: 562045564 -> 561063543 (-0.17%) cycles in affected programs: 200924872 -> 199942851 (-0.49%) helped: 748 HURT: 160 helped stats (abs) min: 2 max: 14926 x̄: 1692.94 x̃: 1620 helped stats (rel) min: <.01% max: 53.29% x̄: 3.17% x̃: 1.87% HURT stats (abs) min: 2 max: 15726 x̄: 1776.86 x̃: 36 HURT stats (rel) min: <.01% max: 114.43% x̄: 10.66% x̃: 0.21% 95% mean confidence interval for cycles value: -1237.33 -925.71 95% mean confidence interval for cycles %-change: -1.54% 0.08% Inconclusive result (%-change mean confidence interval includes 0). total sends in shared programs: 893348 -> 893330 (<.01%) sends in affected programs: 187 -> 169 (-9.63%) helped: 14 HURT: 0 helped stats (abs) min: 1 max: 2 x̄: 1.29 x̃: 1 helped stats (rel) min: 4.08% max: 22.22% x̄: 11.70% x̃: 10.10% 95% mean confidence interval for sends value: -1.56 -1.02 95% mean confidence interval for sends %-change: -14.92% -8.48% Sends are helped. 
LOST: 1 GAINED: 19 Sandy Bridge total instructions in shared programs: 11785227 -> 11785774 (<.01%) instructions in affected programs: 78403 -> 78950 (0.70%) helped: 65 HURT: 505 helped stats (abs) min: 1 max: 4 x̄: 2.22 x̃: 3 helped stats (rel) min: 0.14% max: 4.17% x̄: 1.19% x̃: 1.38% HURT stats (abs) min: 1 max: 5 x̄: 1.37 x̃: 1 HURT stats (rel) min: 0.24% max: 3.33% x̄: 1.57% x̃: 1.72% 95% mean confidence interval for instructions value: 0.85 1.07 95% mean confidence interval for instructions %-change: 1.16% 1.36% Instructions are HURT. total loops in shared programs: 2441 -> 2437 (-0.16%) loops in affected programs: 8 -> 4 (-50.00%) helped: 4 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 50.00% max: 50.00% x̄: 50.00% x̃: 50.00% 95% mean confidence interval for loops value: -1.00 -1.00 95% mean confidence interval for loops %-change: -50.00% -50.00% Loops are helped. total cycles in shared programs: 497178796 -> 496669298 (-0.10%) cycles in affected programs: 51483322 -> 50973824 (-0.99%) helped: 476 HURT: 137 helped stats (abs) min: 2 max: 7502 x̄: 1079.36 x̃: 1260 helped stats (rel) min: <.01% max: 42.50% x̄: 2.31% x̃: 0.86% HURT stats (abs) min: 2 max: 754 x̄: 31.23 x̃: 18 HURT stats (rel) min: <.01% max: 3.01% x̄: 0.09% x̃: 0.02% 95% mean confidence interval for cycles value: -901.99 -760.32 95% mean confidence interval for cycles %-change: -2.20% -1.36% Cycles are helped. total sends in shared programs: 642919 -> 642915 (<.01%) sends in affected programs: 32 -> 28 (-12.50%) helped: 4 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 11.11% max: 14.29% x̄: 12.70% x̃: 12.70% 95% mean confidence interval for sends value: -1.00 -1.00 95% mean confidence interval for sends %-change: -15.61% -9.78% Sends are helped. 
Iron Lake total instructions in shared programs: 8180061 -> 8180248 (<.01%) instructions in affected programs: 65004 -> 65191 (0.29%) helped: 59 HURT: 253 helped stats (abs) min: 1 max: 4 x̄: 2.24 x̃: 3 helped stats (rel) min: 0.16% max: 2.23% x̄: 1.04% x̃: 1.29% HURT stats (abs) min: 1 max: 5 x̄: 1.26 x̃: 1 HURT stats (rel) min: 0.21% max: 3.85% x̄: 0.93% x̃: 0.60% 95% mean confidence interval for instructions value: 0.43 0.77 95% mean confidence interval for instructions %-change: 0.45% 0.68% Instructions are HURT. total loops in shared programs: 863 -> 861 (-0.23%) loops in affected programs: 4 -> 2 (-50.00%) helped: 2 HURT: 0 total cycles in shared programs: 239357490 -> 238907668 (-0.19%) cycles in affected programs: 17314006 -> 16864184 (-2.60%) helped: 176 HURT: 34 helped stats (abs) min: 4 max: 13400 x̄: 2558.05 x̃: 2920 helped stats (rel) min: 0.01% max: 35.58% x̄: 3.76% x̃: 2.69% HURT stats (abs) min: 2 max: 14 x̄: 11.59 x̃: 14 HURT stats (rel) min: <.01% max: 0.06% x̄: 0.03% x̃: 0.03% 95% mean confidence interval for cycles value: -2440.68 -1843.34 95% mean confidence interval for cycles %-change: -3.78% -2.51% Cycles are helped. GM45 total instructions in shared programs: 4985293 -> 4985401 (<.01%) instructions in affected programs: 58807 -> 58915 (0.18%) helped: 57 HURT: 202 helped stats (abs) min: 1 max: 4 x̄: 2.26 x̃: 3 helped stats (rel) min: 0.15% max: 2.23% x̄: 1.06% x̃: 1.29% HURT stats (abs) min: 1 max: 5 x̄: 1.17 x̃: 1 HURT stats (rel) min: 0.21% max: 3.85% x̄: 0.76% x̃: 0.48% 95% mean confidence interval for instructions value: 0.22 0.61 95% mean confidence interval for instructions %-change: 0.24% 0.48% Instructions are HURT. 
total loops in shared programs: 639 -> 638 (-0.16%) loops in affected programs: 2 -> 1 (-50.00%) helped: 1 HURT: 0 total cycles in shared programs: 153794236 -> 153546274 (-0.16%) cycles in affected programs: 9947778 -> 9699816 (-2.49%) helped: 110 HURT: 31 helped stats (abs) min: 4 max: 13400 x̄: 2257.51 x̃: 1796 helped stats (rel) min: 0.01% max: 35.58% x̄: 4.33% x̃: 2.45% HURT stats (abs) min: 2 max: 14 x̄: 11.74 x̃: 14 HURT stats (rel) min: <.01% max: 0.06% x̄: 0.03% x̃: 0.03% 95% mean confidence interval for cycles value: -2113.77 -1403.42 95% mean confidence interval for cycles %-change: -4.27% -2.47% Cycles are helped. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/2899 Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12064>
This commit is contained in:

committed by
Marge Bot

parent
a7f2e683de
commit
6538b3e566
@@ -37,10 +37,23 @@
|
||||
* verify correctness.
|
||||
*/
|
||||
|
||||
/* This is used to stop GCM from moving instructions out of a loop if the loop
|
||||
* contains too many instructions and moving them would create excess spilling.
|
||||
*
|
||||
* TODO: Figure out a better way to decide if we should remove instructions from
|
||||
* a loop.
|
||||
*/
|
||||
#define MAX_LOOP_INSTRUCTIONS 100
|
||||
|
||||
struct gcm_block_info {
|
||||
/* Number of loops this block is inside */
|
||||
unsigned loop_depth;
|
||||
|
||||
unsigned loop_instr_count;
|
||||
|
||||
/* The loop the block is nested inside or NULL */
|
||||
nir_loop *loop;
|
||||
|
||||
/* The last instruction inserted into this block. This is used as we
|
||||
* traverse the instructions and insert them back into the program to
|
||||
* put them in the right order.
|
||||
@@ -80,27 +93,63 @@ struct gcm_state {
|
||||
struct gcm_instr_info *instr_infos;
|
||||
};
|
||||
|
||||
static unsigned
|
||||
get_loop_instr_count(struct exec_list *cf_list)
|
||||
{
|
||||
unsigned loop_instr_count = 0;
|
||||
foreach_list_typed(nir_cf_node, node, node, cf_list) {
|
||||
switch (node->type) {
|
||||
case nir_cf_node_block: {
|
||||
nir_block *block = nir_cf_node_as_block(node);
|
||||
nir_foreach_instr(instr, block) {
|
||||
loop_instr_count++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case nir_cf_node_if: {
|
||||
nir_if *if_stmt = nir_cf_node_as_if(node);
|
||||
loop_instr_count += get_loop_instr_count(&if_stmt->then_list);
|
||||
loop_instr_count += get_loop_instr_count(&if_stmt->else_list);
|
||||
break;
|
||||
}
|
||||
case nir_cf_node_loop: {
|
||||
nir_loop *loop = nir_cf_node_as_loop(node);
|
||||
loop_instr_count += get_loop_instr_count(&loop->body);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
unreachable("Invalid CF node type");
|
||||
}
|
||||
}
|
||||
|
||||
return loop_instr_count;
|
||||
}
|
||||
|
||||
/* Recursively walks the CFG and builds the block_info structure */
|
||||
static void
|
||||
gcm_build_block_info(struct exec_list *cf_list, struct gcm_state *state,
|
||||
unsigned loop_depth)
|
||||
nir_loop *loop, unsigned loop_depth,
|
||||
unsigned loop_instr_count)
|
||||
{
|
||||
foreach_list_typed(nir_cf_node, node, node, cf_list) {
|
||||
switch (node->type) {
|
||||
case nir_cf_node_block: {
|
||||
nir_block *block = nir_cf_node_as_block(node);
|
||||
state->blocks[block->index].loop_depth = loop_depth;
|
||||
state->blocks[block->index].loop_instr_count = loop_instr_count;
|
||||
state->blocks[block->index].loop = loop;
|
||||
break;
|
||||
}
|
||||
case nir_cf_node_if: {
|
||||
nir_if *if_stmt = nir_cf_node_as_if(node);
|
||||
gcm_build_block_info(&if_stmt->then_list, state, loop_depth);
|
||||
gcm_build_block_info(&if_stmt->else_list, state, loop_depth);
|
||||
gcm_build_block_info(&if_stmt->then_list, state, loop, loop_depth, ~0u);
|
||||
gcm_build_block_info(&if_stmt->else_list, state, loop, loop_depth, ~0u);
|
||||
break;
|
||||
}
|
||||
case nir_cf_node_loop: {
|
||||
nir_loop *loop = nir_cf_node_as_loop(node);
|
||||
gcm_build_block_info(&loop->body, state, loop_depth + 1);
|
||||
gcm_build_block_info(&loop->body, state, loop, loop_depth + 1,
|
||||
get_loop_instr_count(&loop->body));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@@ -342,6 +391,46 @@ gcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state)
|
||||
nir_foreach_src(instr, gcm_schedule_early_src, state);
|
||||
}
|
||||
|
||||
static bool
|
||||
set_block_for_loop_instr(struct gcm_state *state, nir_instr *instr,
|
||||
nir_block *block)
|
||||
{
|
||||
if (nir_block_dominates(instr->block, block))
|
||||
return true;
|
||||
|
||||
/* If the loop only executes a single time i.e its wrapped in a:
|
||||
* do{ ... break; } while(true)
|
||||
* Don't move the instruction as it will not help anything.
|
||||
*/
|
||||
nir_loop *loop = state->blocks[instr->block->index].loop;
|
||||
if (loop->info->limiting_terminator == NULL && !loop->info->complex_loop &&
|
||||
nir_block_ends_in_break(nir_loop_last_block(loop)))
|
||||
return false;
|
||||
|
||||
/* Being too aggressive with how we pull instructions out of loops can
|
||||
* result in extra register pressure and spilling. For example its fairly
|
||||
* common for loops in compute shaders to calculate SSBO offsets using
|
||||
* the workgroup id, subgroup id and subgroup invocation, pulling all
|
||||
* these calculations outside the loop causes register pressure.
|
||||
*
|
||||
* To work around these issues for now we only allow constant and texture
|
||||
* instructions to be moved outside their original loops, or instructions
|
||||
* where the total loop instruction count is less than
|
||||
* MAX_LOOP_INSTRUCTIONS.
|
||||
*
|
||||
* TODO: figure out some more heuristics to allow more to be moved out of
|
||||
* loops.
|
||||
*/
|
||||
if (state->blocks[instr->block->index].loop_instr_count < MAX_LOOP_INSTRUCTIONS)
|
||||
return true;
|
||||
|
||||
if (instr->type == nir_instr_type_load_const ||
|
||||
instr->type == nir_instr_type_tex)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static nir_block *
|
||||
gcm_choose_block_for_instr(nir_instr *instr, nir_block *early_block,
|
||||
nir_block *late_block, struct gcm_state *state)
|
||||
@@ -350,22 +439,9 @@ gcm_choose_block_for_instr(nir_instr *instr, nir_block *early_block,
|
||||
|
||||
nir_block *best = late_block;
|
||||
for (nir_block *block = late_block; block != NULL; block = block->imm_dom) {
|
||||
/* Being too aggressive with how we pull instructions out of loops can
|
||||
* result in extra register pressure and spilling. For example its fairly
|
||||
* common for loops in compute shaders to calculate SSBO offsets using
|
||||
* the workgroup id, subgroup id and subgroup invocation, pulling all
|
||||
* these calculations outside the loop causes register pressure.
|
||||
*
|
||||
* To work around these issues for now we only allow constant and texture
|
||||
* instructions to be moved outside their original loops.
|
||||
*
|
||||
* TODO: figure out some heuristics to allow more to be moved out of loops.
|
||||
*/
|
||||
if (state->blocks[block->index].loop_depth <
|
||||
state->blocks[best->index].loop_depth &&
|
||||
(nir_block_dominates(instr->block, block) ||
|
||||
instr->type == nir_instr_type_load_const ||
|
||||
instr->type == nir_instr_type_tex))
|
||||
set_block_for_loop_instr(state, instr, block))
|
||||
best = block;
|
||||
else if (block == instr->block)
|
||||
best = block;
|
||||
@@ -557,10 +633,12 @@ gcm_place_instr(nir_instr *instr, struct gcm_state *state)
|
||||
}
|
||||
|
||||
static bool
|
||||
opt_gcm_impl(nir_function_impl *impl, bool value_number)
|
||||
opt_gcm_impl(nir_shader *shader, nir_function_impl *impl, bool value_number)
|
||||
{
|
||||
nir_metadata_require(impl, nir_metadata_block_index |
|
||||
nir_metadata_dominance);
|
||||
nir_metadata_require(impl, nir_metadata_loop_analysis,
|
||||
shader->options->force_indirect_unrolling);
|
||||
|
||||
/* A previous pass may have left pass_flags dirty, so clear it all out. */
|
||||
nir_foreach_block(block, impl)
|
||||
@@ -575,7 +653,7 @@ opt_gcm_impl(nir_function_impl *impl, bool value_number)
|
||||
exec_list_make_empty(&state.instrs);
|
||||
state.blocks = rzalloc_array(NULL, struct gcm_block_info, impl->num_blocks);
|
||||
|
||||
gcm_build_block_info(&impl->body, &state, 0);
|
||||
gcm_build_block_info(&impl->body, &state, NULL, 0, ~0u);
|
||||
|
||||
gcm_pin_instructions(impl, &state);
|
||||
|
||||
@@ -610,7 +688,8 @@ opt_gcm_impl(nir_function_impl *impl, bool value_number)
|
||||
ralloc_free(state.instr_infos);
|
||||
|
||||
nir_metadata_preserve(impl, nir_metadata_block_index |
|
||||
nir_metadata_dominance);
|
||||
nir_metadata_dominance |
|
||||
nir_metadata_loop_analysis);
|
||||
|
||||
return state.progress;
|
||||
}
|
||||
@@ -622,7 +701,7 @@ nir_opt_gcm(nir_shader *shader, bool value_number)
|
||||
|
||||
nir_foreach_function(function, shader) {
|
||||
if (function->impl)
|
||||
progress |= opt_gcm_impl(function->impl, value_number);
|
||||
progress |= opt_gcm_impl(shader, function->impl, value_number);
|
||||
}
|
||||
|
||||
return progress;
|
||||
|
Reference in New Issue
Block a user