pan/mdg: Schedule based on liveness
By estimating liveness in the scheduler and choosing instructions likely to reduce register pressure, on average we can decrease pressure given a sufficiently large window. On the other hand, decreasing pressure instead of leaning too heavily on the search window enables us to use a much larger search window without inflating pressure too much. So by doing both in lockstep, we benefit pretty well. total instructions in shared programs: 49458 -> 48540 (-1.86%) instructions in affected programs: 26931 -> 26013 (-3.41%) helped: 221 HURT: 15 helped stats (abs) min: 1 max: 36 x̄: 4.37 x̃: 2 helped stats (rel) min: 0.31% max: 16.90% x̄: 4.97% x̃: 3.85% HURT stats (abs) min: 1 max: 4 x̄: 3.13 x̃: 3 HURT stats (rel) min: 0.50% max: 7.14% x̄: 4.53% x̃: 4.55% 95% mean confidence interval for instructions value: -4.65 -3.13 95% mean confidence interval for instructions %-change: -4.94% -3.81% Instructions are helped. total bundles in shared programs: 25199 -> 23446 (-6.96%) bundles in affected programs: 21600 -> 19847 (-8.12%) helped: 277 HURT: 170 helped stats (abs) min: 1 max: 45 x̄: 7.33 x̃: 6 helped stats (rel) min: 1.06% max: 33.83% x̄: 11.01% x̃: 8.57% HURT stats (abs) min: 1 max: 6 x̄: 1.63 x̃: 1 HURT stats (rel) min: 1.19% max: 40.00% x̄: 13.36% x̃: 11.11% 95% mean confidence interval for bundles value: -4.61 -3.23 95% mean confidence interval for bundles %-change: -3.00% -0.49% Bundles are helped. 
total quadwords in shared programs: 40269 -> 39652 (-1.53%) quadwords in affected programs: 35881 -> 35264 (-1.72%) helped: 242 HURT: 244 helped stats (abs) min: 1 max: 36 x̄: 4.61 x̃: 3 helped stats (rel) min: 0.39% max: 16.33% x̄: 5.33% x̃: 5.13% HURT stats (abs) min: 1 max: 20 x̄: 2.04 x̃: 1 HURT stats (rel) min: 0.81% max: 21.74% x̄: 7.57% x̃: 6.25% 95% mean confidence interval for quadwords value: -1.71 -0.83 95% mean confidence interval for quadwords %-change: 0.46% 1.82% Inconclusive result (value mean confidence interval and %-change mean confidence interval disagree). total registers in shared programs: 3786 -> 3336 (-11.89%) registers in affected programs: 2161 -> 1711 (-20.82%) helped: 262 HURT: 35 helped stats (abs) min: 1 max: 7 x̄: 1.87 x̃: 1 helped stats (rel) min: 6.25% max: 66.67% x̄: 28.91% x̃: 25.00% HURT stats (abs) min: 1 max: 3 x̄: 1.11 x̃: 1 HURT stats (rel) min: 7.69% max: 100.00% x̄: 19.76% x̃: 12.50% 95% mean confidence interval for registers value: -1.70 -1.33 95% mean confidence interval for registers %-change: -25.56% -20.79% Registers are helped. total threads in shared programs: 2453 -> 2592 (5.67%) threads in affected programs: 160 -> 299 (86.87%) helped: 79 HURT: 6 helped stats (abs) min: 1 max: 2 x̄: 1.85 x̃: 2 helped stats (rel) min: 100.00% max: 100.00% x̄: 100.00% x̃: 100.00% HURT stats (abs) min: 1 max: 2 x̄: 1.17 x̃: 1 HURT stats (rel) min: 50.00% max: 50.00% x̄: 50.00% x̃: 50.00% 95% mean confidence interval for threads value: 1.45 1.82 95% mean confidence interval for threads %-change: 81.08% 97.75% Threads are [helped]. total spills in shared programs: 168 -> 17 (-89.88%) spills in affected programs: 167 -> 16 (-90.42%) helped: 13 HURT: 0 total fills in shared programs: 186 -> 35 (-81.18%) fills in affected programs: 186 -> 35 (-81.18%) helped: 14 HURT: 0 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5513>
This commit is contained in:
@@ -572,9 +572,56 @@ mir_has_unit(midgard_instruction *ins, unsigned unit)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Net change in liveness if an instruction were scheduled. Loosely based on
|
||||
* ir3's scheduler. */
|
||||
|
||||
static int
|
||||
mir_live_effect(uint16_t *liveness, midgard_instruction *ins, bool destructive)
|
||||
{
|
||||
/* TODO: what if dest is used multiple times? */
|
||||
int free_live = 0;
|
||||
|
||||
if (ins->dest < SSA_FIXED_MINIMUM) {
|
||||
unsigned bytemask = mir_bytemask(ins);
|
||||
bytemask = util_next_power_of_two(bytemask + 1) - 1;
|
||||
free_live += util_bitcount(liveness[ins->dest] & bytemask);
|
||||
|
||||
if (destructive)
|
||||
liveness[ins->dest] &= ~bytemask;
|
||||
}
|
||||
|
||||
int new_live = 0;
|
||||
|
||||
mir_foreach_src(ins, s) {
|
||||
unsigned S = ins->src[s];
|
||||
|
||||
bool dupe = false;
|
||||
|
||||
for (unsigned q = 0; q < s; ++q)
|
||||
dupe |= (ins->src[q] == S);
|
||||
|
||||
if (dupe)
|
||||
continue;
|
||||
|
||||
if (S < SSA_FIXED_MINIMUM) {
|
||||
unsigned bytemask = mir_bytemask_of_read_components(ins, S);
|
||||
bytemask = util_next_power_of_two(bytemask + 1) - 1;
|
||||
|
||||
/* Count only the new components */
|
||||
new_live += util_bitcount(bytemask & ~(liveness[S]));
|
||||
|
||||
if (destructive)
|
||||
liveness[S] |= bytemask;
|
||||
}
|
||||
}
|
||||
|
||||
return new_live - free_live;
|
||||
}
|
||||
|
||||
static midgard_instruction *
|
||||
mir_choose_instruction(
|
||||
midgard_instruction **instructions,
|
||||
uint16_t *liveness,
|
||||
BITSET_WORD *worklist, unsigned count,
|
||||
struct midgard_predicate *predicate)
|
||||
{
|
||||
@@ -595,6 +642,7 @@ mir_choose_instruction(
|
||||
unsigned i;
|
||||
|
||||
signed best_index = -1;
|
||||
signed best_effect = INT_MAX;
|
||||
bool best_conditional = false;
|
||||
|
||||
/* Enforce a simple metric limiting distance to keep down register
|
||||
@@ -602,7 +650,7 @@ mir_choose_instruction(
|
||||
* results */
|
||||
|
||||
unsigned max_active = 0;
|
||||
unsigned max_distance = 6;
|
||||
unsigned max_distance = 36;
|
||||
|
||||
BITSET_FOREACH_SET(i, worklist, count) {
|
||||
max_active = MAX2(max_active, i);
|
||||
@@ -655,15 +703,19 @@ mir_choose_instruction(
|
||||
if (conditional && no_cond)
|
||||
continue;
|
||||
|
||||
/* Simulate in-order scheduling */
|
||||
if ((signed) i < best_index)
|
||||
int effect = mir_live_effect(liveness, instructions[i], false);
|
||||
|
||||
if (effect > best_effect)
|
||||
continue;
|
||||
|
||||
if (effect == best_effect && (signed) i < best_index)
|
||||
continue;
|
||||
|
||||
best_effect = effect;
|
||||
best_index = i;
|
||||
best_conditional = conditional;
|
||||
}
|
||||
|
||||
|
||||
/* Did we find anything? */
|
||||
|
||||
if (best_index < 0)
|
||||
@@ -686,6 +738,7 @@ mir_choose_instruction(
|
||||
|
||||
/* Once we schedule a conditional, we can't again */
|
||||
predicate->no_cond |= best_conditional;
|
||||
mir_live_effect(liveness, instructions[best_index], true);
|
||||
}
|
||||
|
||||
return instructions[best_index];
|
||||
@@ -697,6 +750,7 @@ mir_choose_instruction(
|
||||
static unsigned
|
||||
mir_choose_bundle(
|
||||
midgard_instruction **instructions,
|
||||
uint16_t *liveness,
|
||||
BITSET_WORD *worklist, unsigned count)
|
||||
{
|
||||
/* At the moment, our algorithm is very simple - use the bundle of the
|
||||
@@ -709,7 +763,7 @@ mir_choose_bundle(
|
||||
.exclude = ~0
|
||||
};
|
||||
|
||||
midgard_instruction *chosen = mir_choose_instruction(instructions, worklist, count, &predicate);
|
||||
midgard_instruction *chosen = mir_choose_instruction(instructions, liveness, worklist, count, &predicate);
|
||||
|
||||
if (chosen)
|
||||
return chosen->type;
|
||||
@@ -721,6 +775,7 @@ mir_choose_bundle(
|
||||
static void
|
||||
mir_choose_alu(midgard_instruction **slot,
|
||||
midgard_instruction **instructions,
|
||||
uint16_t *liveness,
|
||||
BITSET_WORD *worklist, unsigned len,
|
||||
struct midgard_predicate *predicate,
|
||||
unsigned unit)
|
||||
@@ -731,7 +786,7 @@ mir_choose_alu(midgard_instruction **slot,
|
||||
|
||||
/* Try to schedule something, if not */
|
||||
predicate->unit = unit;
|
||||
*slot = mir_choose_instruction(instructions, worklist, len, predicate);
|
||||
*slot = mir_choose_instruction(instructions, liveness, worklist, len, predicate);
|
||||
|
||||
/* Store unit upon scheduling */
|
||||
if (*slot && !((*slot)->compact_branch))
|
||||
@@ -898,6 +953,7 @@ mir_schedule_condition(compiler_context *ctx,
|
||||
static midgard_bundle
|
||||
mir_schedule_texture(
|
||||
midgard_instruction **instructions,
|
||||
uint16_t *liveness,
|
||||
BITSET_WORD *worklist, unsigned len,
|
||||
bool is_vertex)
|
||||
{
|
||||
@@ -908,7 +964,7 @@ mir_schedule_texture(
|
||||
};
|
||||
|
||||
midgard_instruction *ins =
|
||||
mir_choose_instruction(instructions, worklist, len, &predicate);
|
||||
mir_choose_instruction(instructions, liveness, worklist, len, &predicate);
|
||||
|
||||
mir_update_worklist(worklist, len, instructions, ins);
|
||||
|
||||
@@ -926,6 +982,7 @@ mir_schedule_texture(
|
||||
static midgard_bundle
|
||||
mir_schedule_ldst(
|
||||
midgard_instruction **instructions,
|
||||
uint16_t *liveness,
|
||||
BITSET_WORD *worklist, unsigned len)
|
||||
{
|
||||
struct midgard_predicate predicate = {
|
||||
@@ -937,10 +994,10 @@ mir_schedule_ldst(
|
||||
/* Try to pick two load/store ops. The second is not guaranteed to exist */
|
||||
|
||||
midgard_instruction *ins =
|
||||
mir_choose_instruction(instructions, worklist, len, &predicate);
|
||||
mir_choose_instruction(instructions, liveness, worklist, len, &predicate);
|
||||
|
||||
midgard_instruction *pair =
|
||||
mir_choose_instruction(instructions, worklist, len, &predicate);
|
||||
mir_choose_instruction(instructions, liveness, worklist, len, &predicate);
|
||||
|
||||
struct midgard_bundle out = {
|
||||
.tag = TAG_LOAD_STORE_4,
|
||||
@@ -962,6 +1019,7 @@ mir_schedule_zs_write(
|
||||
compiler_context *ctx,
|
||||
struct midgard_predicate *predicate,
|
||||
midgard_instruction **instructions,
|
||||
uint16_t *liveness,
|
||||
BITSET_WORD *worklist, unsigned len,
|
||||
midgard_instruction *branch,
|
||||
midgard_instruction **smul,
|
||||
@@ -985,7 +1043,7 @@ mir_schedule_zs_write(
|
||||
|
||||
predicate->unit = unit_names[i];
|
||||
midgard_instruction *ins =
|
||||
mir_choose_instruction(instructions, worklist, len, predicate);
|
||||
mir_choose_instruction(instructions, liveness, worklist, len, predicate);
|
||||
|
||||
if (ins) {
|
||||
ins->unit = unit_names[i];
|
||||
@@ -1028,6 +1086,7 @@ static midgard_bundle
|
||||
mir_schedule_alu(
|
||||
compiler_context *ctx,
|
||||
midgard_instruction **instructions,
|
||||
uint16_t *liveness,
|
||||
BITSET_WORD *worklist, unsigned len)
|
||||
{
|
||||
struct midgard_bundle bundle = {};
|
||||
@@ -1048,7 +1107,7 @@ mir_schedule_alu(
|
||||
midgard_instruction *sadd = NULL;
|
||||
midgard_instruction *branch = NULL;
|
||||
|
||||
mir_choose_alu(&branch, instructions, worklist, len, &predicate, ALU_ENAB_BR_COMPACT);
|
||||
mir_choose_alu(&branch, instructions, liveness, worklist, len, &predicate, ALU_ENAB_BR_COMPACT);
|
||||
mir_update_worklist(worklist, len, instructions, branch);
|
||||
unsigned writeout = branch ? branch->writeout : 0;
|
||||
|
||||
@@ -1123,19 +1182,19 @@ mir_schedule_alu(
|
||||
}
|
||||
|
||||
if (writeout & PAN_WRITEOUT_Z)
|
||||
mir_schedule_zs_write(ctx, &predicate, instructions, worklist, len, branch, &smul, &vadd, &vlut, false);
|
||||
mir_schedule_zs_write(ctx, &predicate, instructions, liveness, worklist, len, branch, &smul, &vadd, &vlut, false);
|
||||
|
||||
if (writeout & PAN_WRITEOUT_S)
|
||||
mir_schedule_zs_write(ctx, &predicate, instructions, worklist, len, branch, &smul, &vadd, &vlut, true);
|
||||
mir_schedule_zs_write(ctx, &predicate, instructions, liveness, worklist, len, branch, &smul, &vadd, &vlut, true);
|
||||
|
||||
mir_choose_alu(&smul, instructions, worklist, len, &predicate, UNIT_SMUL);
|
||||
mir_choose_alu(&smul, instructions, liveness, worklist, len, &predicate, UNIT_SMUL);
|
||||
|
||||
for (unsigned moves = 0; moves < 2; ++moves) {
|
||||
predicate.moves = moves;
|
||||
predicate.no_mask = writeout ? (1 << 3) : 0;
|
||||
mir_choose_alu(&vlut, instructions, worklist, len, &predicate, UNIT_VLUT);
|
||||
mir_choose_alu(&vlut, instructions, liveness, worklist, len, &predicate, UNIT_VLUT);
|
||||
predicate.no_mask = 0;
|
||||
mir_choose_alu(&vadd, instructions, worklist, len, &predicate, UNIT_VADD);
|
||||
mir_choose_alu(&vadd, instructions, liveness, worklist, len, &predicate, UNIT_VADD);
|
||||
}
|
||||
|
||||
mir_update_worklist(worklist, len, instructions, vlut);
|
||||
@@ -1158,7 +1217,7 @@ mir_schedule_alu(
|
||||
}
|
||||
|
||||
/* Stage 2, let's schedule sadd before vmul for writeout */
|
||||
mir_choose_alu(&sadd, instructions, worklist, len, &predicate, UNIT_SADD);
|
||||
mir_choose_alu(&sadd, instructions, liveness, worklist, len, &predicate, UNIT_SADD);
|
||||
|
||||
/* Check if writeout reads its own register */
|
||||
|
||||
@@ -1191,7 +1250,7 @@ mir_schedule_alu(
|
||||
predicate.mask = writeout_mask ^ full_mask;
|
||||
|
||||
struct midgard_instruction *peaked =
|
||||
mir_choose_instruction(instructions, worklist, len, &predicate);
|
||||
mir_choose_instruction(instructions, liveness, worklist, len, &predicate);
|
||||
|
||||
if (peaked) {
|
||||
vmul = peaked;
|
||||
@@ -1224,7 +1283,7 @@ mir_schedule_alu(
|
||||
}
|
||||
}
|
||||
|
||||
mir_choose_alu(&vmul, instructions, worklist, len, &predicate, UNIT_VMUL);
|
||||
mir_choose_alu(&vmul, instructions, liveness, worklist, len, &predicate, UNIT_VMUL);
|
||||
|
||||
mir_update_worklist(worklist, len, instructions, vmul);
|
||||
mir_update_worklist(worklist, len, instructions, sadd);
|
||||
@@ -1298,6 +1357,7 @@ schedule_block(compiler_context *ctx, midgard_block *block)
|
||||
/* Allocate the worklist */
|
||||
size_t sz = BITSET_WORDS(len) * sizeof(BITSET_WORD);
|
||||
BITSET_WORD *worklist = calloc(sz, 1);
|
||||
uint16_t *liveness = calloc(node_count, 2);
|
||||
mir_initialize_worklist(worklist, instructions, len);
|
||||
|
||||
struct util_dynarray bundles;
|
||||
@@ -1307,15 +1367,15 @@ schedule_block(compiler_context *ctx, midgard_block *block)
|
||||
unsigned blend_offset = 0;
|
||||
|
||||
for (;;) {
|
||||
unsigned tag = mir_choose_bundle(instructions, worklist, len);
|
||||
unsigned tag = mir_choose_bundle(instructions, liveness, worklist, len);
|
||||
midgard_bundle bundle;
|
||||
|
||||
if (tag == TAG_TEXTURE_4)
|
||||
bundle = mir_schedule_texture(instructions, worklist, len, ctx->stage != MESA_SHADER_FRAGMENT);
|
||||
bundle = mir_schedule_texture(instructions, liveness, worklist, len, ctx->stage != MESA_SHADER_FRAGMENT);
|
||||
else if (tag == TAG_LOAD_STORE_4)
|
||||
bundle = mir_schedule_ldst(instructions, worklist, len);
|
||||
bundle = mir_schedule_ldst(instructions, liveness, worklist, len);
|
||||
else if (tag == TAG_ALU_4)
|
||||
bundle = mir_schedule_alu(ctx, instructions, worklist, len);
|
||||
bundle = mir_schedule_alu(ctx, instructions, liveness, worklist, len);
|
||||
else
|
||||
break;
|
||||
|
||||
@@ -1360,6 +1420,7 @@ schedule_block(compiler_context *ctx, midgard_block *block)
|
||||
|
||||
free(instructions); /* Allocated by flatten_mir() */
|
||||
free(worklist);
|
||||
free(liveness);
|
||||
}
|
||||
|
||||
void
|
||||
|
Reference in New Issue
Block a user