aco: calculate register demand per instruction as maximum necessary to execute the instruction

Previously, the register demand per instruction was calculated as the number of
live variables in the register file after executing an instruction, plus any
additional temporary registers needed during the execution of the instruction.
With this change, it also includes all variables which are live right before
executing an instruction, i.e. Operands which are killed by the instruction.

Care has been taken so that the invariant

register_demand[idx] = register_demand[idx - 1] - get_temp_registers(prev_instr)
                        + get_live_changes(instr) + get_temp_registers(instr)

still holds.

Slight changes in scheduling:

Totals from 316 (0.40% of 79395) affected shaders: (GFX11)

Instrs: 301329 -> 300777 (-0.18%); split: -0.31%, +0.12%
CodeSize: 1577976 -> 1576204 (-0.11%); split: -0.21%, +0.10%
SpillSGPRs: 448 -> 447 (-0.22%)
Latency: 1736349 -> 1726182 (-0.59%); split: -2.01%, +1.42%
InvThroughput: 243894 -> 243883 (-0.00%); split: -0.03%, +0.03%
VClause: 6134 -> 6280 (+2.38%); split: -1.04%, +3.42%
SClause: 6142 -> 6137 (-0.08%); split: -0.13%, +0.05%
Copies: 14037 -> 14032 (-0.04%); split: -0.56%, +0.52%
Branches: 3284 -> 3283 (-0.03%)
VALU: 182750 -> 182718 (-0.02%); split: -0.04%, +0.03%
SALU: 18522 -> 18538 (+0.09%)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29804>
This commit is contained in:
Daniel Schürmann
2024-06-20 10:03:11 +02:00
committed by Marge Bot
parent 4c2f231cc0
commit 001c8caae0
2 changed files with 17 additions and 15 deletions

View File

@@ -49,28 +49,30 @@ handle_def_fixed_to_op(RegisterDemand* demand, RegisterDemand demand_before, Ins
RegisterDemand
get_temp_registers(aco_ptr<Instruction>& instr)
{
RegisterDemand temp_registers;
RegisterDemand demand_before;
RegisterDemand demand_after;
for (Definition def : instr->definitions) {
if (!def.isTemp())
continue;
if (def.isKill())
temp_registers += def.getTemp();
demand_after += def.getTemp();
else if (def.isTemp())
demand_before -= def.getTemp();
}
for (Operand op : instr->operands) {
if (op.isTemp() && op.isLateKill() && op.isFirstKill())
temp_registers += op.getTemp();
if (op.isFirstKill()) {
demand_before += op.getTemp();
if (op.isLateKill())
demand_after += op.getTemp();
}
}
int op_idx = get_op_fixed_to_def(instr.get());
if (op_idx != -1 && !instr->operands[op_idx].isKill()) {
RegisterDemand before_instr;
before_instr -= get_live_changes(instr);
handle_def_fixed_to_op(&temp_registers, before_instr, instr.get(), op_idx);
}
if (op_idx != -1 && !instr->operands[op_idx].isKill())
demand_before += instr->definitions[0].getTemp();
return temp_registers;
demand_after.update(demand_before);
return demand_after;
}
RegisterDemand
@@ -190,6 +192,8 @@ process_live_temps_per_block(Program* program, Block* block, unsigned& worklist,
RegisterDemand before_instr = new_demand;
handle_def_fixed_to_op(&register_demand[idx], before_instr, insn, op_idx);
}
register_demand[idx].update(new_demand);
}
/* handle phi definitions */

View File

@@ -993,11 +993,9 @@ process_block(spill_ctx& ctx, unsigned block_idx, Block* block, RegisterDemand s
spilled_registers -= new_tmp;
}
/* check if register demand is low enough before and after the current instruction */
/* check if register demand is low enough during and after the current instruction */
if (block->register_demand.exceeds(ctx.target_pressure)) {
RegisterDemand new_demand = ctx.program->live.register_demand[block_idx][idx];
new_demand.update(get_demand_before(ctx, block_idx, idx));
/* if reg pressure is too high, spill variable with furthest next use */
while ((new_demand - spilled_registers).exceeds(ctx.target_pressure)) {