From 001c8caae0a9feb951d9aafb27bbbdce11f315eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Thu, 20 Jun 2024 10:03:11 +0200 Subject: [PATCH] aco: calculate register demand per instruction as maximum necessary to execute the instruction Previously, the register demand per instruction was calculated as the number of live variables in the register file after executing an instruction plus additional temporary registers, necessary during the execution of the instruction. With this change, now it also includes all variables which are live right before executing an instruction, i.e. killed Operands. Care has been taken so that the invariant register_demand[idx] = register_demand[idx - 1] - get_temp_registers(prev_instr) + get_live_changes(instr) + get_temp_registers(instr) still holds. Slight changes in scheduling: Totals from 316 (0.40% of 79395) affected shaders: (GFX11) Instrs: 301329 -> 300777 (-0.18%); split: -0.31%, +0.12% CodeSize: 1577976 -> 1576204 (-0.11%); split: -0.21%, +0.10% SpillSGPRs: 448 -> 447 (-0.22%) Latency: 1736349 -> 1726182 (-0.59%); split: -2.01%, +1.42% InvThroughput: 243894 -> 243883 (-0.00%); split: -0.03%, +0.03% VClause: 6134 -> 6280 (+2.38%); split: -1.04%, +3.42% SClause: 6142 -> 6137 (-0.08%); split: -0.13%, +0.05% Copies: 14037 -> 14032 (-0.04%); split: -0.56%, +0.52% Branches: 3284 -> 3283 (-0.03%) VALU: 182750 -> 182718 (-0.02%); split: -0.04%, +0.03% SALU: 18522 -> 18538 (+0.09%) Part-of: --- src/amd/compiler/aco_live_var_analysis.cpp | 28 ++++++++++++---------- src/amd/compiler/aco_spill.cpp | 4 +--- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp index 9160cb55583..374e2f33f8f 100644 --- a/src/amd/compiler/aco_live_var_analysis.cpp +++ b/src/amd/compiler/aco_live_var_analysis.cpp @@ -49,28 +49,30 @@ handle_def_fixed_to_op(RegisterDemand* demand, RegisterDemand demand_before, Ins RegisterDemand 
get_temp_registers(aco_ptr<Instruction>& instr) { - RegisterDemand temp_registers; + RegisterDemand demand_before; + RegisterDemand demand_after; for (Definition def : instr->definitions) { - if (!def.isTemp()) - continue; if (def.isKill()) - temp_registers += def.getTemp(); + demand_after += def.getTemp(); + else if (def.isTemp()) + demand_before -= def.getTemp(); } for (Operand op : instr->operands) { - if (op.isTemp() && op.isLateKill() && op.isFirstKill()) - temp_registers += op.getTemp(); + if (op.isFirstKill()) { + demand_before += op.getTemp(); + if (op.isLateKill()) + demand_after += op.getTemp(); + } } int op_idx = get_op_fixed_to_def(instr.get()); - if (op_idx != -1 && !instr->operands[op_idx].isKill()) { - RegisterDemand before_instr; - before_instr -= get_live_changes(instr); - handle_def_fixed_to_op(&temp_registers, before_instr, instr.get(), op_idx); - } + if (op_idx != -1 && !instr->operands[op_idx].isKill()) + demand_before += instr->definitions[0].getTemp(); - return temp_registers; + demand_after.update(demand_before); + return demand_after; } RegisterDemand @@ -190,6 +192,8 @@ process_live_temps_per_block(Program* program, Block* block, unsigned& worklist, RegisterDemand before_instr = new_demand; handle_def_fixed_to_op(&register_demand[idx], before_instr, insn, op_idx); } + + register_demand[idx].update(new_demand); } /* handle phi definitions */ diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp index 65951058f8d..da9efc65677 100644 --- a/src/amd/compiler/aco_spill.cpp +++ b/src/amd/compiler/aco_spill.cpp @@ -993,11 +993,9 @@ process_block(spill_ctx& ctx, unsigned block_idx, Block* block, RegisterDemand s spilled_registers -= new_tmp; } - /* check if register demand is low enough before and after the current instruction */ + /* check if register demand is low enough during and after the current instruction */ if (block->register_demand.exceeds(ctx.target_pressure)) { - RegisterDemand new_demand = 
ctx.program->live.register_demand[block_idx][idx]; - new_demand.update(get_demand_before(ctx, block_idx, idx)); /* if reg pressure is too high, spill variable with furthest next use */ while ((new_demand - spilled_registers).exceeds(ctx.target_pressure)) {