aco: calculate correct register demand for branch instructions

Since copies for the successor's linear phis are inserted before the
branch, we should consider the definitions and operands of the successor's
linear phis.

Fixes a Detroit: Become Human spilling failure with GCM+GVN.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12035>
This commit is contained in:
Rhys Perry
2021-07-26 17:55:48 +01:00
committed by Marge Bot
parent 87a8349411
commit 5a536eca9c

View File

@@ -82,9 +82,15 @@ get_demand_before(RegisterDemand demand, aco_ptr<Instruction>& instr,
} }
namespace { namespace {
/* Per-block SGPR bookkeeping for phi instructions found in successor blocks.
 * Instances are stored in a vector sized by the number of program blocks and
 * are indexed by block index. All counters start at zero.
 */
struct PhiInfo {
/* Summed operand.size() of SGPR p_phi operands that were newly added to this
 * block's live-out set. Subtracted from the block's initial SGPR demand and
 * added back when p_logical_end is reached during the backwards traversal.
 */
uint16_t logical_phi_sgpr_ops = 0;
/* Summed operand.size() of p_linear_phi operands that were newly added to
 * this block's live-out set. Subtracted from the SGPR demand of the block's
 * last instruction once the analysis has finished.
 */
uint16_t linear_phi_ops = 0;
/* Summed definition.size() of the p_linear_phi definitions of a block that
 * lists this block as a linear predecessor (overwritten, not accumulated,
 * each time such a block is processed). Added to the SGPR demand of this
 * block's last instruction once the analysis has finished.
 */
uint16_t linear_phi_defs = 0;
};
void void
process_live_temps_per_block(Program* program, live& lives, Block* block, process_live_temps_per_block(Program* program, live& lives, Block* block, unsigned& worklist,
unsigned& worklist, std::vector<uint16_t>& phi_sgpr_ops) std::vector<PhiInfo>& phi_info)
{ {
std::vector<RegisterDemand>& register_demand = lives.register_demand[block->index]; std::vector<RegisterDemand>& register_demand = lives.register_demand[block->index];
RegisterDemand new_demand; RegisterDemand new_demand;
@@ -96,7 +102,7 @@ process_live_temps_per_block(Program* program, live& lives, Block* block,
/* initialize register demand */ /* initialize register demand */
for (unsigned t : live) for (unsigned t : live)
new_demand += Temp(t, program->temp_rc[t]); new_demand += Temp(t, program->temp_rc[t]);
new_demand.sgpr -= phi_sgpr_ops[block->index]; new_demand.sgpr -= phi_info[block->index].logical_phi_sgpr_ops;
/* traverse the instructions backwards */ /* traverse the instructions backwards */
int idx; int idx;
@@ -129,7 +135,7 @@ process_live_temps_per_block(Program* program, live& lives, Block* block,
/* GEN */ /* GEN */
if (insn->opcode == aco_opcode::p_logical_end) { if (insn->opcode == aco_opcode::p_logical_end) {
new_demand.sgpr += phi_sgpr_ops[block->index]; new_demand.sgpr += phi_info[block->index].logical_phi_sgpr_ops;
} else { } else {
/* we need to do this in a separate loop because the next one can /* we need to do this in a separate loop because the next one can
* setKill() for several operands at once and we don't want to * setKill() for several operands at once and we don't want to
@@ -170,6 +176,7 @@ process_live_temps_per_block(Program* program, live& lives, Block* block,
block->register_demand = block_register_demand; block->register_demand = block_register_demand;
/* handle phi definitions */ /* handle phi definitions */
uint16_t linear_phi_defs = 0;
int phi_idx = idx; int phi_idx = idx;
while (phi_idx >= 0) { while (phi_idx >= 0) {
register_demand[phi_idx] = new_demand; register_demand[phi_idx] = new_demand;
@@ -192,9 +199,17 @@ process_live_temps_per_block(Program* program, live& lives, Block* block,
else else
definition.setKill(true); definition.setKill(true);
if (insn->opcode == aco_opcode::p_linear_phi) {
assert(definition.getTemp().type() == RegType::sgpr);
linear_phi_defs += definition.size();
}
phi_idx--; phi_idx--;
} }
for (unsigned pred_idx : block->linear_preds)
phi_info[pred_idx].linear_phi_defs = linear_phi_defs;
/* now, we need to merge the live-ins into the live-out sets */ /* now, we need to merge the live-ins into the live-out sets */
for (unsigned t : live) { for (unsigned t : live) {
RegClass rc = program->temp_rc[t]; RegClass rc = program->temp_rc[t];
@@ -231,8 +246,12 @@ process_live_temps_per_block(Program* program, live& lives, Block* block,
const bool inserted = lives.live_out[preds[i]].insert(operand.tempId()).second; const bool inserted = lives.live_out[preds[i]].insert(operand.tempId()).second;
if (inserted) { if (inserted) {
worklist = std::max(worklist, preds[i] + 1); worklist = std::max(worklist, preds[i] + 1);
if (insn->opcode == aco_opcode::p_phi && operand.getTemp().type() == RegType::sgpr) if (insn->opcode == aco_opcode::p_phi && operand.getTemp().type() == RegType::sgpr) {
phi_sgpr_ops[preds[i]] += operand.size(); phi_info[preds[i]].logical_phi_sgpr_ops += operand.size();
} else if (insn->opcode == aco_opcode::p_linear_phi) {
assert(operand.getTemp().type() == RegType::sgpr);
phi_info[preds[i]].linear_phi_ops += operand.size();
}
} }
/* set if the operand is killed by this (or another) phi instruction */ /* set if the operand is killed by this (or another) phi instruction */
@@ -386,7 +405,7 @@ live_var_analysis(Program* program)
result.live_out.resize(program->blocks.size()); result.live_out.resize(program->blocks.size());
result.register_demand.resize(program->blocks.size()); result.register_demand.resize(program->blocks.size());
unsigned worklist = program->blocks.size(); unsigned worklist = program->blocks.size();
std::vector<uint16_t> phi_sgpr_ops(program->blocks.size()); std::vector<PhiInfo> phi_info(program->blocks.size());
RegisterDemand new_demand; RegisterDemand new_demand;
program->needs_vcc = false; program->needs_vcc = false;
@@ -396,10 +415,16 @@ live_var_analysis(Program* program)
while (worklist) { while (worklist) {
unsigned block_idx = --worklist; unsigned block_idx = --worklist;
process_live_temps_per_block(program, result, &program->blocks[block_idx], worklist, process_live_temps_per_block(program, result, &program->blocks[block_idx], worklist,
phi_sgpr_ops); phi_info);
new_demand.update(program->blocks[block_idx].register_demand); new_demand.update(program->blocks[block_idx].register_demand);
} }
/* Handle branches: we will insert copies created for linear phis just before the branch. */
for (Block& block : program->blocks) {
result.register_demand[block.index].back().sgpr += phi_info[block.index].linear_phi_defs;
result.register_demand[block.index].back().sgpr -= phi_info[block.index].linear_phi_ops;
}
/* calculate the program's register demand and number of waves */ /* calculate the program's register demand and number of waves */
if (program->progress < CompilationProgress::after_ra) if (program->progress < CompilationProgress::after_ra)
update_vgpr_sgpr_demand(program, new_demand); update_vgpr_sgpr_demand(program, new_demand);