aco: only reserve sgprs for vcc if it's used
pipeline-db (Vega):

Totals:
SGPRS: 5186302 -> 5075616 (-2.13 %)
VGPRS: 3704580 -> 3704580 (0.00 %)
Spilled SGPRs: 144859 -> 144859 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Scratch size: 4124 -> 4124 (0.00 %) dwords per thread
Code Size: 247315944 -> 247315944 (0.00 %) bytes
LDS: 1311 -> 1311 (0.00 %) blocks
Max Waves: 674560 -> 674562 (0.00 %)

Totals from affected shaders:
SGPRS: 536992 -> 426306 (-20.61 %)
VGPRS: 356404 -> 356404 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 8498748 -> 8498748 (0.00 %) bytes
LDS: 8 -> 8 (0.00 %) blocks
Max Waves: 113832 -> 113834 (0.00 %)

There are some small code size changes in a few RotTR shaders and a small increase in max_waves in two Detroit: Become Human shaders.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3906>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3906>
This commit is contained in:
@@ -1007,9 +1007,6 @@ setup_isel_context(Program* program,
|
|||||||
program->sgpr_limit = 104;
|
program->sgpr_limit = 104;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO: we don't have to allocate VCC if we don't need it */
|
|
||||||
program->needs_vcc = true;
|
|
||||||
|
|
||||||
calc_min_waves(program);
|
calc_min_waves(program);
|
||||||
program->vgpr_limit = get_addr_vgpr_from_waves(program, program->min_waves);
|
program->vgpr_limit = get_addr_vgpr_from_waves(program, program->min_waves);
|
||||||
program->sgpr_limit = get_addr_sgpr_from_waves(program, program->min_waves);
|
program->sgpr_limit = get_addr_sgpr_from_waves(program, program->min_waves);
|
||||||
|
@@ -87,6 +87,8 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block,
|
|||||||
if (!definition.isTemp()) {
|
if (!definition.isTemp()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if ((definition.isFixed() || definition.hasHint()) && definition.physReg() == vcc)
|
||||||
|
program->needs_vcc = true;
|
||||||
|
|
||||||
const Temp temp = definition.getTemp();
|
const Temp temp = definition.getTemp();
|
||||||
size_t n = 0;
|
size_t n = 0;
|
||||||
@@ -120,9 +122,10 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block,
|
|||||||
for (unsigned i = 0; i < insn->operands.size(); ++i)
|
for (unsigned i = 0; i < insn->operands.size(); ++i)
|
||||||
{
|
{
|
||||||
Operand& operand = insn->operands[i];
|
Operand& operand = insn->operands[i];
|
||||||
if (!operand.isTemp()) {
|
if (!operand.isTemp())
|
||||||
continue;
|
continue;
|
||||||
}
|
if (operand.isFixed() && operand.physReg() == vcc)
|
||||||
|
program->needs_vcc = true;
|
||||||
const Temp temp = operand.getTemp();
|
const Temp temp = operand.getTemp();
|
||||||
const bool inserted = temp.is_linear()
|
const bool inserted = temp.is_linear()
|
||||||
? live_sgprs.insert(temp).second
|
? live_sgprs.insert(temp).second
|
||||||
@@ -161,6 +164,8 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block,
|
|||||||
assert(is_phi(insn));
|
assert(is_phi(insn));
|
||||||
assert(insn->definitions.size() == 1 && insn->definitions[0].isTemp());
|
assert(insn->definitions.size() == 1 && insn->definitions[0].isTemp());
|
||||||
Definition& definition = insn->definitions[0];
|
Definition& definition = insn->definitions[0];
|
||||||
|
if ((definition.isFixed() || definition.hasHint()) && definition.physReg() == vcc)
|
||||||
|
program->needs_vcc = true;
|
||||||
const Temp temp = definition.getTemp();
|
const Temp temp = definition.getTemp();
|
||||||
size_t n = 0;
|
size_t n = 0;
|
||||||
|
|
||||||
@@ -205,9 +210,10 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block,
|
|||||||
: block->linear_preds;
|
: block->linear_preds;
|
||||||
for (unsigned i = 0; i < preds.size(); ++i) {
|
for (unsigned i = 0; i < preds.size(); ++i) {
|
||||||
Operand &operand = insn->operands[i];
|
Operand &operand = insn->operands[i];
|
||||||
if (!operand.isTemp()) {
|
if (!operand.isTemp())
|
||||||
continue;
|
continue;
|
||||||
}
|
if (operand.isFixed() && operand.physReg() == vcc)
|
||||||
|
program->needs_vcc = true;
|
||||||
/* check if we changed an already processed block */
|
/* check if we changed an already processed block */
|
||||||
const bool inserted = live_temps[preds[i]].insert(operand.getTemp()).second;
|
const bool inserted = live_temps[preds[i]].insert(operand.getTemp()).second;
|
||||||
if (inserted) {
|
if (inserted) {
|
||||||
@@ -364,6 +370,8 @@ live live_var_analysis(Program* program,
|
|||||||
std::vector<uint16_t> phi_sgpr_ops(program->blocks.size());
|
std::vector<uint16_t> phi_sgpr_ops(program->blocks.size());
|
||||||
RegisterDemand new_demand;
|
RegisterDemand new_demand;
|
||||||
|
|
||||||
|
program->needs_vcc = false;
|
||||||
|
|
||||||
/* this implementation assumes that the block idx corresponds to the block's position in program->blocks vector */
|
/* this implementation assumes that the block idx corresponds to the block's position in program->blocks vector */
|
||||||
for (Block& block : program->blocks)
|
for (Block& block : program->blocks)
|
||||||
worklist.insert(block.index);
|
worklist.insert(block.index);
|
||||||
|
@@ -390,6 +390,8 @@ bool validate_ra(Program *program, const struct radv_nir_compiler_options *optio
|
|||||||
if ((op.getTemp().type() == RegType::vgpr && op.physReg() + op.size() > 256 + program->config->num_vgprs) ||
|
if ((op.getTemp().type() == RegType::vgpr && op.physReg() + op.size() > 256 + program->config->num_vgprs) ||
|
||||||
(op.getTemp().type() == RegType::sgpr && op.physReg() + op.size() > program->config->num_sgprs && op.physReg() < program->sgpr_limit))
|
(op.getTemp().type() == RegType::sgpr && op.physReg() + op.size() > program->config->num_sgprs && op.physReg() < program->sgpr_limit))
|
||||||
err |= ra_fail(output, loc, assignments.at(op.tempId()).firstloc, "Operand %d has an out-of-bounds register assignment", i);
|
err |= ra_fail(output, loc, assignments.at(op.tempId()).firstloc, "Operand %d has an out-of-bounds register assignment", i);
|
||||||
|
if (op.physReg() == vcc && !program->needs_vcc)
|
||||||
|
err |= ra_fail(output, loc, Location(), "Operand %d fixed to vcc but needs_vcc=false", i);
|
||||||
if (!assignments[op.tempId()].firstloc.block)
|
if (!assignments[op.tempId()].firstloc.block)
|
||||||
assignments[op.tempId()].firstloc = loc;
|
assignments[op.tempId()].firstloc = loc;
|
||||||
if (!assignments[op.tempId()].defloc.block)
|
if (!assignments[op.tempId()].defloc.block)
|
||||||
@@ -407,6 +409,8 @@ bool validate_ra(Program *program, const struct radv_nir_compiler_options *optio
|
|||||||
if ((def.getTemp().type() == RegType::vgpr && def.physReg() + def.size() > 256 + program->config->num_vgprs) ||
|
if ((def.getTemp().type() == RegType::vgpr && def.physReg() + def.size() > 256 + program->config->num_vgprs) ||
|
||||||
(def.getTemp().type() == RegType::sgpr && def.physReg() + def.size() > program->config->num_sgprs && def.physReg() < program->sgpr_limit))
|
(def.getTemp().type() == RegType::sgpr && def.physReg() + def.size() > program->config->num_sgprs && def.physReg() < program->sgpr_limit))
|
||||||
err |= ra_fail(output, loc, assignments.at(def.tempId()).firstloc, "Definition %d has an out-of-bounds register assignment", i);
|
err |= ra_fail(output, loc, assignments.at(def.tempId()).firstloc, "Definition %d has an out-of-bounds register assignment", i);
|
||||||
|
if (def.physReg() == vcc && !program->needs_vcc)
|
||||||
|
err |= ra_fail(output, loc, Location(), "Definition %d fixed to vcc but needs_vcc=false", i);
|
||||||
if (!assignments[def.tempId()].firstloc.block)
|
if (!assignments[def.tempId()].firstloc.block)
|
||||||
assignments[def.tempId()].firstloc = loc;
|
assignments[def.tempId()].firstloc = loc;
|
||||||
assignments[def.tempId()].defloc = loc;
|
assignments[def.tempId()].defloc = loc;
|
||||||
|
Reference in New Issue
Block a user