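aco/ra: always reuse def register for literal copy

When a literal operand has to be copied into an SGPR before the
instruction (the pre-GFX10 path), stop searching for a free register
with get_reg() and instead reuse the register that was already
allocated for the instruction's SGPR definition. This works because
the instruction cannot have any other SGPR operand.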

Foz-DB Vega10:
Totals from 4056 (6.43% of 63053) affected shaders:
Instrs: 5540797 -> 5540760 (-0.00%); split: -0.00%, +0.00%
CodeSize: 29680912 -> 29680812 (-0.00%); split: -0.00%, +0.00%
SGPRs: 307808 -> 307680 (-0.04%)
Latency: 82483179 -> 82484437 (+0.00%); split: -0.00%, +0.00%
InvThroughput: 45546645 -> 45547091 (+0.00%); split: -0.00%, +0.00%
SClause: 185641 -> 185648 (+0.00%); split: -0.00%, +0.01%
Copies: 642229 -> 642214 (-0.00%); split: -0.01%, +0.01%
SALU: 643132 -> 643107 (-0.00%); split: -0.01%, +0.01%

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30932>
This commit is contained in:
Author: Georg Lehmann
Date: 2024-08-30 12:50:23 +02:00
Committed by: Marge Bot
Parent: 5fb54d1fde
Commit: 8f3bb1fb2e

View File

@@ -3242,18 +3242,14 @@ register_allocation(Program* program, ra_test_policy policy)
*/
if (instr->operands.size() && instr->operands[0].isLiteral() &&
program->gfx_level < GFX10) {
/* disable definitions and re-enable operands */
RegisterFile tmp_file(register_file);
for (const Definition& def : instr->definitions)
tmp_file.clear(def);
for (const Operand& op : instr->operands) {
if (op.isTemp() && op.isFirstKill())
tmp_file.block(op.physReg(), op.regClass());
}
/* Re-use the register we already allocated for the definition.
* This works because the instruction cannot have any other SGPR operand.
*/
Temp tmp = program->allocateTmp(instr->operands[0].size() == 2 ? s2 : s1);
ctx.assignments.emplace_back();
PhysReg reg = get_reg(ctx, tmp_file, tmp, parallelcopy, instr);
update_renames(ctx, register_file, parallelcopy, instr, rename_not_killed_ops);
const Definition& def =
instr->isVOPC() ? instr->definitions[0] : instr->definitions.back();
assert(def.regClass() == s2);
ctx.assignments.emplace_back(def.physReg(), tmp.regClass());
Instruction* copy =
create_instruction(aco_opcode::p_parallelcopy, Format::PSEUDO, 1, 1);
@@ -3261,10 +3257,10 @@ register_allocation(Program* program, ra_test_policy policy)
if (copy->operands[0].bytes() < 4)
copy->operands[0] = Operand::c32(copy->operands[0].constantValue());
copy->definitions[0] = Definition(tmp);
copy->definitions[0].setFixed(reg);
copy->definitions[0].setFixed(def.physReg());
instr->operands[0] = Operand(tmp);
instr->operands[0].setFixed(reg);
instr->operands[0].setFixed(def.physReg());
instr->operands[0].setFirstKill(true);
instructions.emplace_back(copy);