aco/ra: set Pseudo_instruction::scratch_sgpr to SCC if it doesn't need to be preserved
Also ensure that 'needs_scratch_reg' is always true if SCC might be overwritten.
Few changes, because some p_split_vector instructions get SCC assigned as scratch reg
and thus can inhibit some postRA optimizations.
Totals from 3 (0.00% of 79395) affected shaders: (Navi31)
Instrs: 10501 -> 10500 (-0.01%); split: -0.02%, +0.01%
CodeSize: 51580 -> 51520 (-0.12%); split: -0.12%, +0.01%
Latency: 84166 -> 84174 (+0.01%)
InvThroughput: 13109 -> 13111 (+0.02%)
SALU: 859 -> 860 (+0.12%)
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32217>
(cherry picked from commit a04e096339)
This commit is contained in:

committed by
Dylan Baker

parent
09ad1fbdf2
commit
b1f8e15781
@@ -1124,7 +1124,7 @@
|
|||||||
"description": "aco/ra: set Pseudo_instruction::scratch_sgpr to SCC if it doesn't need to be preserved",
|
"description": "aco/ra: set Pseudo_instruction::scratch_sgpr to SCC if it doesn't need to be preserved",
|
||||||
"nominated": true,
|
"nominated": true,
|
||||||
"nomination_type": 1,
|
"nomination_type": 1,
|
||||||
"resolution": 0,
|
"resolution": 1,
|
||||||
"main_sha": null,
|
"main_sha": null,
|
||||||
"because_sha": null,
|
"because_sha": null,
|
||||||
"notes": null
|
"notes": null
|
||||||
|
@@ -1936,8 +1936,11 @@ handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* ctx,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (preserve_scc && it->second.def.getTemp().type() == RegType::sgpr)
|
if (it->second.def.getTemp().type() == RegType::sgpr) {
|
||||||
assert(!(it->second.def.physReg() == pi->scratch_sgpr));
|
assert(it->second.def.physReg() != pi->scratch_sgpr);
|
||||||
|
assert(pi->needs_scratch_reg);
|
||||||
|
assert(!preserve_scc || pi->scratch_sgpr != scc);
|
||||||
|
}
|
||||||
|
|
||||||
/* to resolve the cycle, we have to swap the src reg with the dst reg */
|
/* to resolve the cycle, we have to swap the src reg with the dst reg */
|
||||||
copy_operation swap = it->second;
|
copy_operation swap = it->second;
|
||||||
|
@@ -2039,12 +2039,17 @@ handle_pseudo(ra_ctx& ctx, const RegisterFile& reg_file, Instruction* instr)
|
|||||||
reads_linear = true;
|
reads_linear = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!writes_linear || !reads_linear || !reg_file[scc])
|
if (!writes_linear || !reads_linear)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
instr->pseudo().needs_scratch_reg = true;
|
instr->pseudo().needs_scratch_reg = true;
|
||||||
instr->pseudo().tmp_in_scc = reg_file[scc];
|
instr->pseudo().tmp_in_scc = reg_file[scc];
|
||||||
|
|
||||||
|
if (!reg_file[scc]) {
|
||||||
|
instr->pseudo().scratch_sgpr = scc;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
int reg = ctx.max_used_sgpr;
|
int reg = ctx.max_used_sgpr;
|
||||||
for (; reg >= 0 && reg_file[PhysReg{(unsigned)reg}]; reg--)
|
for (; reg >= 0 && reg_file[PhysReg{(unsigned)reg}]; reg--)
|
||||||
;
|
;
|
||||||
@@ -2933,18 +2938,16 @@ emit_parallel_copy_internal(ra_ctx& ctx, std::vector<std::pair<Operand, Definiti
|
|||||||
for (unsigned i = 0; i < parallelcopy.size(); i++) {
|
for (unsigned i = 0; i < parallelcopy.size(); i++) {
|
||||||
linear_vgpr |= parallelcopy[i].first.regClass().is_linear_vgpr();
|
linear_vgpr |= parallelcopy[i].first.regClass().is_linear_vgpr();
|
||||||
|
|
||||||
if (temp_in_scc && parallelcopy[i].first.isTemp() &&
|
if (!sgpr_operands_alias_defs && parallelcopy[i].first.isTemp() &&
|
||||||
parallelcopy[i].first.getTemp().type() == RegType::sgpr) {
|
parallelcopy[i].first.getTemp().type() == RegType::sgpr) {
|
||||||
if (!sgpr_operands_alias_defs) {
|
unsigned reg = parallelcopy[i].first.physReg().reg();
|
||||||
unsigned reg = parallelcopy[i].first.physReg().reg();
|
unsigned size = parallelcopy[i].first.getTemp().size();
|
||||||
unsigned size = parallelcopy[i].first.getTemp().size();
|
sgpr_operands[reg / 64u] |= u_bit_consecutive64(reg % 64u, size);
|
||||||
sgpr_operands[reg / 64u] |= u_bit_consecutive64(reg % 64u, size);
|
|
||||||
|
|
||||||
reg = parallelcopy[i].second.physReg().reg();
|
reg = parallelcopy[i].second.physReg().reg();
|
||||||
size = parallelcopy[i].second.getTemp().size();
|
size = parallelcopy[i].second.getTemp().size();
|
||||||
if (sgpr_operands[reg / 64u] & u_bit_consecutive64(reg % 64u, size))
|
if (sgpr_operands[reg / 64u] & u_bit_consecutive64(reg % 64u, size))
|
||||||
sgpr_operands_alias_defs = true;
|
sgpr_operands_alias_defs = true;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pc->operands[i] = parallelcopy[i].first;
|
pc->operands[i] = parallelcopy[i].first;
|
||||||
@@ -2974,6 +2977,7 @@ emit_parallel_copy_internal(ra_ctx& ctx, std::vector<std::pair<Operand, Definiti
|
|||||||
} else {
|
} else {
|
||||||
pc->pseudo().needs_scratch_reg = sgpr_operands_alias_defs || linear_vgpr;
|
pc->pseudo().needs_scratch_reg = sgpr_operands_alias_defs || linear_vgpr;
|
||||||
pc->pseudo().tmp_in_scc = false;
|
pc->pseudo().tmp_in_scc = false;
|
||||||
|
pc->pseudo().scratch_sgpr = scc;
|
||||||
}
|
}
|
||||||
|
|
||||||
instructions.emplace_back(std::move(pc));
|
instructions.emplace_back(std::move(pc));
|
||||||
@@ -3064,7 +3068,6 @@ register_allocation(Program* program, ra_test_policy policy)
|
|||||||
for (; instr_it != block.instructions.end(); ++instr_it) {
|
for (; instr_it != block.instructions.end(); ++instr_it) {
|
||||||
aco_ptr<Instruction>& instr = *instr_it;
|
aco_ptr<Instruction>& instr = *instr_it;
|
||||||
std::vector<std::pair<Operand, Definition>> parallelcopy;
|
std::vector<std::pair<Operand, Definition>> parallelcopy;
|
||||||
bool temp_in_scc = register_file[scc];
|
|
||||||
|
|
||||||
if (instr->opcode == aco_opcode::p_branch) {
|
if (instr->opcode == aco_opcode::p_branch) {
|
||||||
/* unconditional branches are handled after phis of the target */
|
/* unconditional branches are handled after phis of the target */
|
||||||
@@ -3121,6 +3124,7 @@ register_allocation(Program* program, ra_test_policy policy)
|
|||||||
ctx.war_hint.set(operand.physReg().reg() + j);
|
ctx.war_hint.set(operand.physReg().reg() + j);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
bool temp_in_scc = register_file[scc];
|
||||||
|
|
||||||
/* remove dead vars from register file */
|
/* remove dead vars from register file */
|
||||||
for (const Operand& op : instr->operands) {
|
for (const Operand& op : instr->operands) {
|
||||||
|
@@ -555,7 +555,7 @@ BEGIN_TEST(regalloc.linear_vgpr.compact_for_future_def)
|
|||||||
finish_ra_test(ra_test_policy());
|
finish_ra_test(ra_test_policy());
|
||||||
|
|
||||||
//~gfx8_cbranch>> lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28] scc:1 scratch:s1
|
//~gfx8_cbranch>> lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28] scc:1 scratch:s1
|
||||||
//~gfx8_branch>> lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28] scc:0 scratch:s0
|
//~gfx8_branch>> lv1: %ltmp2_2:v[29] = p_parallelcopy %ltmp2:v[28] scc:0 scratch:s253
|
||||||
aco_ptr<Instruction>& parallelcopy = program->blocks[0].instructions[6];
|
aco_ptr<Instruction>& parallelcopy = program->blocks[0].instructions[6];
|
||||||
aco_print_instr(program->gfx_level, parallelcopy.get(), output);
|
aco_print_instr(program->gfx_level, parallelcopy.get(), output);
|
||||||
if (parallelcopy->isPseudo()) {
|
if (parallelcopy->isPseudo()) {
|
||||||
|
Reference in New Issue
Block a user