aco/ra: use bitset for sgpr_operands_alias_defs

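We cannot rely on an SGPR Temp staying within a single 64-SGPR-aligned group: a Temp may straddle a 64-register boundary, which the previous per-64-bit-word mask could not represent.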

Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32217>
(cherry picked from commit 17da551133)
This commit is contained in:
Daniel Schürmann
2024-11-19 15:35:55 +01:00
committed by Dylan Baker
parent b1f8e15781
commit 9448cd6071
2 changed files with 13 additions and 14 deletions

View File

@@ -1114,7 +1114,7 @@
"description": "aco/ra: use bitset for sgpr_operands_alias_defs",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null

View File

@@ -2933,21 +2933,20 @@ emit_parallel_copy_internal(ra_ctx& ctx, std::vector<std::pair<Operand, Definiti
pc.reset(create_instruction(aco_opcode::p_parallelcopy, Format::PSEUDO, parallelcopy.size(),
parallelcopy.size()));
bool linear_vgpr = false;
bool sgpr_operands_alias_defs = false;
uint64_t sgpr_operands[4] = {0, 0, 0, 0};
bool may_swap_sgprs = false;
std::bitset<256> sgpr_operands;
for (unsigned i = 0; i < parallelcopy.size(); i++) {
linear_vgpr |= parallelcopy[i].first.regClass().is_linear_vgpr();
if (!sgpr_operands_alias_defs && parallelcopy[i].first.isTemp() &&
if (!may_swap_sgprs && parallelcopy[i].first.isTemp() &&
parallelcopy[i].first.getTemp().type() == RegType::sgpr) {
unsigned reg = parallelcopy[i].first.physReg().reg();
unsigned size = parallelcopy[i].first.getTemp().size();
sgpr_operands[reg / 64u] |= u_bit_consecutive64(reg % 64u, size);
reg = parallelcopy[i].second.physReg().reg();
size = parallelcopy[i].second.getTemp().size();
if (sgpr_operands[reg / 64u] & u_bit_consecutive64(reg % 64u, size))
sgpr_operands_alias_defs = true;
unsigned op_reg = parallelcopy[i].first.physReg().reg();
unsigned def_reg = parallelcopy[i].second.physReg().reg();
for (unsigned j = 0; j < parallelcopy[i].first.size(); j++) {
sgpr_operands.set(op_reg + j);
if (sgpr_operands.test(def_reg + j))
may_swap_sgprs = true;
}
}
pc->operands[i] = parallelcopy[i].first;
@@ -2961,7 +2960,7 @@ emit_parallel_copy_internal(ra_ctx& ctx, std::vector<std::pair<Operand, Definiti
add_rename(ctx, orig, pc->definitions[i].getTemp());
}
if (temp_in_scc && (sgpr_operands_alias_defs || linear_vgpr)) {
if (temp_in_scc && (may_swap_sgprs || linear_vgpr)) {
/* disable definitions and re-enable operands */
RegisterFile tmp_file(register_file);
for (const Definition& def : instr->definitions) {
@@ -2975,7 +2974,7 @@ emit_parallel_copy_internal(ra_ctx& ctx, std::vector<std::pair<Operand, Definiti
handle_pseudo(ctx, tmp_file, pc.get());
} else {
pc->pseudo().needs_scratch_reg = sgpr_operands_alias_defs || linear_vgpr;
pc->pseudo().needs_scratch_reg = may_swap_sgprs || linear_vgpr;
pc->pseudo().tmp_in_scc = false;
pc->pseudo().scratch_sgpr = scc;
}