aco/ra: use bitset for sgpr_operands_alias_defs
We cannot rely on SGPR Temps being fully aligned to 64 SGPRs.

Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32217>
This commit is contained in:

committed by
Marge Bot

parent
a04e096339
commit
17da551133
@@ -2933,21 +2933,20 @@ emit_parallel_copy_internal(ra_ctx& ctx, std::vector<std::pair<Operand, Definiti
|
|||||||
pc.reset(create_instruction(aco_opcode::p_parallelcopy, Format::PSEUDO, parallelcopy.size(),
|
pc.reset(create_instruction(aco_opcode::p_parallelcopy, Format::PSEUDO, parallelcopy.size(),
|
||||||
parallelcopy.size()));
|
parallelcopy.size()));
|
||||||
bool linear_vgpr = false;
|
bool linear_vgpr = false;
|
||||||
bool sgpr_operands_alias_defs = false;
|
bool may_swap_sgprs = false;
|
||||||
uint64_t sgpr_operands[4] = {0, 0, 0, 0};
|
std::bitset<256> sgpr_operands;
|
||||||
for (unsigned i = 0; i < parallelcopy.size(); i++) {
|
for (unsigned i = 0; i < parallelcopy.size(); i++) {
|
||||||
linear_vgpr |= parallelcopy[i].first.regClass().is_linear_vgpr();
|
linear_vgpr |= parallelcopy[i].first.regClass().is_linear_vgpr();
|
||||||
|
|
||||||
if (!sgpr_operands_alias_defs && parallelcopy[i].first.isTemp() &&
|
if (!may_swap_sgprs && parallelcopy[i].first.isTemp() &&
|
||||||
parallelcopy[i].first.getTemp().type() == RegType::sgpr) {
|
parallelcopy[i].first.getTemp().type() == RegType::sgpr) {
|
||||||
unsigned reg = parallelcopy[i].first.physReg().reg();
|
unsigned op_reg = parallelcopy[i].first.physReg().reg();
|
||||||
unsigned size = parallelcopy[i].first.getTemp().size();
|
unsigned def_reg = parallelcopy[i].second.physReg().reg();
|
||||||
sgpr_operands[reg / 64u] |= u_bit_consecutive64(reg % 64u, size);
|
for (unsigned j = 0; j < parallelcopy[i].first.size(); j++) {
|
||||||
|
sgpr_operands.set(op_reg + j);
|
||||||
reg = parallelcopy[i].second.physReg().reg();
|
if (sgpr_operands.test(def_reg + j))
|
||||||
size = parallelcopy[i].second.getTemp().size();
|
may_swap_sgprs = true;
|
||||||
if (sgpr_operands[reg / 64u] & u_bit_consecutive64(reg % 64u, size))
|
}
|
||||||
sgpr_operands_alias_defs = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pc->operands[i] = parallelcopy[i].first;
|
pc->operands[i] = parallelcopy[i].first;
|
||||||
@@ -2961,7 +2960,7 @@ emit_parallel_copy_internal(ra_ctx& ctx, std::vector<std::pair<Operand, Definiti
|
|||||||
add_rename(ctx, orig, pc->definitions[i].getTemp());
|
add_rename(ctx, orig, pc->definitions[i].getTemp());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (temp_in_scc && (sgpr_operands_alias_defs || linear_vgpr)) {
|
if (temp_in_scc && (may_swap_sgprs || linear_vgpr)) {
|
||||||
/* disable definitions and re-enable operands */
|
/* disable definitions and re-enable operands */
|
||||||
RegisterFile tmp_file(register_file);
|
RegisterFile tmp_file(register_file);
|
||||||
for (const Definition& def : instr->definitions) {
|
for (const Definition& def : instr->definitions) {
|
||||||
@@ -2975,7 +2974,7 @@ emit_parallel_copy_internal(ra_ctx& ctx, std::vector<std::pair<Operand, Definiti
|
|||||||
|
|
||||||
handle_pseudo(ctx, tmp_file, pc.get());
|
handle_pseudo(ctx, tmp_file, pc.get());
|
||||||
} else {
|
} else {
|
||||||
pc->pseudo().needs_scratch_reg = sgpr_operands_alias_defs || linear_vgpr;
|
pc->pseudo().needs_scratch_reg = may_swap_sgprs || linear_vgpr;
|
||||||
pc->pseudo().tmp_in_scc = false;
|
pc->pseudo().tmp_in_scc = false;
|
||||||
pc->pseudo().scratch_sgpr = scc;
|
pc->pseudo().scratch_sgpr = scc;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user