/*
 * Reviewer summary of the patch below (unified diff against
 * src/amd/compiler/aco_register_allocation.cpp). This preamble sits before
 * the "diff --git" header and is not part of any hunk.
 *
 * What the hunks show:
 *  - A forward declaration "struct ra_ctx;" is added, and the first parameter
 *    of add_subdword_operand() changes from "chip_class chip" to
 *    "ra_ctx& ctx". Inside the function, chip is now read from
 *    ctx.program->chip_class.
 *  - A new helper update_phi_map(ctx, old, instr) is added. For every operand
 *    of instr that isTemp() and whose tempId() has an entry in ctx.phi_map,
 *    it erases old from that entry's uses set and emplaces instr.
 *  - In the can_use_SDWA branch of add_subdword_operand(), the result of
 *    convert_to_SDWA() is now kept in tmp; when tmp is non-null,
 *    update_phi_map(ctx, tmp.get(), instr.get()) is called.
 *    NOTE(review): presumably tmp holds the instruction that was replaced,
 *    since it is passed as "old" -- confirm against convert_to_SDWA().
 *  - In register_allocation(), the call site passes ctx instead of
 *    program->chip_class. In the asVOP3 conversion hunk, the hand-written
 *    per-operand loop (copy plus phi_map bookkeeping) is replaced by
 *    std::copy of the operands followed by
 *    update_phi_map(ctx, tmp.get(), instr.get()) after the definitions copy.
 *
 * NOTE(review): this text has all line breaks collapsed, and template
 * argument lists look stripped (e.g. "aco_ptr&",
 * "std::unordered_map::iterator", "static_cast(instr.get())", "std::pair").
 * Presumably lost during extraction -- verify against the original commit
 * before applying.
 */
diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index b57bb83904a..81e4072a2b7 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -38,8 +38,10 @@ namespace aco { namespace { +struct ra_ctx; + unsigned get_subdword_operand_stride(chip_class chip, const aco_ptr& instr, unsigned idx, RegClass rc); -void add_subdword_operand(chip_class chip, aco_ptr& instr, unsigned idx, unsigned byte, RegClass rc); +void add_subdword_operand(ra_ctx& ctx, aco_ptr& instr, unsigned idx, unsigned byte, RegClass rc); std::pair get_subdword_definition_info(Program *program, const aco_ptr& instr, RegClass rc); void add_subdword_definition(Program *program, aco_ptr& instr, unsigned idx, PhysReg reg, bool is_partial); @@ -352,8 +354,22 @@ unsigned get_subdword_operand_stride(chip_class chip, const aco_ptr return 4; } -void add_subdword_operand(chip_class chip, aco_ptr& instr, unsigned idx, unsigned byte, RegClass rc) +void update_phi_map(ra_ctx& ctx, Instruction *old, Instruction *instr) { + for (Operand& op : instr->operands) { + if (!op.isTemp()) + continue; + std::unordered_map::iterator phi = ctx.phi_map.find(op.tempId()); + if (phi != ctx.phi_map.end()) { + phi->second.uses.erase(old); + phi->second.uses.emplace(instr); + } + } +} + +void add_subdword_operand(ra_ctx& ctx, aco_ptr& instr, unsigned idx, unsigned byte, RegClass rc) +{ + chip_class chip = ctx.program->chip_class; if (instr->format == Format::PSEUDO || byte == 0) return; @@ -376,7 +392,9 @@ void add_subdword_operand(chip_class chip, aco_ptr& instr, unsigned } return; } else if (can_use_SDWA(chip, instr)) { - convert_to_SDWA(chip, instr); + aco_ptr tmp = convert_to_SDWA(chip, instr); + if (tmp) + update_phi_map(ctx, tmp.get(), instr.get()); return; } else if (rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, idx, byte / 2)) { VOP3A_instruction *vop3 = static_cast(instr.get()); @@ -2233,7 +2251,7 @@ void 
register_allocation(Program *program, std::vector& live_out_per_bloc if (op.isTemp() && op.isFirstKill() && op.isLateKill()) register_file.clear(op); if (op.isTemp() && op.physReg().byte() != 0) - add_subdword_operand(program->chip_class, instr, i, op.physReg().byte(), op.regClass()); + add_subdword_operand(ctx, instr, i, op.physReg().byte(), op.regClass()); } /* emit parallelcopy */ @@ -2366,19 +2384,9 @@ void register_allocation(Program *program, std::vector& live_out_per_bloc aco_ptr tmp = std::move(instr); Format format = asVOP3(tmp->format); instr.reset(create_instruction(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size())); - for (unsigned i = 0; i < instr->operands.size(); i++) { - Operand& operand = tmp->operands[i]; - instr->operands[i] = operand; - /* keep phi_map up to date */ - if (operand.isTemp()) { - std::unordered_map::iterator phi = ctx.phi_map.find(operand.tempId()); - if (phi != ctx.phi_map.end()) { - phi->second.uses.erase(tmp.get()); - phi->second.uses.emplace(instr.get()); - } - } - } + std::copy(tmp->operands.begin(), tmp->operands.end(), instr->operands.begin()); std::copy(tmp->definitions.begin(), tmp->definitions.end(), instr->definitions.begin()); + update_phi_map(ctx, tmp.get(), instr.get()); } instructions.emplace_back(std::move(*it));