aco: turn v_mov_b32 into addition to create VOPD instructions

fossil-db (navi31, wave32):
Totals from 15655 (19.76% of 79242) affected shaders:
Instrs: 10699119 -> 10688239 (-0.10%); split: -0.11%, +0.00%
CodeSize: 61290308 -> 61288596 (-0.00%); split: -0.01%, +0.00%
Latency: 89159743 -> 89150355 (-0.01%); split: -0.01%, +0.00%
InvThroughput: 16966295 -> 16955427 (-0.06%); split: -0.07%, +0.00%
VALU: 5484626 -> 5473993 (-0.19%); split: -0.20%, +0.00%
VOPD: 1446725 -> 1457358 (+0.73%); split: +0.74%, -0.01%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27485>
This commit is contained in:
Rhys Perry
2024-02-07 11:16:18 +00:00
committed by Marge Bot
parent 65dfb27f8f
commit ea92aea9f2

View File

@@ -138,10 +138,7 @@ get_vopd_info(const Instruction* instr)
case aco_opcode::v_sub_f32: info.op = aco_opcode::v_dual_sub_f32; break;
case aco_opcode::v_subrev_f32: info.op = aco_opcode::v_dual_subrev_f32; break;
case aco_opcode::v_mul_legacy_f32: info.op = aco_opcode::v_dual_mul_dx9_zero_f32; break;
case aco_opcode::v_mov_b32:
info.op = aco_opcode::v_dual_mov_b32;
info.is_commutative = false;
break;
case aco_opcode::v_mov_b32: info.op = aco_opcode::v_dual_mov_b32; break;
case aco_opcode::v_cndmask_b32:
info.op = aco_opcode::v_dual_cndmask_b32;
info.is_commutative = false;
@@ -222,6 +219,14 @@ is_vopd_compatible(const VOPDInfo& a, const VOPDInfo& b)
if ((a_src_banks & b.src_banks) != 0)
return false;
/* If we have to turn v_mov_b32 into v_dual_add_nc_u32 (which is OPY-only) but the other
* instruction is already OPY-only and is not commutative, we can't do it.
*/
if (a.op == aco_opcode::v_dual_mov_b32 && !b.is_commutative && b.is_opy_only)
return false;
if (b.op == aco_opcode::v_dual_mov_b32 && !a.is_commutative && a.is_opy_only)
return false;
return true;
}
@@ -634,7 +639,12 @@ get_vopd_opcode_operands(Instruction* instr, const VOPDInfo& info, bool swap, ac
*num_operands += instr->operands.size();
std::copy(instr->operands.begin(), instr->operands.end(), operands);
if (swap) {
if (swap && info.op == aco_opcode::v_dual_mov_b32) {
*op = aco_opcode::v_dual_add_nc_u32;
(*num_operands)++;
operands[0] = Operand::zero();
operands[1] = instr->operands[0];
} else if (swap) {
if (info.op == aco_opcode::v_dual_sub_f32)
*op = aco_opcode::v_dual_subrev_f32;
else if (info.op == aco_opcode::v_dual_subrev_f32)
@@ -654,8 +664,14 @@ create_vopd_instruction(const SchedILPContext& ctx, unsigned idx)
bool swap_x = false, swap_y = false;
if (x_info.src_banks & y_info.src_banks) {
assert(x_info.is_commutative || y_info.is_commutative);
swap_x = x_info.is_commutative;
swap_y = y_info.is_commutative && !swap_x;
/* Avoid swapping v_mov_b32 because it will become an OPY-only opcode. */
if (x_info.op == aco_opcode::v_dual_mov_b32 && !y_info.is_commutative) {
swap_x = true;
x_info.is_opy_only = true;
} else {
swap_x = x_info.is_commutative && x_info.op != aco_opcode::v_dual_mov_b32;
swap_y = y_info.is_commutative && !swap_x;
}
}
if (x_info.is_opy_only) {