diff --git a/.pick_status.json b/.pick_status.json index e8d4d71314d..3e4d39bb462 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -104,7 +104,7 @@ "description": "aco: swap the correct v_mov_b32 if there are two of them", "nominated": true, "nomination_type": 2, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "408fa33c092810155baac342de90fd712231aa89", "notes": null diff --git a/src/amd/compiler/aco_scheduler_ilp.cpp b/src/amd/compiler/aco_scheduler_ilp.cpp index 4d3ab25ef7b..886d2f7b15f 100644 --- a/src/amd/compiler/aco_scheduler_ilp.cpp +++ b/src/amd/compiler/aco_scheduler_ilp.cpp @@ -758,13 +758,17 @@ create_vopd_instruction(const SchedILPContext& ctx, unsigned idx, bool prev_can_ if (x_info.src_banks & y_info.src_banks) { assert(x_info.is_commutative || y_info.is_commutative); /* Avoid swapping v_mov_b32 because it will become an OPY-only opcode. */ - if (x_info.op == aco_opcode::v_dual_mov_b32 && !y_info.is_commutative) { + if (x_info.op == aco_opcode::v_dual_mov_b32 && y_info.op == aco_opcode::v_dual_mov_b32) { + swap_x = x_info.is_opy_only; + swap_y = !swap_x; + } else if (x_info.op == aco_opcode::v_dual_mov_b32 && !y_info.is_commutative) { swap_x = true; x_info.is_opy_only = true; } else { swap_x = x_info.is_commutative && x_info.op != aco_opcode::v_dual_mov_b32; swap_y = y_info.is_commutative && !swap_x; } + y_info.is_opy_only |= swap_y && y_info.op == aco_opcode::v_dual_mov_b32; } if (x_info.is_opy_only) { diff --git a/src/amd/compiler/tests/test_scheduler.cpp b/src/amd/compiler/tests/test_scheduler.cpp index bdb75243d1f..f48a7ec91e8 100644 --- a/src/amd/compiler/tests/test_scheduler.cpp +++ b/src/amd/compiler/tests/test_scheduler.cpp @@ -162,6 +162,7 @@ BEGIN_TEST(vopd_sched.war) PhysReg reg_v0{256}; PhysReg reg_v1{257}; PhysReg reg_v3{259}; + PhysReg reg_v5{261}; //>> p_unit_test 0 //~gfx11! v1: %0:v[1] = v_dual_add_f32 %0:v[3], %0:v[1] :: v1: %0:v[0] = v_dual_mul_f32 %0:v[1], %0:v[3] @@ -184,6 +185,14 @@ BEGIN_TEST(vopd_sched.war) bld.vop2(aco_opcode::v_mul_f32, Definition(reg_v1, v1), Operand(reg_v3, v1), Operand(reg_v1, v1)); + /* Test that we swap the right v_mov_b32. */ + //>> p_unit_test 2 + //~gfx11! v1: %0:v[1] = v_dual_mov_b32 %0:v[5] :: v1: %0:v[0] = v_dual_add_nc_u32 0, %0:v[1] + //~gfx12! v1: %0:v[0] = v_dual_mov_b32 %0:v[1] :: v1: %0:v[1] = v_dual_add_nc_u32 0, %0:v[5] + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); + bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v1, v1)); + bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v1, v1), Operand(reg_v5, v1)); + finish_schedule_vopd_test(); } END_TEST