From 966cff9cfa4e6c226c76b332da57f63f0782eb7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Thu, 9 Sep 2021 08:27:21 +0200 Subject: [PATCH] aco/isel: Fix emit_vop2_instruction to apply 16/24-bit flags properly. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously it used a builder function but didn't use the return value from that function, so the flags were not applied. Signed-off-by: Timur Kristóf Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 04c6a7216d3..ef0e4fc6e67 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -833,7 +833,7 @@ emit_sop2_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Te } void -emit_vop2_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst, +emit_vop2_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode opc, Temp dst, bool commutative, bool swap_srcs = false, bool flush_denorms = false, bool nuw = false, uint8_t uses_ub = 0) { @@ -852,28 +852,27 @@ emit_vop2_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Te } } - Operand op0(src0); - Operand op1(src1); + Operand op[2] = {Operand(src0), Operand(src1)}; for (int i = 0; i < 2; i++) { if (uses_ub & (1 << i)) { uint32_t src_ub = get_alu_src_ub(ctx, instr, swap_srcs ? !i : i); if (src_ub <= 0xffff) - bld.set16bit(i ? op1 : op0); + op[i].set16bit(true); else if (src_ub <= 0xffffff) - bld.set24bit(i ? op1 : op0); + op[i].set24bit(true); } } if (flush_denorms && ctx->program->chip_class < GFX9) { assert(dst.size() == 1); - Temp tmp = bld.vop2(op, bld.def(v1), op0, op1); + Temp tmp = bld.vop2(opc, bld.def(v1), op[0], op[1]); bld.vop2(aco_opcode::v_mul_f32, Definition(dst), Operand::c32(0x3f800000u), tmp); } else { if (nuw) { - bld.nuw().vop2(op, Definition(dst), op0, op1); + bld.nuw().vop2(opc, Definition(dst), op[0], op[1]); } else { - bld.vop2(op, Definition(dst), op0, op1); + bld.vop2(opc, Definition(dst), op[0], op[1]); } } } @@ -1646,7 +1645,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_lshlrev_b16, dst, true); } else if (dst.regClass() == v1) { emit_vop2_instruction(ctx, instr, aco_opcode::v_lshlrev_b32, dst, false, true, false, - false, 1); + false, 2); } else if (dst.regClass() == v2 && ctx->program->chip_class >= GFX8) { bld.vop3(aco_opcode::v_lshlrev_b64, Definition(dst), get_alu_src(ctx, instr->src[1]), get_alu_src(ctx, instr->src[0]));