diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index aa85f933938..28c3d445262 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -795,6 +795,19 @@ public:
       }
    }
 
+   /* Value if this were used with vop3/opsel or vop3p: opsel=true yields data_.i >> 16 (or only the sign-extension bits for a 2-byte non-literal in [-16, 64]); opsel=false yields data_.i truncated to uint16_t. Only valid for 2- or 4-byte operands (asserted). */
+   constexpr uint16_t constantValue16(bool opsel) const noexcept
+   {
+      assert(bytes() == 2 || bytes() == 4);
+      if (opsel) {
+         if (bytes() == 2 && int16_t(data_.i) >= -16 && int16_t(data_.i) <= 64 && !isLiteral())
+            return int16_t(data_.i) >> 16; /* 16-bit inline integers are sign-extended, even with fp16 instrs */
+         else
+            return data_.i >> 16;
+      }
+      return data_.i;
+   }
+
    constexpr bool isOfType(RegType type) const noexcept
    {
       return hasRegClass() && regClass().type() == type;
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 87052efdd2b..ef5403d9987 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -201,7 +201,8 @@ struct ssa_info {
       val = constant;
 
       /* check that no upper bits are lost in case of packed 16bit constants */
-      if (gfx_level >= GFX8 && !op16.isLiteral() && op16.constantValue64() == constant)
+      if (gfx_level >= GFX8 && !op16.isLiteral() &&
+          op16.constantValue16(true) == ((constant >> 16) & 0xffff)) /* compare against bits 16..31 of constant */
          add_label(label_constant_16bit);
 
       if (!op32.isLiteral())
@@ -2841,11 +2842,12 @@ combine_clamp(opt_ctx& ctx, aco_ptr& instr, aco_opcode min, aco_opc
          uint32_t const0 = 0, const1 = 0;
          for (int i = 0; i < 3; i++) {
             uint32_t val;
+            bool hi16 = opsel & (1 << i); /* true when opsel bit i is set for operand i */
             if (operands[i].isConstant()) {
-               val = operands[i].constantValue();
+               val = hi16 ? operands[i].constantValue16(true) : operands[i].constantValue();
             } else if (operands[i].isTemp() &&
                        ctx.info[operands[i].tempId()].is_constant_or_literal(32)) {
-               val = ctx.info[operands[i].tempId()].val;
+               val = ctx.info[operands[i].tempId()].val >> (hi16 ? 16 : 0);
             } else {
                continue;
             }
@@ -2860,11 +2862,6 @@ combine_clamp(opt_ctx& ctx, aco_ptr& instr, aco_opcode min, aco_opc
          if (const0_idx < 0 || const1_idx < 0)
             continue;
 
-         if (opsel & (1 << const0_idx))
-            const0 >>= 16;
-         if (opsel & (1 << const1_idx))
-            const1 >>= 16;
-
          int lower_idx = const0_idx;
          switch (min) {
          case aco_opcode::v_min_f32: