aco: remove -0.0 for 32-bit fsign with mul_legacy/omod when denorms are flushed

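v_mul_legacy_f32 and omod return +0.0 if any operand is +0.0/-0.0, so
multiplying by 2.0 removes -0.0 just like adding zero does, and when
denormals are flushed the multiplication can be turned into omod.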

Foz-DB Navi21:
Totals from 4289 (5.60% of 76572) affected shaders:
Instrs: 8100571 -> 8099319 (-0.02%); split: -0.02%, +0.00%
CodeSize: 43433200 -> 43435088 (+0.00%); split: -0.01%, +0.01%
Latency: 88151566 -> 88147232 (-0.00%); split: -0.00%, +0.00%
InvThroughput: 22966705 -> 22965192 (-0.01%); split: -0.01%, +0.00%
VClause: 190010 -> 190009 (-0.00%)
SClause: 269697 -> 269689 (-0.00%)
Copies: 687294 -> 687296 (+0.00%); split: -0.00%, +0.00%

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25347>
This commit is contained in:
Georg Lehmann
2023-09-22 13:48:43 +02:00
committed by Marge Bot
parent 9508cadadb
commit 79b767f4c0

View File

@@ -2855,7 +2855,13 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
bld.vop3(aco_opcode::v_med3_i16, bld.def(v2b), Operand::c16(-1), src, Operand::c16(1u));
bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src);
} else if (dst.regClass() == v1) {
src = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::zero(), src);
if (ctx->block->fp_mode.denorm32 == fp_denorm_flush) {
/* If denormals are flushed, then v_mul_legacy_f32(2.0, src) can become omod. */
src =
bld.vop2(aco_opcode::v_mul_legacy_f32, bld.def(v1), Operand::c32(0x40000000), src);
} else {
src = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::zero(), src);
}
src =
bld.vop3(aco_opcode::v_med3_i32, bld.def(v1), Operand::c32(-1), src, Operand::c32(1u));
bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src);