From 79b767f4c08f85368912d5590146850a0c6e8654 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Fri, 22 Sep 2023 13:48:43 +0200 Subject: [PATCH] aco: remove -0.0 for 32 bit fsign with mul_legacy/omod when denorms are flushed v_mul_legacy_f32 and omod return +0.0 if any operand is +0.0/-0.0. Foz-DB Navi21: Totals from 4289 (5.60% of 76572) affected shaders: Instrs: 8100571 -> 8099319 (-0.02%); split: -0.02%, +0.00% CodeSize: 43433200 -> 43435088 (+0.00%); split: -0.01%, +0.01% Latency: 88151566 -> 88147232 (-0.00%); split: -0.00%, +0.00% InvThroughput: 22966705 -> 22965192 (-0.01%); split: -0.01%, +0.00% VClause: 190010 -> 190009 (-0.00%) SClause: 269697 -> 269689 (-0.00%) Copies: 687294 -> 687296 (+0.00%); split: -0.00%, +0.00% Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 154553e385d..ab712f1441b 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -2855,7 +2855,13 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) bld.vop3(aco_opcode::v_med3_i16, bld.def(v2b), Operand::c16(-1), src, Operand::c16(1u)); bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src); } else if (dst.regClass() == v1) { - src = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::zero(), src); + if (ctx->block->fp_mode.denorm32 == fp_denorm_flush) { + /* If denormals are flushed, then v_mul_legacy_f32(2.0, src) can become omod. */ + src = + bld.vop2(aco_opcode::v_mul_legacy_f32, bld.def(v1), Operand::c32(0x40000000), src); + } else { + src = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::zero(), src); + } src = bld.vop3(aco_opcode::v_med3_i32, bld.def(v1), Operand::c32(-1), src, Operand::c32(1u)); bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src);