aco: remove -0.0 for 32 bit fsign with mul_legacy/omod when denorms are flushed
v_mul_legacy_f32 and omod return +0.0 if any operand is +0.0/-0.0.

Foz-DB Navi21:
Totals from 4289 (5.60% of 76572) affected shaders:
Instrs: 8100571 -> 8099319 (-0.02%); split: -0.02%, +0.00%
CodeSize: 43433200 -> 43435088 (+0.00%); split: -0.01%, +0.01%
Latency: 88151566 -> 88147232 (-0.00%); split: -0.00%, +0.00%
InvThroughput: 22966705 -> 22965192 (-0.01%); split: -0.01%, +0.00%
VClause: 190010 -> 190009 (-0.00%)
SClause: 269697 -> 269689 (-0.00%)
Copies: 687294 -> 687296 (+0.00%); split: -0.00%, +0.00%

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25347>
This commit is contained in:
@@ -2855,7 +2855,13 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
||||
bld.vop3(aco_opcode::v_med3_i16, bld.def(v2b), Operand::c16(-1), src, Operand::c16(1u));
|
||||
bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src);
|
||||
} else if (dst.regClass() == v1) {
|
||||
src = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::zero(), src);
|
||||
if (ctx->block->fp_mode.denorm32 == fp_denorm_flush) {
|
||||
/* If denormals are flushed, then v_mul_legacy_f32(2.0, src) can become omod. */
|
||||
src =
|
||||
bld.vop2(aco_opcode::v_mul_legacy_f32, bld.def(v1), Operand::c32(0x40000000), src);
|
||||
} else {
|
||||
src = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::zero(), src);
|
||||
}
|
||||
src =
|
||||
bld.vop3(aco_opcode::v_med3_i32, bld.def(v1), Operand::c32(-1), src, Operand::c32(1u));
|
||||
bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src);
|
||||
|
Reference in New Issue
Block a user