From aeb68c29b48f8b138e0eaa21cacec9e5712ca8ed Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Mon, 27 Feb 2023 23:38:22 +0100
Subject: [PATCH] nir/opt_algebraic: add patterns for iand/ior of feq/fneu with 0

Foz-DB Navi21:
Totals from 1245 (0.92% of 134913) affected shaders:
VGPRs: 66232 -> 66248 (+0.02%); split: -0.01%, +0.04%
CodeSize: 5874976 -> 5868168 (-0.12%); split: -0.17%, +0.05%
MaxWaves: 25278 -> 25274 (-0.02%); split: +0.01%, -0.02%
Instrs: 1087502 -> 1085267 (-0.21%); split: -0.21%, +0.00%
Latency: 6531489 -> 6531672 (+0.00%); split: -0.04%, +0.05%
InvThroughput: 1531774 -> 1532327 (+0.04%); split: -0.02%, +0.05%
VClause: 22218 -> 22202 (-0.07%); split: -0.08%, +0.00%
SClause: 45906 -> 45873 (-0.07%); split: -0.08%, +0.01%
Copies: 64004 -> 64102 (+0.15%); split: -0.24%, +0.39%
Branches: 21529 -> 21534 (+0.02%); split: -0.00%, +0.03%
PreSGPRs: 51936 -> 51850 (-0.17%)
PreVGPRs: 55393 -> 55398 (+0.01%); split: -0.02%, +0.03%

Reviewed-by: Ian Romanick
Part-of:
---
 src/compiler/nir/nir_opt_algebraic.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 9d2fbb55c05..60957c8c5da 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -931,6 +931,11 @@ for s in [16, 32, 64]:
         (('~iand', ('fge(is_used_once)', 0.0, 'a@{}'.format(s)), ('fge', 'b@{}'.format(s), 0.0)), ('fge', 0.0, ('fmax', a, ('fneg', b)))),
         (('~iand', ('fge', 0.0, 'a@{}'.format(s)), ('fge(is_used_once)', 'b@{}'.format(s), 0.0)), ('fge', 0.0, ('fmax', a, ('fneg', b)))),
 
+        (('ior', ('feq(is_used_once)', 'a@{}'.format(s), 0.0), ('feq', 'b@{}'.format(s), 0.0)), ('feq', ('fmin', ('fabs', a), ('fabs', b)), 0.0)),
+        (('ior', ('fneu(is_used_once)', 'a@{}'.format(s), 0.0), ('fneu', 'b@{}'.format(s), 0.0)), ('fneu', ('fadd', ('fabs', a), ('fabs', b)), 0.0)),
+        (('iand', ('feq(is_used_once)', 'a@{}'.format(s), 0.0), ('feq', 'b@{}'.format(s), 0.0)), ('feq', ('fadd', ('fabs', a), ('fabs', b)), 0.0)),
+        (('iand', ('fneu(is_used_once)', 'a@{}'.format(s), 0.0), ('fneu', 'b@{}'.format(s), 0.0)), ('fneu', ('fmin', ('fabs', a), ('fabs', b)), 0.0)),
+
         # The (i2f32, ...) part is an open-coded fsign. When that is combined
         # with the bcsel, it's basically copysign(1.0, a). There are some
         # behavior differences between this pattern and copysign w.r.t. ±0 and