nir/opt_algebraic: optimize sign bit manipulation

libclc loves to generate the iand(0x7fffffff) pattern. ior/ixor patterns are
added for completeness.

Shaves 4 instructions off libclc vec4 normalize.

v2: Loop over the bit sizes (Georg).

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Marek Olšák <marek.olsak@amd.com> [v1]
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32398>
This commit is contained in:
Alyssa Rosenzweig
2024-11-28 17:39:44 -05:00
committed by Marge Bot
parent be049e1c14
commit 77d4ed0a01

View File

@@ -352,6 +352,18 @@ optimizations = [
('fexp2', ('fmulz', a, 'mb'))), {'mb': b}),
]
# Integer ops that touch only the sign bit are rewritten as the matching
# floating point ops: clearing it is fabs, flipping it is fneg and setting it
# is fneg(fabs). The float forms may differ in denormal behaviour, which is
# why every pattern is guarded by is_only_used_as_float.
for sz in (16, 32, 64):
    sign_bit = 1 << (sz - 1)
    # Every bit below the sign bit set; AND-ing with it clears only the sign.
    abs_mask = sign_bit - 1
    # Search variable constrained to the bit size of this iteration.
    sized_a = f'a@{sz}'

    optimizations.extend([
        (('iand(is_only_used_as_float)', sized_a, abs_mask),
         ('fabs', a)),
        (('ixor(is_only_used_as_float)', sized_a, sign_bit),
         ('fneg', a)),
        (('ior(is_only_used_as_float)', sized_a, sign_bit),
         ('fneg', ('fabs', a))),
    ])
# Shorthand for the expansion of just the dot product part of the [iu]dp4a
# instructions.
sdot_4x8_a_b = ('iadd', ('iadd', ('imul', ('extract_i8', a, 0), ('extract_i8', b, 0)),