nir/opt_algebraic: optimize sign bit manipulation

libclc loves to generate the iand(0x7fffffff) pattern. ior/ixor patterns are added for completeness. Shaves 4 instructions off libclc vec4 normalize. v2: Loop over the bit sizes (Georg). Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Reviewed-by: Marek Olšák <marek.olsak@amd.com> [v1] Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32398>
2024-11-28 17:39:44 -05:00
parent be049e1c14
commit 77d4ed0a01
1 changed files with 12 additions and 0 deletions
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -352,6 +352,18 @@ optimizations = [
    ('fexp2', ('fmulz', a, 'mb'))), {'mb': b}),
 ]

+# Bitwise operations affecting the sign may be replaced by equivalent
+# floating point operations, except possibly for denormal
+# behaviour hence the is_only_used_as_float.
+for sz in (16, 32, 64):
+    sign_bit = 1 << (sz - 1)
+
+    optimizations.extend([
+        (('iand(is_only_used_as_float)', f'a@{sz}', sign_bit - 1), ('fabs', a)),
+        (('ixor(is_only_used_as_float)', f'a@{sz}', sign_bit), ('fneg', a)),
+        (('ior(is_only_used_as_float)', f'a@{sz}', sign_bit), ('fneg', ('fabs', a))),
+    ])
+
 # Shorthand for the expansion of just the dot product part of the [iu]dp4a
 # instructions.
 sdot_4x8_a_b = ('iadd', ('iadd', ('imul', ('extract_i8', a, 0), ('extract_i8', b, 0)),