nir: Lower fquantize2f16

Passes dEQP-VK.spirv_assembly.*opquantize*.

Unlike the DXIL lowering, this should correctly handle NaNs. (I believe Dozen has
a bug here that is masked by running constant folding early and poor CTS
coverage.) It is also faster than the DXIL lowering for hardware that supports
f2f16 conversions natively. It is not as good as a backend implementation that
could flush to zero in hardware... but for a debug instruction it should be more
than good enough.

It might be slightly better to multiply by 0.0 to get the appropriately signed
zero, but NIR really likes optimizing that out...
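
For reference, a minimal scalar sketch (not part of this change) of what the
lowered sequence computes, assuming a compiler with the _Float16 extension for
the f2f16/f2f32 round trip:

#include <math.h>
#include <stdint.h>
#include <string.h>

static float
quantize2f16_lowered(float val)
{
   /* |val| below 2^-14 (the smallest normal half-float) flushes to a zero
    * that keeps only the sign bit. NaN fails the ordered comparison, so it
    * falls through to the conversion below and survives as a NaN. */
   if (fabsf(val) < ldexpf(1.0f, -14)) {
      uint32_t bits;
      memcpy(&bits, &val, sizeof(bits));
      bits &= 0x80000000u; /* +0.0 or -0.0 as appropriate */
      memcpy(&val, &bits, sizeof(val));
      return val;
   }

   /* Round-trip through half precision, round-to-nearest-even. */
   return (float)(_Float16)val;
}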

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24616>
Author:  Alyssa Rosenzweig
Date:    2023-08-10 14:12:37 -04:00 (committed by Marge Bot)
Commit:  a257e2daad (parent 05cb55abe8)
2 changed files with 18 additions and 0 deletions


@@ -3847,6 +3847,9 @@ typedef struct nir_shader_compiler_options {
* type casts (e.g. f2f16).
*/
bool preserve_mediump;
/** lowers fquantize2f16 to alu ops. */
bool lower_fquantize2f16;
} nir_shader_compiler_options;
typedef struct nir_shader {
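
Illustrative only: a backend without a native fquantize2f16 instruction would
opt in by setting the new flag in its compiler options. The variable name and
the omitted fields below are hypothetical:

/* Hypothetical backend options struct -- not part of this change. */
static const nir_shader_compiler_options example_backend_nir_options = {
   .lower_fquantize2f16 = true,
   /* ... the backend's other lowering flags ... */
};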


@@ -2729,6 +2729,21 @@ optimizations.extend([
(('fisnormal', 'a@64'), ('ult', 0x3fffffffffffff, ('iadd', ('ishl', a, 1), 0x20000000000000)), 'options->lower_fisnormal')
])
"""
if (fabs(val) < SMALLEST_NORMALIZED_FLOAT16)
return (val & SIGN_BIT) /* +0.0 or -0.0 as appropriate */;
else
return f2f32(f2f16(val));
"""
optimizations.extend([
(('fquantize2f16', 'a@32'),
('bcsel', ('!flt', ('!fabs', a), math.ldexp(1.0, -14)),
('iand', a, 1 << 31),
('!f2f32', ('!f2f16_rtne', a))),
'options->lower_fquantize2f16')
])
for s in range(0, 31):
mask = 0xffffffff << s
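
No dedicated pass is needed on the driver side: the rule fires from the
regular algebraic optimization loop once the option is set. A sketch of a
typical loop (the set of other passes shown is illustrative):

#include "nir.h" /* NIR core header; the include path varies per driver. */

static void
example_optimize(nir_shader *shader)
{
   bool progress;
   do {
      progress = false;
      /* nir_opt_algebraic() applies the fquantize2f16 rule when
       * options->lower_fquantize2f16 is set. */
      NIR_PASS(progress, shader, nir_opt_algebraic);
      NIR_PASS(progress, shader, nir_opt_constant_folding);
      /* ... other passes the backend normally runs ... */
   } while (progress);
}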