From a257e2daad983204abf9ba47856f9ace0bc79b05 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 10 Aug 2023 14:12:37 -0400 Subject: [PATCH] nir: Lower fquantize2f16 Passes dEQP-VK.spirv_assembly.*opquantize*. Unlike the DXIL lowering, this should correctly handle NaNs. (I believe Dozen has a bug here that is masked by running constant folding early and poor CTS coverage.) It is also faster than the DXIL lowering for hardware that supports f2f16 conversions natively. It is not as good as a backend implementation that could flush-to-zero in hardware... but for a debug instruction it should be more than good enough. It might be slightly better to multiply with 0.0 to get the appropriate zero, but NIR really likes optimizing that out ... Signed-off-by: Alyssa Rosenzweig Reviewed-by: Georg Lehmann Part-of: --- src/compiler/nir/nir.h | 3 +++ src/compiler/nir/nir_opt_algebraic.py | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 6b5414dca49..31bc0f68518 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3847,6 +3847,9 @@ typedef struct nir_shader_compiler_options { * type casts (e.g. f2f16). */ bool preserve_mediump; + + /** lowers fquantize2f16 to alu ops. 
*/ + bool lower_fquantize2f16; } nir_shader_compiler_options; typedef struct nir_shader { diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 374ddb68126..a430aa1bca1 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -2729,6 +2729,21 @@ optimizations.extend([ (('fisnormal', 'a@64'), ('ult', 0x3fffffffffffff, ('iadd', ('ishl', a, 1), 0x20000000000000)), 'options->lower_fisnormal') ]) + +""" + if (fabs(val) < SMALLEST_NORMALIZED_FLOAT16) + return (val & SIGN_BIT) /* +0.0 or -0.0 as appropriate */; + else + return f2f32(f2f16(val)); +""" +optimizations.extend([ + (('fquantize2f16', 'a@32'), + ('bcsel', ('!flt', ('!fabs', a), math.ldexp(1.0, -14)), + ('iand', a, 1 << 31), + ('!f2f32', ('!f2f16_rtne', a))), + 'options->lower_fquantize2f16') + ]) + for s in range(0, 31): mask = 0xffffffff << s