nir: Lower fquantize2f16
Passes dEQP-VK.spirv_assembly.*opquantize*. Unlike the DXIL lowering, this should correctly handle NaNs. (I believe Dozen has a bug here that is masked by running constant folding early and poor CTS coverage.) It is also faster than the DXIL lowering for hardware that supports f2f16 conversions natively. It is not as good as a backend implementation that could flush-to-zero in hardware... but for a debug instruction it should be more than good enough. It might be slightly better to multiply with 0.0 to get the appropriate zero, but NIR really likes optimizing that out ... Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24616>
This commit is contained in:

committed by
Marge Bot

parent
05cb55abe8
commit
a257e2daad
@@ -3847,6 +3847,9 @@ typedef struct nir_shader_compiler_options {
|
||||
* type casts (e.g. f2f16).
|
||||
*/
|
||||
bool preserve_mediump;
|
||||
|
||||
/** lowers fquantize2f16 to alu ops. */
|
||||
bool lower_fquantize2f16;
|
||||
} nir_shader_compiler_options;
|
||||
|
||||
typedef struct nir_shader {
|
||||
|
@@ -2729,6 +2729,21 @@ optimizations.extend([
|
||||
(('fisnormal', 'a@64'), ('ult', 0x3fffffffffffff, ('iadd', ('ishl', a, 1), 0x20000000000000)), 'options->lower_fisnormal')
|
||||
])
|
||||
|
||||
|
||||
"""
|
||||
if (fabs(val) < SMALLEST_NORMALIZED_FLOAT16)
|
||||
return (val & SIGN_BIT) /* +0.0 or -0.0 as appropriate */;
|
||||
else
|
||||
return f2f32(f2f16(val));
|
||||
"""
|
||||
optimizations.extend([
|
||||
(('fquantize2f16', 'a@32'),
|
||||
('bcsel', ('!flt', ('!fabs', a), math.ldexp(1.0, -14)),
|
||||
('iand', a, 1 << 31),
|
||||
('!f2f32', ('!f2f16_rtne', a))),
|
||||
'options->lower_fquantize2f16')
|
||||
])
|
||||
|
||||
for s in range(0, 31):
|
||||
mask = 0xffffffff << s
|
||||
|
||||
|
Reference in New Issue
Block a user