nir/opt_algebraic: use fmulz for fpow lowering to fix incorrect rendering

The original implementation in all radeon drivers had this behavior.

Fixes: 9bc1fb4c07 - ac/llvm,radeonsi: lower nir_fpow for aco and llvm
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11464

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30069>
This commit is contained in:
Marek Olšák
2024-07-08 07:56:20 -04:00
committed by Marge Bot
parent ec4e5ef0f7
commit ecfefe823e

View File

@@ -340,6 +340,8 @@ optimizations = [
*add_fabs_fneg((('bcsel(nsz,nnan,ninf)', ignore_exact('feq', b, 0.0), 1.0, ('fexp2', ('fmul@32', a, 'mb'))),
('fexp2', ('fmulz', a, 'mb')),
has_fmulz), {'mb': b}),
*add_fabs_fneg((('bcsel', ignore_exact('feq', b, 0.0), 1.0, ('fexp2', ('fmulz', a, 'mb'))),
('fexp2', ('fmulz', a, 'mb'))), {'mb': b}),
]
# Shorthand for the expansion of just the dot product part of the [iu]dp4a
@@ -1518,6 +1520,8 @@ optimizations.extend([
# Exponential/logarithmic identities
(('~fexp2', ('flog2', a)), a), # 2^lg2(a) = a
(('~flog2', ('fexp2', a)), a), # lg2(2^a) = a
# 32-bit fpow should use fmulz to fix https://gitlab.freedesktop.org/mesa/mesa/-/issues/11464 (includes apitrace)
(('fpow@32', a, b), ('fexp2', ('fmulz', ('flog2', a), b)), 'options->lower_fpow && ' + has_fmulz), # a^b = 2^(lg2(a)*b)
(('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b)
(('~fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
(('~fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), d))),