Revert "nir: replace lower_ffma and fuse_ffma with has_ffma"

This reverts commit 939ddf3f67.

Intel has a separate pass for fusing FFMAs selectively. We split these flags in commit 1b72c31e1f and the reasoning still stands.

The patch being reverted was just a cleanup, so there should be no issue with reverting it.

Acked-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6849>
2020-09-24 08:46:31 -07:00
parent d8cdcd4adf
commit 140f53e646
19 changed files with 84 additions and 42 deletions
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3055,9 +3055,12 @@ typedef enum {

 typedef struct nir_shader_compiler_options {
   bool lower_fdiv;
-   bool has_ffma16;
-   bool has_ffma32;
-   bool has_ffma64;
+   bool lower_ffma16;
+   bool lower_ffma32;
+   bool lower_ffma64;
+   bool fuse_ffma16;
+   bool fuse_ffma32;
+   bool fuse_ffma64;
   bool lower_flrp16;
   bool lower_flrp32;
   /** Lowers flrp when it does not support doubles */
--- a/src/compiler/nir/nir_lower_flrp.c
+++ b/src/compiler/nir/nir_lower_flrp.c
@@ -370,11 +370,11 @@ convert_flrp_instruction(nir_builder *bld,
   unsigned bit_size = nir_dest_bit_size(alu->dest.dest);

   if (bit_size == 16)
-      have_ffma = bld->shader->options->has_ffma16;
+      have_ffma = !bld->shader->options->lower_ffma16;
   else if (bit_size == 32)
-      have_ffma = bld->shader->options->has_ffma32;
+      have_ffma = !bld->shader->options->lower_ffma32;
   else if (bit_size == 64)
-      have_ffma = bld->shader->options->has_ffma64;
+      have_ffma = !bld->shader->options->lower_ffma64;
   else
      unreachable("invalid bit_size");

--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -193,13 +193,13 @@ optimizations.extend([
   (('fadd', a, ('fneg', ('ffract', a))), ('ffloor', a), '!options->lower_ffloor'),
   (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
   (('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'),
-   (('ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), '!options->has_ffma16'),
-   (('ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), '!options->has_ffma32'),
-   (('ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), '!options->has_ffma64'),
+   (('ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma16'),
+   (('ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma32'),
+   (('ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma64'),
   # Always lower inexact ffma, because it will be fused back by late optimizations (nir_opt_algebraic_late).
-   (('~ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->has_ffma16'),
-   (('~ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->has_ffma32'),
-   (('~ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->has_ffma64'),
+   (('~ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma16'),
+   (('~ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma32'),
+   (('~ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma64'),

   (('~fmul', ('fadd', ('iand', ('ineg', ('b2i', 'a@bool')), ('fmul', b, c)), '#d'), '#e'),
    ('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', d, e))),
@@ -2032,9 +2032,9 @@ late_optimizations = [
   (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
   (('ineg', a), ('isub', 0, a), 'options->lower_negate'),
   (('iabs', a), ('imax', a, ('ineg', a)), 'options->lower_iabs'),
-   (('~fadd@16', ('fmul', a, b), c), ('ffma', a, b, c), 'options->has_ffma16'),
-   (('~fadd@32', ('fmul', a, b), c), ('ffma', a, b, c), 'options->has_ffma32'),
-   (('~fadd@64', ('fmul', a, b), c), ('ffma', a, b, c), 'options->has_ffma64'),
+   (('~fadd@16', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma16'),
+   (('~fadd@32', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma32'),
+   (('~fadd@64', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma64'),

   # These are duplicated from the main optimizations table.  The late
   # patterns that rearrange expressions like x - .5 < 0 to x < .5 can create