nir/range-analysis: Adjust result range of multiplication to account for flush-to-zero

Fixes piglit tests (new in piglit!110): - fs-underflow-fma-compare-zero.shader_test - fs-underflow-mul-compare-zero.shader_test v2: Add back part of comment accidentally deleted. Noticed by Caio. Remove is_not_zero function as it is no longer used. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111308 Fixes: fa116ce357 ("nir/range-analysis: Range tracking for ffma and flrp") Fixes: 405de7ccb6 ("nir/range-analysis: Rudimentary value range analysis pass") Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com> All Gen7+ platforms** had similar results. (Ice Lake shown) total instructions in shared programs: 16278465 -> 16279492 (<.01%) instructions in affected programs: 16765 -> 17792 (6.13%) helped: 0 HURT: 23 HURT stats (abs) min: 7 max: 275 x̄: 44.65 x̃: 8 HURT stats (rel) min: 1.15% max: 17.51% x̄: 4.23% x̃: 1.62% 95% mean confidence interval for instructions value: 9.57 79.74 95% mean confidence interval for instructions %-change: 1.85% 6.61% Instructions are HURT. total cycles in shared programs: 367135159 -> 367154270 (<.01%) cycles in affected programs: 279306 -> 298417 (6.84%) helped: 0 HURT: 23 HURT stats (abs) min: 13 max: 6029 x̄: 830.91 x̃: 54 HURT stats (rel) min: 0.17% max: 45.67% x̄: 7.33% x̃: 0.49% 95% mean confidence interval for cycles value: 100.89 1560.94 95% mean confidence interval for cycles %-change: 0.94% 13.71% Cycles are HURT. total spills in shared programs: 8870 -> 8869 (-0.01%) spills in affected programs: 19 -> 18 (-5.26%) helped: 1 HURT: 0 total fills in shared programs: 21904 -> 21901 (-0.01%) fills in affected programs: 81 -> 78 (-3.70%) helped: 1 HURT: 0 LOST: 0 GAINED: 1 ** On Broadwell, a shader was hurt for spills / fills instead of helped. No changes on any earlier platforms.
2019-08-09 10:55:49 -07:00
parent 33ad2bab4b
commit ef2e235252
1 changed files with 22 additions and 31 deletions
--- a/src/compiler/nir/nir_range_analysis.c
+++ b/src/compiler/nir/nir_range_analysis.c
@@ -31,12 +31,6 @@
 * the result.
 */

-static bool
-is_not_zero(enum ssa_ranges r)
-{
-   return r == gt_zero || r == lt_zero || r == ne_zero;
-}
-
 static void *
 pack_data(const struct ssa_result_range r)
 {
@@ -271,7 +265,11 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
   ASSERT_TABLE_IS_COMMUTATIVE(fadd_table);
   ASSERT_TABLE_IS_DIAGONAL(fadd_table);

-   /* ge_zero: ge_zero * ge_zero
+   /* Due to flush-to-zero semanatics of floating-point numbers with very
+    * small mangnitudes, we can never really be sure a result will be
+    * non-zero.
+    *
+    * ge_zero: ge_zero * ge_zero
    *        | ge_zero * gt_zero
    *        | ge_zero * eq_zero
    *        | le_zero * lt_zero
@@ -280,9 +278,7 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
    *        | gt_zero * ge_zero  # Multiplication is commutative
    *        | eq_zero * ge_zero  # Multiplication is commutative
    *        | a * a              # Left source == right source
-    *        ;
-    *
-    * gt_zero: gt_zero * gt_zero
+    *        | gt_zero * gt_zero
    *        | lt_zero * lt_zero
    *        ;
    *
@@ -291,19 +287,10 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
    *        | lt_zero * ge_zero  # Multiplication is commutative
    *        | le_zero * ge_zero  # Multiplication is commutative
    *        | le_zero * gt_zero
-    *        ;
-    *
-    * lt_zero: lt_zero * gt_zero
+    *        | lt_zero * gt_zero
    *        | gt_zero * lt_zero  # Multiplication is commutative
    *        ;
    *
-    * ne_zero: ne_zero * gt_zero
-    *        | ne_zero * lt_zero
-    *        | gt_zero * ne_zero  # Multiplication is commutative
-    *        | lt_zero * ne_zero  # Multiplication is commutative
-    *        | ne_zero * ne_zero
-    *        ;
-    *
    * eq_zero: eq_zero * <any>
    *          <any> * eq_zero    # Multiplication is commutative
    *
@@ -312,11 +299,11 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
   static const enum ssa_ranges fmul_table[last_range + 1][last_range + 1] = {
      /* left\right   unknown  lt_zero  le_zero  gt_zero  ge_zero  ne_zero  eq_zero */
      /* unknown */ { _______, _______, _______, _______, _______, _______, eq_zero },
-      /* lt_zero */ { _______, gt_zero, ge_zero, lt_zero, le_zero, ne_zero, eq_zero },
+      /* lt_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
      /* le_zero */ { _______, ge_zero, ge_zero, le_zero, le_zero, _______, eq_zero },
-      /* gt_zero */ { _______, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero },
+      /* gt_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
      /* ge_zero */ { _______, le_zero, le_zero, ge_zero, ge_zero, _______, eq_zero },
-      /* ne_zero */ { _______, ne_zero, _______, ne_zero, _______, ne_zero, eq_zero },
+      /* ne_zero */ { _______, _______, _______, _______, _______, _______, eq_zero },
      /* eq_zero */ { eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero, eq_zero }
   };

@@ -601,11 +588,13 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,

      /* x * x => ge_zero */
      if (left.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
-         /* x * x => ge_zero or gt_zero depending on the range of x. */
-         r.range = is_not_zero(left.range) ? gt_zero : ge_zero;
+         /* Even if x > 0, the result of x*x can be zero when x is, for
+          * example, a subnormal number.
+          */
+         r.range = ge_zero;
      } else if (left.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
-         /* -x * x => le_zero or lt_zero depending on the range of x. */
-         r.range = is_not_zero(left.range) ? lt_zero : le_zero;
+         /* -x * x => le_zero. */
+         r.range = le_zero;
      } else
         r.range = fmul_table[left.range][right.range];

@@ -732,11 +721,13 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
      enum ssa_ranges fmul_range;

      if (first.range != eq_zero && nir_alu_srcs_equal(alu, alu, 0, 1)) {
-         /* x * x => ge_zero or gt_zero depending on the range of x. */
-         fmul_range = is_not_zero(first.range) ? gt_zero : ge_zero;
+         /* See handling of nir_op_fmul for explanation of why ge_zero is the
+          * range.
+          */
+         fmul_range = ge_zero;
      } else if (first.range != eq_zero && nir_alu_srcs_negative_equal(alu, alu, 0, 1)) {
-         /* -x * x => le_zero or lt_zero depending on the range of x. */
-         fmul_range = is_not_zero(first.range) ? lt_zero : le_zero;
+         /* -x * x => le_zero */
+         fmul_range = le_zero;
      } else
         fmul_range = fmul_table[first.range][second.range];