brw/cmod: Fix some errors when propagating from CMP to ADD.SAT

When I originally wrote that code, I didn't understand what a jerk NaN can be. v2: Remove the brw_type_is_uint stuff. This function is currently only called for float types. In a later commit, integer types will be supported but only for NZ and Z conditions. Noticed by Matt. shader-db: All Intel platforms had similar results. (Lunar Lake shown) total instructions in shared programs: 17122197 -> 17122204 (<.01%) instructions in affected programs: 1691 -> 1698 (0.41%) helped: 0 / HURT: 4 total cycles in shared programs: 895602484 -> 895602370 (<.01%) cycles in affected programs: 912964 -> 912850 (-0.01%) helped: 2 / HURT: 2 fossil-db: All Intel platforms had similar results. (Lunar Lake shown) Totals: Instrs: 210736388 -> 210736518 (+0.00%) Cycle count: 30377728900 -> 30377733040 (+0.00%); split: -0.00%, +0.00% Totals from 130 (0.02% of 706776) affected shaders: Instrs: 169911 -> 170041 (+0.08%) Cycle count: 18021210 -> 18025350 (+0.02%); split: -0.00%, +0.02% Reviewed-by: Matt Turner <mattst88@gmail.com> Fixes: 020b0055e7 ("i965/fs: Propagate conditional modifiers from compares to adds") Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34509> (cherry picked from commit 0dab520a19e323237e415210a70cb21b30512386)
2025-04-09 21:36:34 -07:00
parent d14c0562c6
commit aa08dfbad4
3 changed files with 204 additions and 46 deletions
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -1054,7 +1054,7 @@
        "description": "brw/cmod: Fix some errors when propagating from CMP to ADD.SAT",
        "nominated": true,
        "nomination_type": 2,
-        "resolution": 0,
+        "resolution": 1,
        "main_sha": null,
        "because_sha": "020b0055e7a085a6a8c961ad12ce94e58606a1ae",
        "notes": null
--- a/src/intel/compiler/brw_opt_cmod_propagation.cpp
+++ b/src/intel/compiler/brw_opt_cmod_propagation.cpp
@@ -24,6 +24,7 @@
 #include "brw_shader.h"
 #include "brw_cfg.h"
 #include "brw_eu.h"
+#include "util/half_float.h"

 /** @file
 *
@@ -48,6 +49,26 @@
 * exists and therefore remove the instruction.
 */

+static double
+src_as_float(const brw_reg &src)
+{
+   assert(src.file == IMM);
+
+   switch (src.type) {
+   case BRW_TYPE_HF:
+      return _mesa_half_to_float((uint16_t)src.d);
+
+   case BRW_TYPE_F:
+      return src.f;
+
+   case BRW_TYPE_DF:
+      return src.df;
+
+   default:
+      unreachable("Invalid float type.");
+   }
+}
+
 static bool
 cmod_propagate_cmp_to_add(const intel_device_info *devinfo, brw_inst *inst)
 {
@@ -116,17 +137,31 @@ cmod_propagate_cmp_to_add(const intel_device_info *devinfo, brw_inst *inst)
          * For negative values:
          * (sat(x) >  0) == (x >  0) --- false
          * (sat(x) <= 0) == (x <= 0) --- true
+          *
+          * Except for the x = NaN cases. sat(NaN) is 0, so add.sat.le of a
+          * NaN result will be true. add.sat.g of a NaN result is false, so
+          * the optimization is also incorrect when the second source of the
+          * comparison is less than zero. All of the fsat(x) > is_negative
+          * cases should have been eliminated in NIR.
          */
         const enum brw_conditional_mod cond =
            negate ? brw_swap_cmod(inst->conditional_mod)
            : inst->conditional_mod;

-         if (scan_inst->saturate &&
-             (brw_type_is_float(scan_inst->dst.type) ||
-              brw_type_is_uint(scan_inst->dst.type)) &&
-             (cond != BRW_CONDITIONAL_G &&
-              cond != BRW_CONDITIONAL_LE))
-            goto not_match;
+         if (scan_inst->saturate) {
+            if (cond != BRW_CONDITIONAL_G)
+               goto not_match;
+
+            if (inst->src[1].file != IMM)
+               goto not_match;
+
+            double v = src_as_float(inst->src[1]);
+            if (negate)
+               v = -v;
+
+            if (v < 0.0)
+               goto not_match;
+         }

         /* Otherwise, try propagating the conditional. */
         if (scan_inst->can_do_cmod() &&
--- a/src/intel/compiler/test_opt_cmod_propagation.cpp
+++ b/src/intel/compiler/test_opt_cmod_propagation.cpp
@@ -18,6 +18,11 @@ protected:
                           enum brw_reg_type add_type,
                           enum brw_reg_type op_type,
                           bool expected_cmod_prop_progress);
+
+   void test_cmp_to_add_sat(enum brw_conditional_mod before,
+                            bool add_negative_src0,
+                            bool add_negative_constant,
+                            bool expected_cmod_prop_progress);
 };

 TEST_F(cmod_propagation_test, basic)
@@ -2202,63 +2207,181 @@ TEST_F(cmod_propagation_test, not_to_or_intervening_mismatch_flag_read)
   EXPECT_SHADERS_MATCH(bld, exp);
 }

-TEST_F(cmod_propagation_test, cmp_to_add_float_e)
-{
-   brw_builder bld = make_shader();
-
-   brw_reg dest = bld.vgrf(BRW_TYPE_F);
-   brw_reg src0 = bld.vgrf(BRW_TYPE_F);
-   brw_reg neg10(brw_imm_f(-10.0f));
-   brw_reg pos10(brw_imm_f(10.0f));
-
-   bld.ADD(dest, src0, neg10)->saturate = true;
-   bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_EQ);
-
-   EXPECT_NO_PROGRESS(brw_opt_cmod_propagation, bld);
-}
-
-TEST_F(cmod_propagation_test, cmp_to_add_float_g)
+void
+cmod_propagation_test::test_cmp_to_add_sat(enum brw_conditional_mod before,
+                                           bool add_negative_src0,
+                                           bool add_negative_constant,
+                                           bool expected_cmod_prop_progress)
 {
   brw_builder bld = make_shader();
   brw_builder exp = make_shader();

   brw_reg dest  = vgrf(bld, exp, BRW_TYPE_F);
   brw_reg src0  = vgrf(bld, exp, BRW_TYPE_F);
-   brw_reg neg10 = brw_imm_f(-10.0f);
-   brw_reg pos10 = brw_imm_f(10.0f);
+   brw_reg neg = brw_imm_f(-0.5f);
+   brw_reg pos = brw_imm_f(0.5f);

-   bld.ADD(dest, src0, neg10)->saturate = true;
-   bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_G);
+   bld.ADD(dest,
+           add_negative_src0 ? negate(src0) : src0,
+           add_negative_constant ? neg : pos)
+      ->saturate = true;

-   EXPECT_PROGRESS(brw_opt_cmod_propagation, bld);
+   /* The parity of negations between the ADD and the CMP must be
+    * different. Otherwise the ADD and the CMP aren't performing the same
+    * arithmetic, and the optimization won't trigger.
+    */
+   const bool cmp_negative_constant =
+      add_negative_constant == add_negative_src0;

-   brw_inst *add = exp.ADD(dest, src0, neg10);
-   add->saturate = true;
-   add->conditional_mod = BRW_CONDITIONAL_G;
+   bld.CMP(bld.null_reg_f(),
+           src0,
+           cmp_negative_constant ? neg : pos,
+           before);

-   EXPECT_SHADERS_MATCH(bld, exp);
+   EXPECT_PROGRESS_RESULT(expected_cmod_prop_progress,
+                          brw_opt_cmod_propagation, bld);
+
+   if (expected_cmod_prop_progress) {
+      const enum brw_conditional_mod after =
+         add_negative_src0 ? brw_swap_cmod(before) : before;
+
+      brw_inst *add = exp.ADD(dest,
+                              add_negative_src0 ? negate(src0) : src0,
+                              add_negative_constant ? neg : pos);
+      add->saturate = true;
+      add->conditional_mod = after;
+
+      EXPECT_SHADERS_MATCH(bld, exp);
+   }
 }

-TEST_F(cmod_propagation_test, cmp_to_add_float_le)
+TEST_F(cmod_propagation_test, cmp_g_to_add_src0_neg_constant)
 {
-   brw_builder bld = make_shader();
-   brw_builder exp = make_shader();
+   /* This works even if src0 is NaN. (NaN > 0.5) is false, and (0.0 > 0.5) is
+    * false.
+    */
+   test_cmp_to_add_sat(BRW_CONDITIONAL_G, false, true, true);
+}

-   brw_reg dest  = vgrf(bld, exp, BRW_TYPE_F);
-   brw_reg src0  = vgrf(bld, exp, BRW_TYPE_F);
-   brw_reg neg10 = brw_imm_f(-10.0f);
-   brw_reg pos10 = brw_imm_f(10.0f);
+TEST_F(cmod_propagation_test, cmp_g_to_add_src0_pos_constant)
+{
+   /* This fails if src0 is NaN. (NaN > -0.5) is false, but (0.0 > -0.5) is
+    * true.
+    */
+   test_cmp_to_add_sat(BRW_CONDITIONAL_G, false, false, false);
+}

-   bld.ADD(dest, src0, neg10)->saturate = true;
-   bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_LE);
+TEST_F(cmod_propagation_test, cmp_g_to_add_neg_src0_neg_constant)
+{
+   /* This is effectively the same as cmp_l_to_add_src0_neg_constant. */
+   test_cmp_to_add_sat(BRW_CONDITIONAL_G, true, true, false);
+}

-   EXPECT_PROGRESS(brw_opt_cmod_propagation, bld);
+TEST_F(cmod_propagation_test, cmp_g_to_add_neg_src0_pos_constant)
+{
+   /* This is effectively the same as cmp_l_to_add_src0_pos_constant. */
+   test_cmp_to_add_sat(BRW_CONDITIONAL_G, true, false, false);
+}

-   brw_inst *add = exp.ADD(dest, src0, neg10);
-   add->saturate = true;
-   add->conditional_mod = BRW_CONDITIONAL_LE;
+TEST_F(cmod_propagation_test, cmp_l_to_add_src0_neg_constant)
+{
+   test_cmp_to_add_sat(BRW_CONDITIONAL_L, false, true, false);
+}

-   EXPECT_SHADERS_MATCH(bld, exp);
+TEST_F(cmod_propagation_test, cmp_l_to_add_src0_pos_constant)
+{
+   test_cmp_to_add_sat(BRW_CONDITIONAL_L, false, false, false);
+}
+
+TEST_F(cmod_propagation_test, cmp_l_to_add_neg_src0_neg_constant)
+{
+   /* This is effectively the same as cmp_g_to_add_src0_neg_constant. */
+   test_cmp_to_add_sat(BRW_CONDITIONAL_L, true, true, true);
+}
+
+TEST_F(cmod_propagation_test, cmp_l_to_add_neg_src0_pos_constant)
+{
+   test_cmp_to_add_sat(BRW_CONDITIONAL_L, true, false, false);
+}
+
+TEST_F(cmod_propagation_test, cmp_le_to_add_src0_neg_constant)
+{
+   test_cmp_to_add_sat(BRW_CONDITIONAL_LE, false, true, false);
+}
+
+TEST_F(cmod_propagation_test, cmp_le_to_add_src0_pos_constant)
+{
+   test_cmp_to_add_sat(BRW_CONDITIONAL_LE, false, false, false);
+}
+
+TEST_F(cmod_propagation_test, cmp_le_to_add_neg_src0_neg_constant)
+{
+   test_cmp_to_add_sat(BRW_CONDITIONAL_LE, true, true, false);
+}
+
+TEST_F(cmod_propagation_test, cmp_le_to_add_neg_src0_pos_constant)
+{
+   test_cmp_to_add_sat(BRW_CONDITIONAL_LE, true, false, false);
+}
+
+TEST_F(cmod_propagation_test, cmp_ge_to_add_src0_neg_constant)
+{
+   test_cmp_to_add_sat(BRW_CONDITIONAL_GE, false, true, false);
+}
+
+TEST_F(cmod_propagation_test, cmp_ge_to_add_src0_pos_constant)
+{
+   test_cmp_to_add_sat(BRW_CONDITIONAL_GE, false, false, false);
+}
+
+TEST_F(cmod_propagation_test, cmp_ge_to_add_neg_src0_neg_constant)
+{
+   test_cmp_to_add_sat(BRW_CONDITIONAL_GE, true, true, false);
+}
+
+TEST_F(cmod_propagation_test, cmp_ge_to_add_neg_src0_pos_constant)
+{
+   test_cmp_to_add_sat(BRW_CONDITIONAL_GE, true, false, false);
+}
+
+TEST_F(cmod_propagation_test, cmp_nz_to_add_src0_neg_constant)
+{
+   test_cmp_to_add_sat(BRW_CONDITIONAL_NZ, false, true, false);
+}
+
+TEST_F(cmod_propagation_test, cmp_nz_to_add_src0_pos_constant)
+{
+   test_cmp_to_add_sat(BRW_CONDITIONAL_NZ, false, false, false);
+}
+
+TEST_F(cmod_propagation_test, cmp_nz_to_add_neg_src0_neg_constant)
+{
+   test_cmp_to_add_sat(BRW_CONDITIONAL_NZ, true, true, false);
+}
+
+TEST_F(cmod_propagation_test, cmp_nz_to_add_neg_src0_pos_constant)
+{
+   test_cmp_to_add_sat(BRW_CONDITIONAL_NZ, true, false, false);
+}
+
+TEST_F(cmod_propagation_test, cmp_z_to_add_src0_neg_constant)
+{
+   test_cmp_to_add_sat(BRW_CONDITIONAL_Z, false, true, false);
+}
+
+TEST_F(cmod_propagation_test, cmp_z_to_add_src0_pos_constant)
+{
+   test_cmp_to_add_sat(BRW_CONDITIONAL_Z, false, false, false);
+}
+
+TEST_F(cmod_propagation_test, cmp_z_to_add_neg_src0_neg_constant)
+{
+   test_cmp_to_add_sat(BRW_CONDITIONAL_Z, true, true, false);
+}
+
+TEST_F(cmod_propagation_test, cmp_z_to_add_neg_src0_pos_constant)
+{
+   test_cmp_to_add_sat(BRW_CONDITIONAL_Z, true, false, false);
 }

 TEST_F(cmod_propagation_test, prop_across_sel)