From aa08dfbad4eb8042d49671c483da222c1e1eed50 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 9 Apr 2025 21:36:34 -0700 Subject: [PATCH] brw/cmod: Fix some errors when propagating from CMP to ADD.SAT When I originally wrote that code, I didn't understand what a jerk NaN can be. v2: Remove the brw_type_is_uint stuff. This function is currently only called for float types. In a later commit, integer types will be supported but only for NZ and Z conditions. Noticed by Matt. shader-db: All Intel platforms had similar results. (Lunar Lake shown) total instructions in shared programs: 17122197 -> 17122204 (<.01%) instructions in affected programs: 1691 -> 1698 (0.41%) helped: 0 / HURT: 4 total cycles in shared programs: 895602484 -> 895602370 (<.01%) cycles in affected programs: 912964 -> 912850 (-0.01%) helped: 2 / HURT: 2 fossil-db: All Intel platforms had similar results. (Lunar Lake shown) Totals: Instrs: 210736388 -> 210736518 (+0.00%) Cycle count: 30377728900 -> 30377733040 (+0.00%); split: -0.00%, +0.00% Totals from 130 (0.02% of 706776) affected shaders: Instrs: 169911 -> 170041 (+0.08%) Cycle count: 18021210 -> 18025350 (+0.02%); split: -0.00%, +0.02% Reviewed-by: Matt Turner Fixes: 020b0055e7a ("i965/fs: Propagate conditional modifiers from compares to adds") Part-of: (cherry picked from commit 0dab520a19e323237e415210a70cb21b30512386) --- .pick_status.json | 2 +- .../compiler/brw_opt_cmod_propagation.cpp | 47 +++- .../compiler/test_opt_cmod_propagation.cpp | 201 ++++++++++++++---- 3 files changed, 204 insertions(+), 46 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 198cefd5649..fba5a37c2b0 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1054,7 +1054,7 @@ "description": "brw/cmod: Fix some errors when propagating from CMP to ADD.SAT", "nominated": true, "nomination_type": 2, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "020b0055e7a085a6a8c961ad12ce94e58606a1ae", "notes": null diff --git a/src/intel/compiler/brw_opt_cmod_propagation.cpp b/src/intel/compiler/brw_opt_cmod_propagation.cpp index 55365f317d0..1c201d0b46a 100644 --- a/src/intel/compiler/brw_opt_cmod_propagation.cpp +++ b/src/intel/compiler/brw_opt_cmod_propagation.cpp @@ -24,6 +24,7 @@ #include "brw_shader.h" #include "brw_cfg.h" #include "brw_eu.h" +#include "util/half_float.h" /** @file * @@ -48,6 +49,26 @@ * exists and therefore remove the instruction. */ +static double +src_as_float(const brw_reg &src) +{ + assert(src.file == IMM); + + switch (src.type) { + case BRW_TYPE_HF: + return _mesa_half_to_float((uint16_t)src.d); + + case BRW_TYPE_F: + return src.f; + + case BRW_TYPE_DF: + return src.df; + + default: + unreachable("Invalid float type."); + } +} + static bool cmod_propagate_cmp_to_add(const intel_device_info *devinfo, brw_inst *inst) { @@ -116,17 +137,31 @@ cmod_propagate_cmp_to_add(const intel_device_info *devinfo, brw_inst *inst) * For negative values: * (sat(x) > 0) == (x > 0) --- false * (sat(x) <= 0) == (x <= 0) --- true + * + * Except for the x = NaN cases. sat(NaN) is 0, so add.sat.le of a + * NaN result will be true. add.sat.g of a NaN result is false, so + * the optimization is also incorrect when the second source of the + * comparison is less than zero. All of the fsat(x) > is_negative + * cases should have been eliminated in NIR. */ const enum brw_conditional_mod cond = negate ? brw_swap_cmod(inst->conditional_mod) : inst->conditional_mod; - if (scan_inst->saturate && - (brw_type_is_float(scan_inst->dst.type) || - brw_type_is_uint(scan_inst->dst.type)) && - (cond != BRW_CONDITIONAL_G && - cond != BRW_CONDITIONAL_LE)) - goto not_match; + if (scan_inst->saturate) { + if (cond != BRW_CONDITIONAL_G) + goto not_match; + + if (inst->src[1].file != IMM) + goto not_match; + + double v = src_as_float(inst->src[1]); + if (negate) + v = -v; + + if (v < 0.0) + goto not_match; + } /* Otherwise, try propagating the conditional. */ if (scan_inst->can_do_cmod() && diff --git a/src/intel/compiler/test_opt_cmod_propagation.cpp b/src/intel/compiler/test_opt_cmod_propagation.cpp index f7e2011d545..1f5fe14ae27 100644 --- a/src/intel/compiler/test_opt_cmod_propagation.cpp +++ b/src/intel/compiler/test_opt_cmod_propagation.cpp @@ -18,6 +18,11 @@ protected: enum brw_reg_type add_type, enum brw_reg_type op_type, bool expected_cmod_prop_progress); + + void test_cmp_to_add_sat(enum brw_conditional_mod before, + bool add_negative_src0, + bool add_negative_constant, + bool expected_cmod_prop_progress); }; TEST_F(cmod_propagation_test, basic) @@ -2202,63 +2207,181 @@ TEST_F(cmod_propagation_test, not_to_or_intervening_mismatch_flag_read) EXPECT_SHADERS_MATCH(bld, exp); } -TEST_F(cmod_propagation_test, cmp_to_add_float_e) -{ - brw_builder bld = make_shader(); - - brw_reg dest = bld.vgrf(BRW_TYPE_F); - brw_reg src0 = bld.vgrf(BRW_TYPE_F); - brw_reg neg10(brw_imm_f(-10.0f)); - brw_reg pos10(brw_imm_f(10.0f)); - - bld.ADD(dest, src0, neg10)->saturate = true; - bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_EQ); - - EXPECT_NO_PROGRESS(brw_opt_cmod_propagation, bld); -} - -TEST_F(cmod_propagation_test, cmp_to_add_float_g) +void +cmod_propagation_test::test_cmp_to_add_sat(enum brw_conditional_mod before, + bool add_negative_src0, + bool add_negative_constant, + bool expected_cmod_prop_progress) { brw_builder bld = make_shader(); brw_builder exp = make_shader(); brw_reg dest = vgrf(bld, exp, BRW_TYPE_F); brw_reg src0 = vgrf(bld, exp, BRW_TYPE_F); - brw_reg neg10 = brw_imm_f(-10.0f); - brw_reg pos10 = brw_imm_f(10.0f); + brw_reg neg = brw_imm_f(-0.5f); + brw_reg pos = brw_imm_f(0.5f); - bld.ADD(dest, src0, neg10)->saturate = true; - bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_G); + bld.ADD(dest, + add_negative_src0 ? negate(src0) : src0, + add_negative_constant ? neg : pos) + ->saturate = true; - EXPECT_PROGRESS(brw_opt_cmod_propagation, bld); + /* The parity of negations between the ADD and the CMP must be + * different. Otherwise the ADD and the CMP aren't performing the same + * arithmetic, and the optimization won't trigger. + */ + const bool cmp_negative_constant = + add_negative_constant == add_negative_src0; - brw_inst *add = exp.ADD(dest, src0, neg10); - add->saturate = true; - add->conditional_mod = BRW_CONDITIONAL_G; + bld.CMP(bld.null_reg_f(), + src0, + cmp_negative_constant ? neg : pos, + before); - EXPECT_SHADERS_MATCH(bld, exp); + EXPECT_PROGRESS_RESULT(expected_cmod_prop_progress, + brw_opt_cmod_propagation, bld); + + if (expected_cmod_prop_progress) { + const enum brw_conditional_mod after = + add_negative_src0 ? brw_swap_cmod(before) : before; + + brw_inst *add = exp.ADD(dest, + add_negative_src0 ? negate(src0) : src0, + add_negative_constant ? neg : pos); + add->saturate = true; + add->conditional_mod = after; + + EXPECT_SHADERS_MATCH(bld, exp); + } } -TEST_F(cmod_propagation_test, cmp_to_add_float_le) +TEST_F(cmod_propagation_test, cmp_g_to_add_src0_neg_constant) { - brw_builder bld = make_shader(); - brw_builder exp = make_shader(); + /* This works even if src0 is NaN. (NaN > 0.5) is false, and (0.0 > 0.5) is + * false. + */ + test_cmp_to_add_sat(BRW_CONDITIONAL_G, false, true, true); +} - brw_reg dest = vgrf(bld, exp, BRW_TYPE_F); - brw_reg src0 = vgrf(bld, exp, BRW_TYPE_F); - brw_reg neg10 = brw_imm_f(-10.0f); - brw_reg pos10 = brw_imm_f(10.0f); +TEST_F(cmod_propagation_test, cmp_g_to_add_src0_pos_constant) +{ + /* This fails if src0 is NaN. (NaN > -0.5) is false, but (0.0 > -0.5) is + * true. + */ + test_cmp_to_add_sat(BRW_CONDITIONAL_G, false, false, false); +} - bld.ADD(dest, src0, neg10)->saturate = true; - bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_LE); +TEST_F(cmod_propagation_test, cmp_g_to_add_neg_src0_neg_constant) +{ + /* This is effectively the same as cmp_l_to_add_src0_neg_constant. */ + test_cmp_to_add_sat(BRW_CONDITIONAL_G, true, true, false); +} - EXPECT_PROGRESS(brw_opt_cmod_propagation, bld); +TEST_F(cmod_propagation_test, cmp_g_to_add_neg_src0_pos_constant) +{ + /* This is effectively the same as cmp_l_to_add_src0_pos_constant. */ + test_cmp_to_add_sat(BRW_CONDITIONAL_G, true, false, false); +} - brw_inst *add = exp.ADD(dest, src0, neg10); - add->saturate = true; - add->conditional_mod = BRW_CONDITIONAL_LE; +TEST_F(cmod_propagation_test, cmp_l_to_add_src0_neg_constant) +{ + test_cmp_to_add_sat(BRW_CONDITIONAL_L, false, true, false); +} - EXPECT_SHADERS_MATCH(bld, exp); +TEST_F(cmod_propagation_test, cmp_l_to_add_src0_pos_constant) +{ + test_cmp_to_add_sat(BRW_CONDITIONAL_L, false, false, false); +} + +TEST_F(cmod_propagation_test, cmp_l_to_add_neg_src0_neg_constant) +{ + /* This is effectively the same as cmp_g_to_add_src0_neg_constant. */ + test_cmp_to_add_sat(BRW_CONDITIONAL_L, true, true, true); +} + +TEST_F(cmod_propagation_test, cmp_l_to_add_neg_src0_pos_constant) +{ + test_cmp_to_add_sat(BRW_CONDITIONAL_L, true, false, false); +} + +TEST_F(cmod_propagation_test, cmp_le_to_add_src0_neg_constant) +{ + test_cmp_to_add_sat(BRW_CONDITIONAL_LE, false, true, false); +} + +TEST_F(cmod_propagation_test, cmp_le_to_add_src0_pos_constant) +{ + test_cmp_to_add_sat(BRW_CONDITIONAL_LE, false, false, false); +} + +TEST_F(cmod_propagation_test, cmp_le_to_add_neg_src0_neg_constant) +{ + test_cmp_to_add_sat(BRW_CONDITIONAL_LE, true, true, false); +} + +TEST_F(cmod_propagation_test, cmp_le_to_add_neg_src0_pos_constant) +{ + test_cmp_to_add_sat(BRW_CONDITIONAL_LE, true, false, false); +} + +TEST_F(cmod_propagation_test, cmp_ge_to_add_src0_neg_constant) +{ + test_cmp_to_add_sat(BRW_CONDITIONAL_GE, false, true, false); +} + +TEST_F(cmod_propagation_test, cmp_ge_to_add_src0_pos_constant) +{ + test_cmp_to_add_sat(BRW_CONDITIONAL_GE, false, false, false); +} + +TEST_F(cmod_propagation_test, cmp_ge_to_add_neg_src0_neg_constant) +{ + test_cmp_to_add_sat(BRW_CONDITIONAL_GE, true, true, false); +} + +TEST_F(cmod_propagation_test, cmp_ge_to_add_neg_src0_pos_constant) +{ + test_cmp_to_add_sat(BRW_CONDITIONAL_GE, true, false, false); +} + +TEST_F(cmod_propagation_test, cmp_nz_to_add_src0_neg_constant) +{ + test_cmp_to_add_sat(BRW_CONDITIONAL_NZ, false, true, false); +} + +TEST_F(cmod_propagation_test, cmp_nz_to_add_src0_pos_constant) +{ + test_cmp_to_add_sat(BRW_CONDITIONAL_NZ, false, false, false); +} + +TEST_F(cmod_propagation_test, cmp_nz_to_add_neg_src0_neg_constant) +{ + test_cmp_to_add_sat(BRW_CONDITIONAL_NZ, true, true, false); +} + +TEST_F(cmod_propagation_test, cmp_nz_to_add_neg_src0_pos_constant) +{ + test_cmp_to_add_sat(BRW_CONDITIONAL_NZ, true, false, false); +} + +TEST_F(cmod_propagation_test, cmp_z_to_add_src0_neg_constant) +{ + test_cmp_to_add_sat(BRW_CONDITIONAL_Z, false, true, false); +} + +TEST_F(cmod_propagation_test, cmp_z_to_add_src0_pos_constant) +{ + test_cmp_to_add_sat(BRW_CONDITIONAL_Z, false, false, false); +} + +TEST_F(cmod_propagation_test, cmp_z_to_add_neg_src0_neg_constant) +{ + test_cmp_to_add_sat(BRW_CONDITIONAL_Z, true, true, false); +} + +TEST_F(cmod_propagation_test, cmp_z_to_add_neg_src0_pos_constant) +{ + test_cmp_to_add_sat(BRW_CONDITIONAL_Z, true, false, false); } TEST_F(cmod_propagation_test, prop_across_sel)