brw/cmod: Don't propagate from CMP to possible Inf + (-Inf)

Most of the churn in this commit is changing unit tests that were
testing things that are now invalid.

shader-db:

All Intel platforms had similar results. (Lunar Lake shown)
total instructions in shared programs: 17122204 -> 17122669 (<.01%)
instructions in affected programs: 120669 -> 121134 (0.39%)
helped: 0 / HURT: 124

total cycles in shared programs: 895602370 -> 895613210 (<.01%)
cycles in affected programs: 17868974 -> 17879814 (0.06%)
helped: 35 / HURT: 85

fossil-db:

All Intel platforms had similar results. (Lunar Lake shown)
Totals:
Instrs: 210736518 -> 210743769 (+0.00%)
Cycle count: 30377733040 -> 30377699060 (-0.00%); split: -0.00%, +0.00%
Max live registers: 66056852 -> 66056966 (+0.00%)

Totals from 1505 (0.21% of 706776) affected shaders:
Instrs: 1890151 -> 1897402 (+0.38%)
Cycle count: 48397408 -> 48363428 (-0.07%); split: -0.11%, +0.04%
Max live registers: 256821 -> 256935 (+0.04%)

Reviewed-by: Matt Turner <mattst88@gmail.com>
Fixes: 020b0055e7 ("i965/fs: Propagate conditional modifiers from compares to adds")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34509>
(cherry picked from commit e26270249b1c093c8b2443492dc940d3e41418b9)
This commit is contained in:
Ian Romanick
2025-04-09 19:59:31 -07:00
committed by Eric Engestrom
parent aa08dfbad4
commit f2a54f5244
3 changed files with 151 additions and 92 deletions

View File

@@ -1044,7 +1044,7 @@
"description": "brw/cmod: Don't propagate from CMP to possible Inf + (-Inf)",
"nominated": true,
"nomination_type": 2,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "020b0055e7a085a6a8c961ad12ce94e58606a1ae",
"notes": null

View File

@@ -75,6 +75,36 @@ cmod_propagate_cmp_to_add(const intel_device_info *devinfo, brw_inst *inst)
bool read_flag = false;
const unsigned flags_written = inst->flags_written(devinfo);
/* The floating point comparison can only be removed if we can prove that
* either the addition is not ±Inf - (±Inf) or that ±Inf - (±Inf) compared
* with zero has the same result as ±Inf compared with ±Inf.
*
* The former can only be proven at this point in compilation if src[1] is
* an immediate value. Otherwise we can't know that nether value is
* ±Inf. For the latter, consider this table:
*
* A B A+(-B) A<B A-B<0 A<=B A-B<=0 A>B A-B>0 A>=B A-B>=0 A==B A-B==0 A!=B A-B!=0
* Inf Inf NaN F F T F F F T F T F F T
* Inf -Inf Inf F F F F T T T T F F T T
* -Inf Inf -Inf T T T T F F F F F F T T
* -Inf -Inf NaN F F T F F F T F T F F T
*
* The column for A<B and A-B<0 is identical, and the column for A>B and
* A-B>0 are identical.
*
* If src[1] is NaN, the transformation is always valid.
*/
if (brw_type_is_float(inst->src[0].type)) {
if (inst->conditional_mod != BRW_CONDITIONAL_L &&
inst->conditional_mod != BRW_CONDITIONAL_G) {
if (inst->src[1].file != IMM)
return false;
if (isinf(src_as_float(inst->src[1])) != 0)
return false;
}
}
foreach_inst_in_block_reverse_starting_from(brw_inst, scan_inst, inst) {
if (scan_inst->opcode == BRW_OPCODE_ADD &&
!scan_inst->predicate &&

View File

@@ -23,6 +23,9 @@ protected:
bool add_negative_src0,
bool add_negative_constant,
bool expected_cmod_prop_progress);
void test_subtract_merge(enum brw_conditional_mod before,
bool expected_cmod_prop_progress);
};
TEST_F(cmod_propagation_test, basic)
@@ -521,7 +524,9 @@ TEST_F(cmod_propagation_test, add_not_merge_with_compare)
EXPECT_NO_PROGRESS(brw_opt_cmod_propagation, bld);
}
TEST_F(cmod_propagation_test, subtract_merge_with_compare)
void
cmod_propagation_test::test_subtract_merge(enum brw_conditional_mod before,
bool expected_cmod_prop_progress)
{
brw_builder bld = make_shader();
brw_builder exp = make_shader();
@@ -531,13 +536,47 @@ TEST_F(cmod_propagation_test, subtract_merge_with_compare)
brw_reg src1 = vgrf(bld, exp, BRW_TYPE_F);
bld.ADD(dest, src0, negate(src1));
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
bld.CMP(bld.null_reg_f(), src0, src1, before);
EXPECT_PROGRESS(brw_opt_cmod_propagation, bld);
EXPECT_PROGRESS_RESULT(expected_cmod_prop_progress,
brw_opt_cmod_propagation, bld);
exp.ADD(dest, src0, negate(src1))->conditional_mod = BRW_CONDITIONAL_L;
if (expected_cmod_prop_progress) {
exp.ADD(dest, src0, negate(src1))
->conditional_mod = before;
EXPECT_SHADERS_MATCH(bld, exp);
EXPECT_SHADERS_MATCH(bld, exp);
}
}
TEST_F(cmod_propagation_test, subtract_merge_with_compare_l)
{
test_subtract_merge(BRW_CONDITIONAL_L, true);
}
TEST_F(cmod_propagation_test, subtract_merge_with_compare_g)
{
test_subtract_merge(BRW_CONDITIONAL_G, true);
}
TEST_F(cmod_propagation_test, subtract_merge_with_compare_le)
{
test_subtract_merge(BRW_CONDITIONAL_LE, false);
}
TEST_F(cmod_propagation_test, subtract_merge_with_compare_ge)
{
test_subtract_merge(BRW_CONDITIONAL_GE, false);
}
TEST_F(cmod_propagation_test, subtract_merge_with_compare_z)
{
test_subtract_merge(BRW_CONDITIONAL_Z, false);
}
TEST_F(cmod_propagation_test, subtract_merge_with_compare_nz)
{
test_subtract_merge(BRW_CONDITIONAL_NZ, false);
}
TEST_F(cmod_propagation_test, subtract_immediate_merge_with_compare)
@@ -575,16 +614,17 @@ TEST_F(cmod_propagation_test, subtract_merge_with_compare_intervening_add)
brw_reg dest0 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg dest1 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src0 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src1 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg one = brw_imm_f(1.0f);
brw_reg negative_one = brw_imm_f(-1.0f);
bld.ADD(dest0, src0, negate(src1));
bld.ADD(dest1, src0, src1);
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
bld.ADD(dest0, src0, negative_one);
bld.ADD(dest1, src0, one);
bld.CMP(bld.null_reg_f(), src0, one, BRW_CONDITIONAL_L);
EXPECT_PROGRESS(brw_opt_cmod_propagation, bld);
exp.ADD(dest0, src0, negate(src1))->conditional_mod = BRW_CONDITIONAL_L;
exp.ADD(dest1, src0, src1);
exp.ADD(dest0, src0, negative_one)->conditional_mod = BRW_CONDITIONAL_L;
exp.ADD(dest1, src0, one);
EXPECT_SHADERS_MATCH(bld, exp);
}
@@ -596,11 +636,12 @@ TEST_F(cmod_propagation_test, subtract_not_merge_with_compare_intervening_partia
brw_reg dest0 = bld.vgrf(BRW_TYPE_F);
brw_reg dest1 = bld.vgrf(BRW_TYPE_F);
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
brw_reg one = brw_imm_f(1.0f);
brw_reg negative_one = brw_imm_f(-1.0f);
bld.ADD(dest0, src0, negate(src1));
set_predicate(BRW_PREDICATE_NORMAL, bld.ADD(dest1, src0, negate(src1)));
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
bld.ADD(dest0, src0, negative_one);
set_predicate(BRW_PREDICATE_NORMAL, bld.ADD(dest1, src0, one));
bld.CMP(bld.null_reg_f(), src0, one, BRW_CONDITIONAL_L);
EXPECT_NO_PROGRESS(brw_opt_cmod_propagation, bld);
}
@@ -612,34 +653,16 @@ TEST_F(cmod_propagation_test, subtract_not_merge_with_compare_intervening_add)
brw_reg dest0 = bld.vgrf(BRW_TYPE_F);
brw_reg dest1 = bld.vgrf(BRW_TYPE_F);
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
brw_reg one = brw_imm_f(1.0f);
brw_reg negative_one = brw_imm_f(-1.0f);
bld.ADD(dest0, src0, negate(src1));
set_condmod(BRW_CONDITIONAL_EQ, bld.ADD(dest1, src0, src1));
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
bld.ADD(dest0, src0, negative_one);
set_condmod(BRW_CONDITIONAL_EQ, bld.ADD(dest1, src0, one));
bld.CMP(bld.null_reg_f(), src0, one, BRW_CONDITIONAL_L);
EXPECT_NO_PROGRESS(brw_opt_cmod_propagation, bld);
}
TEST_F(cmod_propagation_test, add_merge_with_compare)
{
brw_builder bld = make_shader();
brw_builder exp = make_shader();
brw_reg dest = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src0 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src1 = vgrf(bld, exp, BRW_TYPE_F);
bld.ADD(dest, src0, src1);
bld.CMP(bld.null_reg_f(), src0, negate(src1), BRW_CONDITIONAL_L);
EXPECT_PROGRESS(brw_opt_cmod_propagation, bld);
exp.ADD(dest, src0, src1)->conditional_mod = BRW_CONDITIONAL_L;
EXPECT_SHADERS_MATCH(bld, exp);
}
TEST_F(cmod_propagation_test, negative_subtract_merge_with_compare)
{
brw_builder bld = make_shader();
@@ -647,17 +670,17 @@ TEST_F(cmod_propagation_test, negative_subtract_merge_with_compare)
brw_reg dest = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src0 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src1 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg one = brw_imm_f(1.0f);
bld.ADD(dest, src1, negate(src0));
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
bld.ADD(dest, negate(src0), one);
bld.CMP(bld.null_reg_f(), src0, one, BRW_CONDITIONAL_L);
EXPECT_PROGRESS(brw_opt_cmod_propagation, bld);
/* The result of the subtract is the negatiion of the result of the
/* The result of the subtract is the negation of the result of the
* implicit subtract in the compare, so the condition must change.
*/
exp.ADD(dest, src1, negate(src0))->conditional_mod = BRW_CONDITIONAL_G;
exp.ADD(dest, negate(src0), one)->conditional_mod = BRW_CONDITIONAL_G;
EXPECT_SHADERS_MATCH(bld, exp);
}
@@ -670,25 +693,26 @@ TEST_F(cmod_propagation_test, subtract_delete_compare)
brw_reg dest = vgrf(bld, exp, BRW_TYPE_F);
brw_reg dest1 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src0 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src1 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src2 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg one = brw_imm_f(1.0f);
brw_reg negative_one = brw_imm_f(-1.0f);
set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest, src0, negate(src1)));
set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest, src0, negative_one));
set_predicate(BRW_PREDICATE_NORMAL, bld.MOV(dest1, src2));
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
bld.CMP(bld.null_reg_f(), src0, one, BRW_CONDITIONAL_L);
/* = Before =
* 0: add.l.f0(8) dest0:F src0:F -src1:F
* 0: add.l.f0(8) dest0:F src0:F -1.0:F
* 1: (+f0) mov(0) dest1:F src2:F
* 2: cmp.l.f0(8) null:F src0:F src1:F
* 2: cmp.l.f0(8) null:F src0:F 1.0:F
*
* = After =
* 0: add.l.f0(8) dest:F src0:F -src1:F
* 0: add.l.f0(8) dest:F src0:F -1.0:F
* 1: (+f0) mov(0) dest1:F src2:F
*/
EXPECT_PROGRESS(brw_opt_cmod_propagation, bld);
set_condmod(BRW_CONDITIONAL_L, exp.ADD(dest, src0, negate(src1)));
set_condmod(BRW_CONDITIONAL_L, exp.ADD(dest, src0, negative_one));
set_predicate(BRW_PREDICATE_NORMAL, exp.MOV(dest1, src2));
EXPECT_SHADERS_MATCH(bld, exp);
@@ -705,27 +729,28 @@ TEST_F(cmod_propagation_test, subtract_delete_compare_other_flag)
brw_reg dest = vgrf(bld, exp, BRW_TYPE_F);
brw_reg dest1 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src0 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src1 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src2 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg one = brw_imm_f(1.0f);
brw_reg negative_one = brw_imm_f(-1.0f);
set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest, src0, negate(src1)))
set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest, src0, negative_one))
->flag_subreg = 1;
set_predicate(BRW_PREDICATE_NORMAL, bld.MOV(dest1, src2));
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L)
bld.CMP(bld.null_reg_f(), src0, one, BRW_CONDITIONAL_L)
->flag_subreg = 1;
/* = Before =
* 0: add.l.f0.1(8) dest0:F src0:F -src1:F
* 0: add.l.f0.1(8) dest0:F src0:F -1.0:F
* 1: (+f0) mov(0) dest1:F src2:F
* 2: cmp.l.f0.1(8) null:F src0:F src1:F
* 2: cmp.l.f0.1(8) null:F src0:F 1.0:F
*
* = After =
* 0: add.l.f0.1(8) dest:F src0:F -src1:F
* 0: add.l.f0.1(8) dest:F src0:F -1.0:F
* 1: (+f0) mov(0) dest1:F src2:F
*/
EXPECT_PROGRESS(brw_opt_cmod_propagation, bld);
set_condmod(BRW_CONDITIONAL_L, exp.ADD(dest, src0, negate(src1)))
set_condmod(BRW_CONDITIONAL_L, exp.ADD(dest, src0, negative_one))
->flag_subreg = 1;
set_predicate(BRW_PREDICATE_NORMAL, exp.MOV(dest1, src2));
@@ -738,10 +763,11 @@ TEST_F(cmod_propagation_test, subtract_to_mismatch_flag)
brw_reg dest = bld.vgrf(BRW_TYPE_F);
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
brw_reg src1 = bld.vgrf(BRW_TYPE_F);
brw_reg one = brw_imm_f(1.0f);
brw_reg negative_one = brw_imm_f(-1.0f);
set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest, src0, negate(src1)));
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L)
set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest, src0, negative_one));
bld.CMP(bld.null_reg_f(), src0, one, BRW_CONDITIONAL_L)
->flag_subreg = 1;
EXPECT_NO_PROGRESS(brw_opt_cmod_propagation, bld);
@@ -755,26 +781,27 @@ TEST_F(cmod_propagation_test,
brw_reg dest0 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src0 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src1 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg one = brw_imm_f(1.0f);
brw_reg negative_one = brw_imm_f(-1.0f);
bld.ADD(dest0, src0, negate(src1));
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L)
bld.ADD(dest0, src0, negative_one);
bld.CMP(bld.null_reg_f(), src0, one, BRW_CONDITIONAL_L)
->flag_subreg = 1;
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
bld.CMP(bld.null_reg_f(), src0, one, BRW_CONDITIONAL_L);
/* = Before =
* 0: add(8) dest0:F src0:F -src1:F
* 1: cmp.l.f0.1(8) null:F src0:F src1:F
* 2: cmp.l.f0(8) null:F src0:F src1:F
* 0: add(8) dest0:F src0:F -1.0:F
* 1: cmp.l.f0.1(8) null:F src0:F 1.0:F
* 2: cmp.l.f0(8) null:F src0:F 1.0:F
*
* = After =
* 0: add.l.f0(8) dest0:F src0:F -src1:F
* 1: cmp.l.f0.1(8) null:F src0:F src1:F
* 0: add.l.f0(8) dest0:F src0:F -1.0:F
* 1: cmp.l.f0.1(8) null:F src0:F 1.0:F
*
* NOTE: Another perfectly valid after sequence would be:
*
* 0: add.f0.1(8) dest0:F src0:F -src1:F
* 1: cmp.l.f0(8) null:F src0:F src1:F
* 0: add.f0.1(8) dest0:F src0:F -1.0:F
* 1: cmp.l.f0(8) null:F src0:F 1.0:F
*
* However, the optimization pass starts at the end of the basic block.
* Because of this, the cmp.l.f0 will always be chosen. If the pass
@@ -782,8 +809,8 @@ TEST_F(cmod_propagation_test,
*/
EXPECT_PROGRESS(brw_opt_cmod_propagation, bld);
exp.ADD(dest0, src0, negate(src1))->conditional_mod = BRW_CONDITIONAL_L;
exp.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L)
exp.ADD(dest0, src0, negative_one)->conditional_mod = BRW_CONDITIONAL_L;
exp.CMP(bld.null_reg_f(), src0, one, BRW_CONDITIONAL_L)
->flag_subreg = 1;
EXPECT_SHADERS_MATCH(bld, exp);
@@ -798,27 +825,28 @@ TEST_F(cmod_propagation_test,
brw_reg dest0 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg dest1 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src0 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src1 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src2 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg zero = brw_imm_f(0.0f);
brw_reg one = brw_imm_f(1.0f);
brw_reg negative_one = brw_imm_f(-1.0f);
bld.ADD(dest0, src0, negate(src1));
bld.ADD(dest0, src0, negative_one);
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero))
->flag_subreg = 1;
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
bld.CMP(bld.null_reg_f(), src0, one, BRW_CONDITIONAL_L);
/* = Before =
* 0: add(8) dest0:F src0:F -src1:F
* 0: add(8) dest0:F src0:F -1.0:F
* 1: (+f0.1) sel(8) dest1 src2 0.0f
* 2: cmp.l.f0(8) null:F src0:F src1:F
* 2: cmp.l.f0(8) null:F src0:F 1.0:F
*
* = After =
* 0: add.l.f0(8) dest0:F src0:F -src1:F
* 0: add.l.f0(8) dest0:F src0:F -1.0:F
* 1: (+f0.1) sel(8) dest1 src2 0.0f
*/
EXPECT_PROGRESS(brw_opt_cmod_propagation, bld);
exp.ADD(dest0, src0, negate(src1))->conditional_mod = BRW_CONDITIONAL_L;
exp.ADD(dest0, src0, negative_one)->conditional_mod = BRW_CONDITIONAL_L;
set_predicate(BRW_PREDICATE_NORMAL, exp.SEL(dest1, src2, zero))
->flag_subreg = 1;
@@ -833,25 +861,26 @@ TEST_F(cmod_propagation_test, subtract_delete_compare_derp)
brw_reg dest0 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg dest1 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src0 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src1 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg one = brw_imm_f(1.0f);
brw_reg negative_one = brw_imm_f(-1.0f);
set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest0, src0, negate(src1)));
set_predicate(BRW_PREDICATE_NORMAL, bld.ADD(dest1, negate(src0), src1));
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest0, src0, negative_one));
set_predicate(BRW_PREDICATE_NORMAL, bld.ADD(dest1, negate(src0), one));
bld.CMP(bld.null_reg_f(), src0, one, BRW_CONDITIONAL_L);
/* = Before =
* 0: add.l.f0(8) dest0:F src0:F -src1:F
* 1: (+f0) add(0) dest1:F -src0:F src1:F
* 2: cmp.l.f0(8) null:F src0:F src1:F
* 0: add.l.f0(8) dest0:F src0:F -1.0:F
* 1: (+f0) add(0) dest1:F -src0:F 1.0:F
* 2: cmp.l.f0(8) null:F src0:F 1.0:F
*
* = After =
* 0: add.l.f0(8) dest0:F src0:F -src1:F
* 1: (+f0) add(0) dest1:F -src0:F src1:F
* 0: add.l.f0(8) dest0:F src0:F -1.0:F
* 1: (+f0) add(0) dest1:F -src0:F 1.0:F
*/
EXPECT_PROGRESS(brw_opt_cmod_propagation, bld);
set_condmod(BRW_CONDITIONAL_L, exp.ADD(dest0, src0, negate(src1)));
set_predicate(BRW_PREDICATE_NORMAL, exp.ADD(dest1, negate(src0), src1));
set_condmod(BRW_CONDITIONAL_L, exp.ADD(dest0, src0, negative_one));
set_predicate(BRW_PREDICATE_NORMAL, exp.ADD(dest1, negate(src0), one));
EXPECT_SHADERS_MATCH(bld, exp);
}