brw/cmod: Fix some errors when propagating from CMP to ADD.SAT

When I originally wrote that code, I didn't understand what a jerk NaN
can be.

v2: Remove the brw_type_is_uint stuff. This function is currently only
called for float types. In a later commit, integer types will be
supported but only for NZ and Z conditions. Noticed by Matt.

shader-db:

All Intel platforms had similar results. (Lunar Lake shown)
total instructions in shared programs: 17122197 -> 17122204 (<.01%)
instructions in affected programs: 1691 -> 1698 (0.41%)
helped: 0 / HURT: 4

total cycles in shared programs: 895602484 -> 895602370 (<.01%)
cycles in affected programs: 912964 -> 912850 (-0.01%)
helped: 2 / HURT: 2

fossil-db:

All Intel platforms had similar results. (Lunar Lake shown)
Totals:
Instrs: 210736388 -> 210736518 (+0.00%)
Cycle count: 30377728900 -> 30377733040 (+0.00%); split: -0.00%, +0.00%

Totals from 130 (0.02% of 706776) affected shaders:
Instrs: 169911 -> 170041 (+0.08%)
Cycle count: 18021210 -> 18025350 (+0.02%); split: -0.00%, +0.02%

Reviewed-by: Matt Turner <mattst88@gmail.com>
Fixes: 020b0055e7 ("i965/fs: Propagate conditional modifiers from compares to adds")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34509>
(cherry picked from commit 0dab520a19e323237e415210a70cb21b30512386)
This commit is contained in:
Ian Romanick
2025-04-09 21:36:34 -07:00
committed by Eric Engestrom
parent d14c0562c6
commit aa08dfbad4
3 changed files with 204 additions and 46 deletions

View File

@@ -1054,7 +1054,7 @@
"description": "brw/cmod: Fix some errors when propagating from CMP to ADD.SAT",
"nominated": true,
"nomination_type": 2,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "020b0055e7a085a6a8c961ad12ce94e58606a1ae",
"notes": null

View File

@@ -24,6 +24,7 @@
#include "brw_shader.h"
#include "brw_cfg.h"
#include "brw_eu.h"
#include "util/half_float.h"
/** @file
*
@@ -48,6 +49,26 @@
* exists and therefore remove the instruction.
*/
static double
src_as_float(const brw_reg &src)
{
assert(src.file == IMM);
switch (src.type) {
case BRW_TYPE_HF:
return _mesa_half_to_float((uint16_t)src.d);
case BRW_TYPE_F:
return src.f;
case BRW_TYPE_DF:
return src.df;
default:
unreachable("Invalid float type.");
}
}
static bool
cmod_propagate_cmp_to_add(const intel_device_info *devinfo, brw_inst *inst)
{
@@ -116,17 +137,31 @@ cmod_propagate_cmp_to_add(const intel_device_info *devinfo, brw_inst *inst)
* For negative values:
* (sat(x) > 0) == (x > 0) --- false
* (sat(x) <= 0) == (x <= 0) --- true
*
* Except for the x = NaN cases. sat(NaN) is 0, so add.sat.le of a
* NaN result will be true. add.sat.g of a NaN result is false, so
* the optimization is also incorrect when the second source of the
* comparison is less than zero. All of the fsat(x) > is_negative
* cases should have been eliminated in NIR.
*/
const enum brw_conditional_mod cond =
negate ? brw_swap_cmod(inst->conditional_mod)
: inst->conditional_mod;
if (scan_inst->saturate &&
(brw_type_is_float(scan_inst->dst.type) ||
brw_type_is_uint(scan_inst->dst.type)) &&
(cond != BRW_CONDITIONAL_G &&
cond != BRW_CONDITIONAL_LE))
goto not_match;
if (scan_inst->saturate) {
if (cond != BRW_CONDITIONAL_G)
goto not_match;
if (inst->src[1].file != IMM)
goto not_match;
double v = src_as_float(inst->src[1]);
if (negate)
v = -v;
if (v < 0.0)
goto not_match;
}
/* Otherwise, try propagating the conditional. */
if (scan_inst->can_do_cmod() &&

View File

@@ -18,6 +18,11 @@ protected:
enum brw_reg_type add_type,
enum brw_reg_type op_type,
bool expected_cmod_prop_progress);
void test_cmp_to_add_sat(enum brw_conditional_mod before,
bool add_negative_src0,
bool add_negative_constant,
bool expected_cmod_prop_progress);
};
TEST_F(cmod_propagation_test, basic)
@@ -2202,63 +2207,181 @@ TEST_F(cmod_propagation_test, not_to_or_intervening_mismatch_flag_read)
EXPECT_SHADERS_MATCH(bld, exp);
}
TEST_F(cmod_propagation_test, cmp_to_add_float_e)
{
brw_builder bld = make_shader();
brw_reg dest = bld.vgrf(BRW_TYPE_F);
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
brw_reg neg10(brw_imm_f(-10.0f));
brw_reg pos10(brw_imm_f(10.0f));
bld.ADD(dest, src0, neg10)->saturate = true;
bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_EQ);
EXPECT_NO_PROGRESS(brw_opt_cmod_propagation, bld);
}
TEST_F(cmod_propagation_test, cmp_to_add_float_g)
void
cmod_propagation_test::test_cmp_to_add_sat(enum brw_conditional_mod before,
bool add_negative_src0,
bool add_negative_constant,
bool expected_cmod_prop_progress)
{
brw_builder bld = make_shader();
brw_builder exp = make_shader();
brw_reg dest = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src0 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg neg10 = brw_imm_f(-10.0f);
brw_reg pos10 = brw_imm_f(10.0f);
brw_reg neg = brw_imm_f(-0.5f);
brw_reg pos = brw_imm_f(0.5f);
bld.ADD(dest, src0, neg10)->saturate = true;
bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_G);
bld.ADD(dest,
add_negative_src0 ? negate(src0) : src0,
add_negative_constant ? neg : pos)
->saturate = true;
EXPECT_PROGRESS(brw_opt_cmod_propagation, bld);
/* The parity of negations between the ADD and the CMP must be
* different. Otherwise the ADD and the CMP aren't performing the same
* arithmetic, and the optimization won't trigger.
*/
const bool cmp_negative_constant =
add_negative_constant == add_negative_src0;
brw_inst *add = exp.ADD(dest, src0, neg10);
add->saturate = true;
add->conditional_mod = BRW_CONDITIONAL_G;
bld.CMP(bld.null_reg_f(),
src0,
cmp_negative_constant ? neg : pos,
before);
EXPECT_SHADERS_MATCH(bld, exp);
EXPECT_PROGRESS_RESULT(expected_cmod_prop_progress,
brw_opt_cmod_propagation, bld);
if (expected_cmod_prop_progress) {
const enum brw_conditional_mod after =
add_negative_src0 ? brw_swap_cmod(before) : before;
brw_inst *add = exp.ADD(dest,
add_negative_src0 ? negate(src0) : src0,
add_negative_constant ? neg : pos);
add->saturate = true;
add->conditional_mod = after;
EXPECT_SHADERS_MATCH(bld, exp);
}
}
TEST_F(cmod_propagation_test, cmp_to_add_float_le)
TEST_F(cmod_propagation_test, cmp_g_to_add_src0_neg_constant)
{
brw_builder bld = make_shader();
brw_builder exp = make_shader();
/* This works even if src0 is NaN. (NaN > 0.5) is false, and (0.0 > 0.5) is
* false.
*/
test_cmp_to_add_sat(BRW_CONDITIONAL_G, false, true, true);
}
brw_reg dest = vgrf(bld, exp, BRW_TYPE_F);
brw_reg src0 = vgrf(bld, exp, BRW_TYPE_F);
brw_reg neg10 = brw_imm_f(-10.0f);
brw_reg pos10 = brw_imm_f(10.0f);
TEST_F(cmod_propagation_test, cmp_g_to_add_src0_pos_constant)
{
/* This fails if src0 is NaN. (NaN > -0.5) is false, but (0.0 > -0.5) is
* true.
*/
test_cmp_to_add_sat(BRW_CONDITIONAL_G, false, false, false);
}
bld.ADD(dest, src0, neg10)->saturate = true;
bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_LE);
TEST_F(cmod_propagation_test, cmp_g_to_add_neg_src0_neg_constant)
{
/* This is effectively the same as cmp_l_to_add_src0_neg_constant. */
test_cmp_to_add_sat(BRW_CONDITIONAL_G, true, true, false);
}
EXPECT_PROGRESS(brw_opt_cmod_propagation, bld);
TEST_F(cmod_propagation_test, cmp_g_to_add_neg_src0_pos_constant)
{
/* This is effectively the same as cmp_l_to_add_src0_pos_constant. */
test_cmp_to_add_sat(BRW_CONDITIONAL_G, true, false, false);
}
brw_inst *add = exp.ADD(dest, src0, neg10);
add->saturate = true;
add->conditional_mod = BRW_CONDITIONAL_LE;
TEST_F(cmod_propagation_test, cmp_l_to_add_src0_neg_constant)
{
test_cmp_to_add_sat(BRW_CONDITIONAL_L, false, true, false);
}
EXPECT_SHADERS_MATCH(bld, exp);
TEST_F(cmod_propagation_test, cmp_l_to_add_src0_pos_constant)
{
test_cmp_to_add_sat(BRW_CONDITIONAL_L, false, false, false);
}
TEST_F(cmod_propagation_test, cmp_l_to_add_neg_src0_neg_constant)
{
/* This is effectively the same as cmp_g_to_add_src0_neg_constant. */
test_cmp_to_add_sat(BRW_CONDITIONAL_L, true, true, true);
}
TEST_F(cmod_propagation_test, cmp_l_to_add_neg_src0_pos_constant)
{
test_cmp_to_add_sat(BRW_CONDITIONAL_L, true, false, false);
}
TEST_F(cmod_propagation_test, cmp_le_to_add_src0_neg_constant)
{
test_cmp_to_add_sat(BRW_CONDITIONAL_LE, false, true, false);
}
TEST_F(cmod_propagation_test, cmp_le_to_add_src0_pos_constant)
{
test_cmp_to_add_sat(BRW_CONDITIONAL_LE, false, false, false);
}
TEST_F(cmod_propagation_test, cmp_le_to_add_neg_src0_neg_constant)
{
test_cmp_to_add_sat(BRW_CONDITIONAL_LE, true, true, false);
}
TEST_F(cmod_propagation_test, cmp_le_to_add_neg_src0_pos_constant)
{
test_cmp_to_add_sat(BRW_CONDITIONAL_LE, true, false, false);
}
TEST_F(cmod_propagation_test, cmp_ge_to_add_src0_neg_constant)
{
test_cmp_to_add_sat(BRW_CONDITIONAL_GE, false, true, false);
}
TEST_F(cmod_propagation_test, cmp_ge_to_add_src0_pos_constant)
{
test_cmp_to_add_sat(BRW_CONDITIONAL_GE, false, false, false);
}
TEST_F(cmod_propagation_test, cmp_ge_to_add_neg_src0_neg_constant)
{
test_cmp_to_add_sat(BRW_CONDITIONAL_GE, true, true, false);
}
TEST_F(cmod_propagation_test, cmp_ge_to_add_neg_src0_pos_constant)
{
test_cmp_to_add_sat(BRW_CONDITIONAL_GE, true, false, false);
}
TEST_F(cmod_propagation_test, cmp_nz_to_add_src0_neg_constant)
{
test_cmp_to_add_sat(BRW_CONDITIONAL_NZ, false, true, false);
}
TEST_F(cmod_propagation_test, cmp_nz_to_add_src0_pos_constant)
{
test_cmp_to_add_sat(BRW_CONDITIONAL_NZ, false, false, false);
}
TEST_F(cmod_propagation_test, cmp_nz_to_add_neg_src0_neg_constant)
{
test_cmp_to_add_sat(BRW_CONDITIONAL_NZ, true, true, false);
}
TEST_F(cmod_propagation_test, cmp_nz_to_add_neg_src0_pos_constant)
{
test_cmp_to_add_sat(BRW_CONDITIONAL_NZ, true, false, false);
}
TEST_F(cmod_propagation_test, cmp_z_to_add_src0_neg_constant)
{
test_cmp_to_add_sat(BRW_CONDITIONAL_Z, false, true, false);
}
TEST_F(cmod_propagation_test, cmp_z_to_add_src0_pos_constant)
{
test_cmp_to_add_sat(BRW_CONDITIONAL_Z, false, false, false);
}
TEST_F(cmod_propagation_test, cmp_z_to_add_neg_src0_neg_constant)
{
test_cmp_to_add_sat(BRW_CONDITIONAL_Z, true, true, false);
}
TEST_F(cmod_propagation_test, cmp_z_to_add_neg_src0_pos_constant)
{
test_cmp_to_add_sat(BRW_CONDITIONAL_Z, true, false, false);
}
TEST_F(cmod_propagation_test, prop_across_sel)