brw/cmod: Fix some errors when propagating from CMP to ADD.SAT
When I originally wrote that code, I didn't understand what a jerk NaN
can be.
v2: Remove the brw_type_is_uint stuff. This function is currently only
called for float types. In a later commit, integer types will be
supported but only for NZ and Z conditions. Noticed by Matt.
shader-db:
All Intel platforms had similar results. (Lunar Lake shown)
total instructions in shared programs: 17122197 -> 17122204 (<.01%)
instructions in affected programs: 1691 -> 1698 (0.41%)
helped: 0 / HURT: 4
total cycles in shared programs: 895602484 -> 895602370 (<.01%)
cycles in affected programs: 912964 -> 912850 (-0.01%)
helped: 2 / HURT: 2
fossil-db:
All Intel platforms had similar results. (Lunar Lake shown)
Totals:
Instrs: 210736388 -> 210736518 (+0.00%)
Cycle count: 30377728900 -> 30377733040 (+0.00%); split: -0.00%, +0.00%
Totals from 130 (0.02% of 706776) affected shaders:
Instrs: 169911 -> 170041 (+0.08%)
Cycle count: 18021210 -> 18025350 (+0.02%); split: -0.00%, +0.02%
Reviewed-by: Matt Turner <mattst88@gmail.com>
Fixes: 020b0055e7
("i965/fs: Propagate conditional modifiers from compares to adds")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34509>
(cherry picked from commit 0dab520a19e323237e415210a70cb21b30512386)
This commit is contained in:

committed by
Eric Engestrom

parent
d14c0562c6
commit
aa08dfbad4
@@ -1054,7 +1054,7 @@
|
||||
"description": "brw/cmod: Fix some errors when propagating from CMP to ADD.SAT",
|
||||
"nominated": true,
|
||||
"nomination_type": 2,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": "020b0055e7a085a6a8c961ad12ce94e58606a1ae",
|
||||
"notes": null
|
||||
|
@@ -24,6 +24,7 @@
|
||||
#include "brw_shader.h"
|
||||
#include "brw_cfg.h"
|
||||
#include "brw_eu.h"
|
||||
#include "util/half_float.h"
|
||||
|
||||
/** @file
|
||||
*
|
||||
@@ -48,6 +49,26 @@
|
||||
* exists and therefore remove the instruction.
|
||||
*/
|
||||
|
||||
static double
|
||||
src_as_float(const brw_reg &src)
|
||||
{
|
||||
assert(src.file == IMM);
|
||||
|
||||
switch (src.type) {
|
||||
case BRW_TYPE_HF:
|
||||
return _mesa_half_to_float((uint16_t)src.d);
|
||||
|
||||
case BRW_TYPE_F:
|
||||
return src.f;
|
||||
|
||||
case BRW_TYPE_DF:
|
||||
return src.df;
|
||||
|
||||
default:
|
||||
unreachable("Invalid float type.");
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
cmod_propagate_cmp_to_add(const intel_device_info *devinfo, brw_inst *inst)
|
||||
{
|
||||
@@ -116,17 +137,31 @@ cmod_propagate_cmp_to_add(const intel_device_info *devinfo, brw_inst *inst)
|
||||
* For negative values:
|
||||
* (sat(x) > 0) == (x > 0) --- false
|
||||
* (sat(x) <= 0) == (x <= 0) --- true
|
||||
*
|
||||
* Except for the x = NaN cases. sat(NaN) is 0, so add.sat.le of a
|
||||
* NaN result will be true. add.sat.g of a NaN result is false, so
|
||||
* the optimization is also incorrect when the second source of the
|
||||
* comparison is less than zero. All of the fsat(x) > is_negative
|
||||
* cases should have been eliminated in NIR.
|
||||
*/
|
||||
const enum brw_conditional_mod cond =
|
||||
negate ? brw_swap_cmod(inst->conditional_mod)
|
||||
: inst->conditional_mod;
|
||||
|
||||
if (scan_inst->saturate &&
|
||||
(brw_type_is_float(scan_inst->dst.type) ||
|
||||
brw_type_is_uint(scan_inst->dst.type)) &&
|
||||
(cond != BRW_CONDITIONAL_G &&
|
||||
cond != BRW_CONDITIONAL_LE))
|
||||
goto not_match;
|
||||
if (scan_inst->saturate) {
|
||||
if (cond != BRW_CONDITIONAL_G)
|
||||
goto not_match;
|
||||
|
||||
if (inst->src[1].file != IMM)
|
||||
goto not_match;
|
||||
|
||||
double v = src_as_float(inst->src[1]);
|
||||
if (negate)
|
||||
v = -v;
|
||||
|
||||
if (v < 0.0)
|
||||
goto not_match;
|
||||
}
|
||||
|
||||
/* Otherwise, try propagating the conditional. */
|
||||
if (scan_inst->can_do_cmod() &&
|
||||
|
@@ -18,6 +18,11 @@ protected:
|
||||
enum brw_reg_type add_type,
|
||||
enum brw_reg_type op_type,
|
||||
bool expected_cmod_prop_progress);
|
||||
|
||||
void test_cmp_to_add_sat(enum brw_conditional_mod before,
|
||||
bool add_negative_src0,
|
||||
bool add_negative_constant,
|
||||
bool expected_cmod_prop_progress);
|
||||
};
|
||||
|
||||
TEST_F(cmod_propagation_test, basic)
|
||||
@@ -2202,63 +2207,181 @@ TEST_F(cmod_propagation_test, not_to_or_intervening_mismatch_flag_read)
|
||||
EXPECT_SHADERS_MATCH(bld, exp);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_to_add_float_e)
|
||||
{
|
||||
brw_builder bld = make_shader();
|
||||
|
||||
brw_reg dest = bld.vgrf(BRW_TYPE_F);
|
||||
brw_reg src0 = bld.vgrf(BRW_TYPE_F);
|
||||
brw_reg neg10(brw_imm_f(-10.0f));
|
||||
brw_reg pos10(brw_imm_f(10.0f));
|
||||
|
||||
bld.ADD(dest, src0, neg10)->saturate = true;
|
||||
bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_EQ);
|
||||
|
||||
EXPECT_NO_PROGRESS(brw_opt_cmod_propagation, bld);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_to_add_float_g)
|
||||
void
|
||||
cmod_propagation_test::test_cmp_to_add_sat(enum brw_conditional_mod before,
|
||||
bool add_negative_src0,
|
||||
bool add_negative_constant,
|
||||
bool expected_cmod_prop_progress)
|
||||
{
|
||||
brw_builder bld = make_shader();
|
||||
brw_builder exp = make_shader();
|
||||
|
||||
brw_reg dest = vgrf(bld, exp, BRW_TYPE_F);
|
||||
brw_reg src0 = vgrf(bld, exp, BRW_TYPE_F);
|
||||
brw_reg neg10 = brw_imm_f(-10.0f);
|
||||
brw_reg pos10 = brw_imm_f(10.0f);
|
||||
brw_reg neg = brw_imm_f(-0.5f);
|
||||
brw_reg pos = brw_imm_f(0.5f);
|
||||
|
||||
bld.ADD(dest, src0, neg10)->saturate = true;
|
||||
bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_G);
|
||||
bld.ADD(dest,
|
||||
add_negative_src0 ? negate(src0) : src0,
|
||||
add_negative_constant ? neg : pos)
|
||||
->saturate = true;
|
||||
|
||||
EXPECT_PROGRESS(brw_opt_cmod_propagation, bld);
|
||||
/* The parity of negations between the ADD and the CMP must be
|
||||
* different. Otherwise the ADD and the CMP aren't performing the same
|
||||
* arithmetic, and the optimization won't trigger.
|
||||
*/
|
||||
const bool cmp_negative_constant =
|
||||
add_negative_constant == add_negative_src0;
|
||||
|
||||
brw_inst *add = exp.ADD(dest, src0, neg10);
|
||||
add->saturate = true;
|
||||
add->conditional_mod = BRW_CONDITIONAL_G;
|
||||
bld.CMP(bld.null_reg_f(),
|
||||
src0,
|
||||
cmp_negative_constant ? neg : pos,
|
||||
before);
|
||||
|
||||
EXPECT_SHADERS_MATCH(bld, exp);
|
||||
EXPECT_PROGRESS_RESULT(expected_cmod_prop_progress,
|
||||
brw_opt_cmod_propagation, bld);
|
||||
|
||||
if (expected_cmod_prop_progress) {
|
||||
const enum brw_conditional_mod after =
|
||||
add_negative_src0 ? brw_swap_cmod(before) : before;
|
||||
|
||||
brw_inst *add = exp.ADD(dest,
|
||||
add_negative_src0 ? negate(src0) : src0,
|
||||
add_negative_constant ? neg : pos);
|
||||
add->saturate = true;
|
||||
add->conditional_mod = after;
|
||||
|
||||
EXPECT_SHADERS_MATCH(bld, exp);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_to_add_float_le)
|
||||
TEST_F(cmod_propagation_test, cmp_g_to_add_src0_neg_constant)
|
||||
{
|
||||
brw_builder bld = make_shader();
|
||||
brw_builder exp = make_shader();
|
||||
/* This works even if src0 is NaN. (NaN > 0.5) is false, and (0.0 > 0.5) is
|
||||
* false.
|
||||
*/
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_G, false, true, true);
|
||||
}
|
||||
|
||||
brw_reg dest = vgrf(bld, exp, BRW_TYPE_F);
|
||||
brw_reg src0 = vgrf(bld, exp, BRW_TYPE_F);
|
||||
brw_reg neg10 = brw_imm_f(-10.0f);
|
||||
brw_reg pos10 = brw_imm_f(10.0f);
|
||||
TEST_F(cmod_propagation_test, cmp_g_to_add_src0_pos_constant)
|
||||
{
|
||||
/* This fails if src0 is NaN. (NaN > -0.5) is false, but (0.0 > -0.5) is
|
||||
* true.
|
||||
*/
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_G, false, false, false);
|
||||
}
|
||||
|
||||
bld.ADD(dest, src0, neg10)->saturate = true;
|
||||
bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_LE);
|
||||
TEST_F(cmod_propagation_test, cmp_g_to_add_neg_src0_neg_constant)
|
||||
{
|
||||
/* This is effectively the same as cmp_l_to_add_src0_neg_constant. */
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_G, true, true, false);
|
||||
}
|
||||
|
||||
EXPECT_PROGRESS(brw_opt_cmod_propagation, bld);
|
||||
TEST_F(cmod_propagation_test, cmp_g_to_add_neg_src0_pos_constant)
|
||||
{
|
||||
/* This is effectively the same as cmp_l_to_add_src0_pos_constant. */
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_G, true, false, false);
|
||||
}
|
||||
|
||||
brw_inst *add = exp.ADD(dest, src0, neg10);
|
||||
add->saturate = true;
|
||||
add->conditional_mod = BRW_CONDITIONAL_LE;
|
||||
TEST_F(cmod_propagation_test, cmp_l_to_add_src0_neg_constant)
|
||||
{
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_L, false, true, false);
|
||||
}
|
||||
|
||||
EXPECT_SHADERS_MATCH(bld, exp);
|
||||
TEST_F(cmod_propagation_test, cmp_l_to_add_src0_pos_constant)
|
||||
{
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_L, false, false, false);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_l_to_add_neg_src0_neg_constant)
|
||||
{
|
||||
/* This is effectively the same as cmp_g_to_add_src0_neg_constant. */
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_L, true, true, true);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_l_to_add_neg_src0_pos_constant)
|
||||
{
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_L, true, false, false);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_le_to_add_src0_neg_constant)
|
||||
{
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_LE, false, true, false);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_le_to_add_src0_pos_constant)
|
||||
{
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_LE, false, false, false);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_le_to_add_neg_src0_neg_constant)
|
||||
{
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_LE, true, true, false);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_le_to_add_neg_src0_pos_constant)
|
||||
{
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_LE, true, false, false);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_ge_to_add_src0_neg_constant)
|
||||
{
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_GE, false, true, false);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_ge_to_add_src0_pos_constant)
|
||||
{
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_GE, false, false, false);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_ge_to_add_neg_src0_neg_constant)
|
||||
{
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_GE, true, true, false);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_ge_to_add_neg_src0_pos_constant)
|
||||
{
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_GE, true, false, false);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_nz_to_add_src0_neg_constant)
|
||||
{
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_NZ, false, true, false);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_nz_to_add_src0_pos_constant)
|
||||
{
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_NZ, false, false, false);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_nz_to_add_neg_src0_neg_constant)
|
||||
{
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_NZ, true, true, false);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_nz_to_add_neg_src0_pos_constant)
|
||||
{
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_NZ, true, false, false);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_z_to_add_src0_neg_constant)
|
||||
{
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_Z, false, true, false);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_z_to_add_src0_pos_constant)
|
||||
{
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_Z, false, false, false);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_z_to_add_neg_src0_neg_constant)
|
||||
{
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_Z, true, true, false);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, cmp_z_to_add_neg_src0_pos_constant)
|
||||
{
|
||||
test_cmp_to_add_sat(BRW_CONDITIONAL_Z, true, false, false);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, prop_across_sel)
|
||||
|
Reference in New Issue
Block a user