intel/fs: Allow cmod propagation to instructions with saturate modifier

v2: Add unit tests.  Suggested by Matt.

All Intel GPUs had similar results. (Ice Lake shown)
total instructions in shared programs: 17229441 -> 17228658 (<.01%)
instructions in affected programs: 159574 -> 158791 (-0.49%)
helped: 489
HURT: 0
helped stats (abs) min: 1 max: 5 x̄: 1.60 x̃: 1
helped stats (rel) min: 0.07% max: 2.70% x̄: 0.61% x̃: 0.59%
95% mean confidence interval for instructions value: -1.72 -1.48
95% mean confidence interval for instructions %-change: -0.64% -0.58%
Instructions are helped.

total cycles in shared programs: 360944149 -> 360937144 (<.01%)
cycles in affected programs: 1072195 -> 1065190 (-0.65%)
helped: 254
HURT: 27
helped stats (abs) min: 2 max: 234 x̄: 30.51 x̃: 9
helped stats (rel) min: 0.04% max: 8.99% x̄: 0.75% x̃: 0.24%
HURT stats (abs)   min: 2 max: 83 x̄: 27.56 x̃: 24
HURT stats (rel)   min: 0.09% max: 3.79% x̄: 1.28% x̃: 1.16%
95% mean confidence interval for cycles value: -30.11 -19.75
95% mean confidence interval for cycles %-change: -0.70% -0.41%
Cycles are helped.

Reviewed-by: Matt Turner <mattst88@gmail.com> [v1]
This commit is contained in:
Ian Romanick
2019-03-11 15:49:26 -07:00
parent a7724b1cbb
commit a79570099b
2 changed files with 528 additions and 9 deletions

View File

@@ -313,14 +313,6 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block)
scan_inst->opcode == BRW_OPCODE_CMPN)
break;
/* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods":
*
* * Note that the [post condition signal] bits generated at
* the output of a compute are before the .sat.
*/
if (scan_inst->saturate)
break;
/* From the Sky Lake PRM, Vol 2a, "Multiply":
*
* "When multiplying integer data types, if one of the sources
@@ -338,11 +330,52 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block)
scan_inst->opcode == BRW_OPCODE_MUL)
break;
/* Otherwise, try propagating the conditional. */
enum brw_conditional_mod cond =
inst->src[0].negate ? brw_swap_cmod(inst->conditional_mod)
: inst->conditional_mod;
/* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods":
*
* * Note that the [post condition signal] bits generated at
* the output of a compute are before the .sat.
*
* This limits the cases where we can propagate the conditional
* modifier. If scan_inst has a saturate modifier, then we can
* only propagate from inst if inst is 'scan_inst <= 0',
* 'scan_inst == 0', 'scan_inst != 0', or 'scan_inst > 0'. If
* inst is 'scan_inst == 0', the conditional modifier must be
* replace with LE. Likewise, if inst is 'scan_inst != 0', the
* conditional modifier must be replace with G.
*
* The only other cases are 'scan_inst < 0' (which is a
* contradiction) and 'scan_inst >= 0' (which is a tautology).
*/
if (scan_inst->saturate) {
if (scan_inst->dst.type != BRW_REGISTER_TYPE_F)
break;
if (cond != BRW_CONDITIONAL_Z &&
cond != BRW_CONDITIONAL_NZ &&
cond != BRW_CONDITIONAL_LE &&
cond != BRW_CONDITIONAL_G)
break;
if (inst->opcode != BRW_OPCODE_MOV &&
inst->opcode != BRW_OPCODE_CMP)
break;
/* inst->src[1].is_zero() was tested before, but be safe
* against possible future changes in this code.
*/
assert(inst->opcode != BRW_OPCODE_CMP || inst->src[1].is_zero());
if (cond == BRW_CONDITIONAL_Z)
cond = BRW_CONDITIONAL_LE;
else if (cond == BRW_CONDITIONAL_NZ)
cond = BRW_CONDITIONAL_G;
}
/* Otherwise, try propagating the conditional. */
if (scan_inst->can_do_cmod() &&
((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) ||
scan_inst->conditional_mod == cond)) {

View File

@@ -38,6 +38,16 @@ public:
struct brw_wm_prog_data *prog_data;
struct gl_shader_program *shader_prog;
fs_visitor *v;
void test_positive_float_saturate_prop(enum brw_conditional_mod before,
enum brw_conditional_mod after,
enum opcode op);
void test_negative_float_saturate_prop(enum brw_conditional_mod before,
enum opcode op);
void test_negative_int_saturate_prop(enum brw_conditional_mod before,
enum opcode op);
};
class cmod_propagation_fs_visitor : public fs_visitor
@@ -921,3 +931,479 @@ TEST_F(cmod_propagation_test, signed_unsigned_comparison_mismatch)
EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
EXPECT_EQ(BRW_CONDITIONAL_LE, instruction(block0, 1)->conditional_mod);
}
void
cmod_propagation_test::test_positive_float_saturate_prop(enum brw_conditional_mod before,
enum brw_conditional_mod after,
enum opcode op)
{
const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
fs_reg zero(brw_imm_f(0.0f));
bld.ADD(dest, src0, src1)->saturate = true;
assert(op == BRW_OPCODE_CMP || op == BRW_OPCODE_MOV);
if (op == BRW_OPCODE_CMP)
bld.CMP(bld.null_reg_f(), dest, zero, before);
else
bld.MOV(bld.null_reg_f(), dest)->conditional_mod = before;
v->calculate_cfg();
bblock_t *block0 = v->cfg->blocks[0];
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(1, block0->end_ip);
EXPECT_TRUE(cmod_propagation(v));
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(0, block0->end_ip);
EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
EXPECT_TRUE(instruction(block0, 0)->saturate);
EXPECT_EQ(after, instruction(block0, 0)->conditional_mod);
}
void
cmod_propagation_test::test_negative_float_saturate_prop(enum brw_conditional_mod before,
enum opcode op)
{
const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
fs_reg zero(brw_imm_f(0.0f));
bld.ADD(dest, src0, src1)->saturate = true;
assert(op == BRW_OPCODE_CMP || op == BRW_OPCODE_MOV);
if (op == BRW_OPCODE_CMP)
bld.CMP(bld.null_reg_f(), dest, zero, before);
else
bld.MOV(bld.null_reg_f(), dest)->conditional_mod = before;
v->calculate_cfg();
bblock_t *block0 = v->cfg->blocks[0];
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(1, block0->end_ip);
EXPECT_FALSE(cmod_propagation(v));
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(1, block0->end_ip);
EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
EXPECT_TRUE(instruction(block0, 0)->saturate);
EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
EXPECT_EQ(op, instruction(block0, 1)->opcode);
EXPECT_FALSE(instruction(block0, 1)->saturate);
EXPECT_EQ(before, instruction(block0, 1)->conditional_mod);
}
void
cmod_propagation_test::test_negative_int_saturate_prop(enum brw_conditional_mod before,
enum opcode op)
{
const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::int_type);
fs_reg src0 = v->vgrf(glsl_type::int_type);
fs_reg src1 = v->vgrf(glsl_type::int_type);
fs_reg zero(brw_imm_d(0));
bld.ADD(dest, src0, src1)->saturate = true;
assert(op == BRW_OPCODE_CMP || op == BRW_OPCODE_MOV);
if (op == BRW_OPCODE_CMP)
bld.CMP(bld.null_reg_d(), dest, zero, before);
else
bld.MOV(bld.null_reg_d(), dest)->conditional_mod = before;
v->calculate_cfg();
bblock_t *block0 = v->cfg->blocks[0];
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(1, block0->end_ip);
EXPECT_FALSE(cmod_propagation(v));
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(1, block0->end_ip);
EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
EXPECT_TRUE(instruction(block0, 0)->saturate);
EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
EXPECT_EQ(op, instruction(block0, 1)->opcode);
EXPECT_FALSE(instruction(block0, 1)->saturate);
EXPECT_EQ(before, instruction(block0, 1)->conditional_mod);
}
TEST_F(cmod_propagation_test, float_saturate_nz_cmp)
{
/* With the saturate modifier, the comparison happens before clamping to
* [0, 1]. (sat(x) != 0) == (x > 0).
*
* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: cmp.nz.f0(8) null dest 0.0f
*
* = After =
* 0: add.sat.g.f0(8) dest src0 src1
*/
test_positive_float_saturate_prop(BRW_CONDITIONAL_NZ, BRW_CONDITIONAL_G,
BRW_OPCODE_CMP);
}
TEST_F(cmod_propagation_test, float_saturate_nz_mov)
{
/* With the saturate modifier, the comparison happens before clamping to
* [0, 1]. (sat(x) != 0) == (x > 0).
*
* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: mov.nz.f0(8) null dest
*
* = After =
* 0: add.sat.g.f0(8) dest src0 src1
*/
test_positive_float_saturate_prop(BRW_CONDITIONAL_NZ, BRW_CONDITIONAL_G,
BRW_OPCODE_MOV);
}
TEST_F(cmod_propagation_test, float_saturate_z_cmp)
{
/* With the saturate modifier, the comparison happens before clamping to
* [0, 1]. (sat(x) == 0) == (x <= 0).
*
* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: cmp.z.f0(8) null dest 0.0f
*
* = After =
* 0: add.sat.le.f0(8) dest src0 src1
*/
test_positive_float_saturate_prop(BRW_CONDITIONAL_Z, BRW_CONDITIONAL_LE,
BRW_OPCODE_CMP);
}
TEST_F(cmod_propagation_test, float_saturate_z_mov)
{
/* With the saturate modifier, the comparison happens before clamping to
* [0, 1]. (sat(x) == 0) == (x <= 0).
*
* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: mov.z.f0(8) null dest
*
* = After =
* 0: add.sat.le.f0(8) dest src0 src1
*/
#if 1
/* cmod propagation bails on every MOV except MOV.NZ. */
test_negative_float_saturate_prop(BRW_CONDITIONAL_Z, BRW_OPCODE_MOV);
#else
test_positive_float_saturate_prop(BRW_CONDITIONAL_Z, BRW_CONDITIONAL_LE,
BRW_OPCODE_MOV);
#endif
}
TEST_F(cmod_propagation_test, float_saturate_g_cmp)
{
/* With the saturate modifier, the comparison happens before clamping to
* [0, 1]. (sat(x) > 0) == (x > 0).
*
* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: cmp.g.f0(8) null dest 0.0f
*
* = After =
* 0: add.sat.g.f0(8) dest src0 src1
*/
test_positive_float_saturate_prop(BRW_CONDITIONAL_G, BRW_CONDITIONAL_G,
BRW_OPCODE_CMP);
}
TEST_F(cmod_propagation_test, float_saturate_g_mov)
{
/* With the saturate modifier, the comparison happens before clamping to
* [0, 1]. (sat(x) > 0) == (x > 0).
*
* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: mov.g.f0(8) null dest
*
* = After =
* 0: add.sat.g.f0(8) dest src0 src1
*/
#if 1
/* cmod propagation bails on every MOV except MOV.NZ. */
test_negative_float_saturate_prop(BRW_CONDITIONAL_G, BRW_OPCODE_MOV);
#else
test_positive_float_saturate_prop(BRW_CONDITIONAL_G, BRW_CONDITIONAL_G,
BRW_OPCODE_MOV);
#endif
}
TEST_F(cmod_propagation_test, float_saturate_le_cmp)
{
/* With the saturate modifier, the comparison happens before clamping to
* [0, 1]. (sat(x) <= 0) == (x <= 0).
*
* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: cmp.le.f0(8) null dest 0.0f
*
* = After =
* 0: add.sat.le.f0(8) dest src0 src1
*/
test_positive_float_saturate_prop(BRW_CONDITIONAL_LE, BRW_CONDITIONAL_LE,
BRW_OPCODE_CMP);
}
TEST_F(cmod_propagation_test, float_saturate_le_mov)
{
/* With the saturate modifier, the comparison happens before clamping to
* [0, 1]. (sat(x) <= 0) == (x <= 0).
*
* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: mov.le.f0(8) null dest
*
* = After =
* 0: add.sat.le.f0(8) dest src0 src1
*/
#if 1
/* cmod propagation bails on every MOV except MOV.NZ. */
test_negative_float_saturate_prop(BRW_CONDITIONAL_LE, BRW_OPCODE_MOV);
#else
test_positive_float_saturate_prop(BRW_CONDITIONAL_LE, BRW_CONDITIONAL_LE,
BRW_OPCODE_MOV);
#endif
}
TEST_F(cmod_propagation_test, float_saturate_l_cmp)
{
/* With the saturate modifier, the comparison happens before clamping to
* [0, 1]. There is no before / after equivalence for (sat(x) < 0).
*
* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: cmp.l.f0(8) null dest 0.0f
*
* = After =
* No change
*/
test_negative_float_saturate_prop(BRW_CONDITIONAL_L, BRW_OPCODE_CMP);
}
#if 0
TEST_F(cmod_propagation_test, float_saturate_l_mov)
{
/* With the saturate modifier, the comparison happens before clamping to
* [0, 1]. There is no before / after equivalence for (sat(x) < 0).
*
* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: mov.l.f0(8) null dest 0.0f
*
* = After =
* No change
*/
test_negative_float_saturate_prop(BRW_CONDITIONAL_L, BRW_OPCODE_MOV);
}
#endif
TEST_F(cmod_propagation_test, float_saturate_ge_cmp)
{
/* With the saturate modifier, the comparison happens before clamping to
* [0, 1]. There is no before / after equivalence for (sat(x) >= 0).
*
* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: cmp.ge.f0(8) null dest 0.0f
*
* = After =
* No change
*/
test_negative_float_saturate_prop(BRW_CONDITIONAL_GE, BRW_OPCODE_CMP);
}
TEST_F(cmod_propagation_test, float_saturate_ge_mov)
{
/* With the saturate modifier, the comparison happens before clamping to
* [0, 1]. There is no before / after equivalence for (sat(x) >= 0).
*
* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: mov.ge.f0(8) null dest 0.0f
*
* = After =
* No change
*/
test_negative_float_saturate_prop(BRW_CONDITIONAL_GE, BRW_OPCODE_MOV);
}
TEST_F(cmod_propagation_test, int_saturate_nz_cmp)
{
/* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: cmp.nz.f0(8) null dest 0
*
* = After =
* No change.
*/
test_negative_int_saturate_prop(BRW_CONDITIONAL_NZ, BRW_OPCODE_CMP);
}
TEST_F(cmod_propagation_test, int_saturate_nz_mov)
{
/* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: mov.nz.f0(8) null dest
*
* = After =
* No change.
*/
test_negative_int_saturate_prop(BRW_CONDITIONAL_NZ, BRW_OPCODE_MOV);
}
TEST_F(cmod_propagation_test, int_saturate_z_cmp)
{
/* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: cmp.z.f0(8) null dest 0
*
* = After =
* No change.
*/
test_negative_int_saturate_prop(BRW_CONDITIONAL_Z, BRW_OPCODE_CMP);
}
TEST_F(cmod_propagation_test, int_saturate_z_mov)
{
/* With the saturate modifier, the comparison happens before clamping to
* [0, 1]. (sat(x) == 0) == (x <= 0).
*
* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: mov.z.f0(8) null dest
*
* = After =
* No change.
*/
test_negative_int_saturate_prop(BRW_CONDITIONAL_Z, BRW_OPCODE_MOV);
}
TEST_F(cmod_propagation_test, int_saturate_g_cmp)
{
/* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: cmp.g.f0(8) null dest 0
*
* = After =
* No change.
*/
test_negative_int_saturate_prop(BRW_CONDITIONAL_G, BRW_OPCODE_CMP);
}
TEST_F(cmod_propagation_test, int_saturate_g_mov)
{
/* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: mov.g.f0(8) null dest
*
* = After =
* No change.
*/
test_negative_int_saturate_prop(BRW_CONDITIONAL_G, BRW_OPCODE_MOV);
}
TEST_F(cmod_propagation_test, int_saturate_le_cmp)
{
/* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: cmp.le.f0(8) null dest 0
*
* = After =
* No change.
*/
test_negative_int_saturate_prop(BRW_CONDITIONAL_LE, BRW_OPCODE_CMP);
}
TEST_F(cmod_propagation_test, int_saturate_le_mov)
{
/* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: mov.le.f0(8) null dest
*
* = After =
* No change.
*/
test_negative_int_saturate_prop(BRW_CONDITIONAL_LE, BRW_OPCODE_MOV);
}
TEST_F(cmod_propagation_test, int_saturate_l_cmp)
{
/* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: cmp.l.f0(8) null dest 0
*
* = After =
* No change
*/
test_negative_int_saturate_prop(BRW_CONDITIONAL_L, BRW_OPCODE_CMP);
}
TEST_F(cmod_propagation_test, int_saturate_l_mov)
{
/* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: mov.l.f0(8) null dest 0
*
* = After =
* No change
*/
test_negative_int_saturate_prop(BRW_CONDITIONAL_L, BRW_OPCODE_MOV);
}
TEST_F(cmod_propagation_test, int_saturate_ge_cmp)
{
/* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: cmp.ge.f0(8) null dest 0
*
* = After =
* No change
*/
test_negative_int_saturate_prop(BRW_CONDITIONAL_GE, BRW_OPCODE_CMP);
}
TEST_F(cmod_propagation_test, int_saturate_ge_mov)
{
/* = Before =
*
* 0: add.sat(8) dest src0 src1
* 1: mov.ge.f0(8) null dest
*
* = After =
* No change
*/
test_negative_int_saturate_prop(BRW_CONDITIONAL_GE, BRW_OPCODE_MOV);
}