intel/fs: Fix flag_subreg handling in cmod propagation

There were two errors.  First, the pass could propagate conditional
modifiers from an instruction that writes on flag register to an
instruction that writes a different flag register.  For example,

    cmp.nz.f0.0(16) null:F, vgrf6:F, vgrf5:F
    cmp.nz.f0.1(16) null:F, vgrf6:F, vgrf5:F

could be come

    cmp.nz.f0.0(16) null:F, vgrf6:F, vgrf5:F

Second, if an instruction writes f0.1 has it's condition propagated, the
modified instruction will incorrectly write flag f0.0.  For example,

    linterp(16) vgrf6:F, g2:F, attr0:F
    cmp.z.f0.1(16) null:F, vgrf6:F, vgrf5:F
    (-f0.1) discard_jump(16) (null):UD

could become

    linterp.z.f0.0(16) vgrf6:F, g2:F, attr0:F
    (-f0.1) discard_jump(16) (null):UD

None of these cases will occur currently.  The only time we use f0.1 is
for generating discard intrinsics.  In all those cases, we generate a
squence like:

    cmp.nz.f0.0(16) vgrf7:F, vgrf6:F, vgrf5:F
    (+f0.1) cmp.z(16) null:D, vgrf7:D, 0d
    (-f0.1) discard_jump(16) (null):UD

Due to the mixed types and incompatible conditions, this sequence would
never see any cmod propagation.  The next patch will change this.

No shader-db changes on any Intel platform.

v2: Fix typo in comment in test case subtract_delete_compare_other_flag.
Noticed by Caio.

Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
Ian Romanick
2019-05-22 10:18:06 -07:00
parent 2dd6013933
commit 8030cb75c1
2 changed files with 196 additions and 0 deletions

View File

@@ -53,6 +53,7 @@ cmod_propagate_cmp_to_add(const gen_device_info *devinfo, bblock_t *block,
fs_inst *inst) fs_inst *inst)
{ {
bool read_flag = false; bool read_flag = false;
const unsigned flags_written = inst->flags_written();
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
if (scan_inst->opcode == BRW_OPCODE_ADD && if (scan_inst->opcode == BRW_OPCODE_ADD &&
@@ -79,6 +80,17 @@ cmod_propagate_cmp_to_add(const gen_device_info *devinfo, bblock_t *block,
goto not_match; goto not_match;
} }
/* If the scan instruction writes a different flag register than the
* instruction we're trying to propagate from, bail.
*
* FINISHME: The second part of the condition may be too strong.
* Perhaps (scan_inst->flags_written() & flags_written) !=
* flags_written?
*/
if (scan_inst->flags_written() != 0 &&
scan_inst->flags_written() != flags_written)
goto not_match;
/* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods": /* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods":
* *
* * Note that the [post condition signal] bits generated at * * Note that the [post condition signal] bits generated at
@@ -130,6 +142,7 @@ cmod_propagate_not(const gen_device_info *devinfo, bblock_t *block,
{ {
const enum brw_conditional_mod cond = brw_negate_cmod(inst->conditional_mod); const enum brw_conditional_mod cond = brw_negate_cmod(inst->conditional_mod);
bool read_flag = false; bool read_flag = false;
const unsigned flags_written = inst->flags_written();
if (cond != BRW_CONDITIONAL_Z && cond != BRW_CONDITIONAL_NZ) if (cond != BRW_CONDITIONAL_Z && cond != BRW_CONDITIONAL_NZ)
return false; return false;
@@ -146,6 +159,17 @@ cmod_propagate_not(const gen_device_info *devinfo, bblock_t *block,
scan_inst->exec_size != inst->exec_size) scan_inst->exec_size != inst->exec_size)
break; break;
/* If the scan instruction writes a different flag register than the
* instruction we're trying to propagate from, bail.
*
* FINISHME: The second part of the condition may be too strong.
* Perhaps (scan_inst->flags_written() & flags_written) !=
* flags_written?
*/
if (scan_inst->flags_written() != 0 &&
scan_inst->flags_written() != flags_written)
break;
if (scan_inst->can_do_cmod() && if (scan_inst->can_do_cmod() &&
((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) || ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) ||
scan_inst->conditional_mod == cond)) { scan_inst->conditional_mod == cond)) {
@@ -231,9 +255,21 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block)
} }
bool read_flag = false; bool read_flag = false;
const unsigned flags_written = inst->flags_written();
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
if (regions_overlap(scan_inst->dst, scan_inst->size_written, if (regions_overlap(scan_inst->dst, scan_inst->size_written,
inst->src[0], inst->size_read(0))) { inst->src[0], inst->size_read(0))) {
/* If the scan instruction writes a different flag register than
* the instruction we're trying to propagate from, bail.
*
* FINISHME: The second part of the condition may be too strong.
* Perhaps (scan_inst->flags_written() & flags_written) !=
* flags_written?
*/
if (scan_inst->flags_written() != 0 &&
scan_inst->flags_written() != flags_written)
break;
if (scan_inst->is_partial_write() || if (scan_inst->is_partial_write() ||
scan_inst->dst.offset != inst->src[0].offset || scan_inst->dst.offset != inst->src[0].offset ||
scan_inst->exec_size != inst->exec_size) scan_inst->exec_size != inst->exec_size)
@@ -380,6 +416,7 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block)
((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) || ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) ||
scan_inst->conditional_mod == cond)) { scan_inst->conditional_mod == cond)) {
scan_inst->conditional_mod = cond; scan_inst->conditional_mod = cond;
scan_inst->flag_subreg = inst->flag_subreg;
inst->remove(block); inst->remove(block);
progress = true; progress = true;
} }

View File

@@ -140,6 +140,40 @@ TEST_F(cmod_propagation_test, basic)
EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod); EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
} }
TEST_F(cmod_propagation_test, basic_other_flag)
{
const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
fs_reg zero(brw_imm_f(0.0f));
bld.ADD(dest, src0, src1);
bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE)
->flag_subreg = 1;
/* = Before =
*
* 0: add(8) dest src0 src1
* 1: cmp.ge.f0.1(8) null dest 0.0f
*
* = After =
* 0: add.ge.f0.1(8) dest src0 src1
*/
v->calculate_cfg();
bblock_t *block0 = v->cfg->blocks[0];
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(1, block0->end_ip);
EXPECT_TRUE(cmod_propagation(v));
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(0, block0->end_ip);
EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
EXPECT_EQ(1, instruction(block0, 0)->flag_subreg);
EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
}
TEST_F(cmod_propagation_test, cmp_nonzero) TEST_F(cmod_propagation_test, cmp_nonzero)
{ {
const fs_builder &bld = v->bld; const fs_builder &bld = v->bld;
@@ -864,6 +898,84 @@ TEST_F(cmod_propagation_test, subtract_delete_compare)
EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate); EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate);
} }
TEST_F(cmod_propagation_test, subtract_delete_compare_other_flag)
{
/* This test is the same as subtract_delete_compare but it explicitly used
* flag f0.1 for the subtraction and the comparison.
*/
const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::float_type);
fs_reg dest1 = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
fs_reg src2 = v->vgrf(glsl_type::float_type);
set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest, src0, negate(src1)))
->flag_subreg = 1;
set_predicate(BRW_PREDICATE_NORMAL, bld.MOV(dest1, src2));
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L)
->flag_subreg = 1;
/* = Before =
* 0: add.l.f0.1(8) dest0:F src0:F -src1:F
* 1: (+f0) mov(0) dest1:F src2:F
* 2: cmp.l.f0.1(8) null:F src0:F src1:F
*
* = After =
* 0: add.l.f0.1(8) dest:F src0:F -src1:F
* 1: (+f0) mov(0) dest1:F src2:F
*/
v->calculate_cfg();
bblock_t *block0 = v->cfg->blocks[0];
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(2, block0->end_ip);
EXPECT_TRUE(cmod_propagation(v));
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(1, block0->end_ip);
EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
EXPECT_EQ(1, instruction(block0, 0)->flag_subreg);
EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 1)->opcode);
EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate);
}
TEST_F(cmod_propagation_test, subtract_to_mismatch_flag)
{
const fs_builder &bld = v->bld;
fs_reg dest = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest, src0, negate(src1)));
bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L)
->flag_subreg = 1;
/* = Before =
* 0: add.l.f0(8) dest0:F src0:F -src1:F
* 1: cmp.l.f0.1(8) null:F src0:F src1:F
*
* = After =
* No changes
*/
v->calculate_cfg();
bblock_t *block0 = v->cfg->blocks[0];
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(1, block0->end_ip);
EXPECT_FALSE(cmod_propagation(v));
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(1, block0->end_ip);
EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
EXPECT_EQ(0, instruction(block0, 0)->flag_subreg);
EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 1)->conditional_mod);
EXPECT_EQ(1, instruction(block0, 1)->flag_subreg);
}
TEST_F(cmod_propagation_test, subtract_delete_compare_derp) TEST_F(cmod_propagation_test, subtract_delete_compare_derp)
{ {
const fs_builder &bld = v->bld; const fs_builder &bld = v->bld;
@@ -1643,6 +1755,53 @@ TEST_F(cmod_propagation_test, not_to_or_intervening_flag_read_compatible_value)
EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate); EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate);
} }
TEST_F(cmod_propagation_test,
not_to_or_intervening_flag_read_compatible_value_mismatch_flag)
{
/* Exercise propagation of conditional modifier from a NOT instruction to
* another ALU instruction as performed by cmod_propagate_not.
*/
const fs_builder &bld = v->bld;
fs_reg dest0 = v->vgrf(glsl_type::uint_type);
fs_reg dest1 = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::uint_type);
fs_reg src1 = v->vgrf(glsl_type::uint_type);
fs_reg src2 = v->vgrf(glsl_type::float_type);
fs_reg zero(brw_imm_f(0.0f));
set_condmod(BRW_CONDITIONAL_Z, bld.OR(dest0, src0, src1))
->flag_subreg = 1;
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero));
set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest0));
/* = Before =
*
* 0: or.z.f0.1(8) dest0 src0 src1
* 1: (+f0) sel(8) dest1 src2 0.0f
* 2: not.nz.f0(8) null dest0
*
* = After =
* No changes
*/
v->calculate_cfg();
bblock_t *block0 = v->cfg->blocks[0];
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(2, block0->end_ip);
EXPECT_FALSE(cmod_propagation(v));
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(2, block0->end_ip);
EXPECT_EQ(BRW_OPCODE_OR, instruction(block0, 0)->opcode);
EXPECT_EQ(BRW_CONDITIONAL_Z, instruction(block0, 0)->conditional_mod);
EXPECT_EQ(1, instruction(block0, 0)->flag_subreg);
EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode);
EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate);
EXPECT_EQ(BRW_OPCODE_NOT, instruction(block0, 2)->opcode);
EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 2)->conditional_mod);
EXPECT_EQ(0, instruction(block0, 2)->flag_subreg);
}
TEST_F(cmod_propagation_test, not_to_or_intervening_flag_read_incompatible_value) TEST_F(cmod_propagation_test, not_to_or_intervening_flag_read_incompatible_value)
{ {
/* Exercise propagation of conditional modifier from a NOT instruction to /* Exercise propagation of conditional modifier from a NOT instruction to