i965/fs: Allow saturate propagation to propagate negations into MULs.

Allows us to transform

   mul      res  src0   src1
   mov.sat  dst  -res

into

   mul.sat  dst  src0  -src1

instructions in affected programs: 45246 -> 45054 (-0.42%)
helped: 162

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
This commit is contained in:
Matt Turner
2015-01-27 22:46:22 -08:00
parent 1567da1e28
commit 7b6113bc2d
2 changed files with 137 additions and 3 deletions

View File

@@ -56,8 +56,7 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
inst->dst.file != VGRF ||
inst->dst.type != inst->src[0].type ||
inst->src[0].file != VGRF ||
inst->src[0].abs ||
inst->src[0].negate)
inst->src[0].abs)
continue;
int src_var = v->live_intervals->var_from_reg(inst->src[0]);
@@ -82,6 +81,16 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
scan_inst->src[i].type = inst->dst.type;
}
}
if (inst->src[0].negate) {
if (scan_inst->opcode == BRW_OPCODE_MUL) {
scan_inst->src[0].negate = !scan_inst->src[0].negate;
inst->src[0].negate = false;
} else {
break;
}
}
scan_inst->saturate = true;
inst->saturate = false;
progress = true;
@@ -96,7 +105,9 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
if (scan_inst->opcode != BRW_OPCODE_MOV ||
!scan_inst->saturate ||
scan_inst->src[0].abs ||
scan_inst->src[0].negate) {
scan_inst->src[0].negate ||
scan_inst->src[0].abs != inst->src[0].abs ||
scan_inst->src[0].negate != inst->src[0].negate) {
interfered = true;
break;
}

View File

@@ -244,6 +244,129 @@ TEST_F(saturate_propagation_test, neg_mov_sat)
EXPECT_TRUE(instruction(block0, 1)->saturate);
}
TEST_F(saturate_propagation_test, mul_neg_mov_sat)
{
const fs_builder &bld = v->bld;
fs_reg dst0 = v->vgrf(glsl_type::float_type);
fs_reg dst1 = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
bld.MUL(dst0, src0, src1);
dst0.negate = true;
set_saturate(true, bld.MOV(dst1, dst0));
/* = Before =
*
* 0: mul(8) dst0 src0 src1
* 1: mov.sat(8) dst1 -dst0
*
* = After =
* 0: mul.sat(8) dst0 src0 -src1
* 1: mov(8) dst1 dst0
*/
v->calculate_cfg();
bblock_t *block0 = v->cfg->blocks[0];
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(1, block0->end_ip);
EXPECT_TRUE(saturate_propagation(v));
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(1, block0->end_ip);
EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode);
EXPECT_TRUE(instruction(block0, 0)->saturate);
EXPECT_TRUE(instruction(block0, 0)->src[0].negate);
EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 1)->opcode);
EXPECT_FALSE(instruction(block0, 1)->saturate);
EXPECT_FALSE(instruction(block0, 1)->src[0].negate);
}
TEST_F(saturate_propagation_test, mul_mov_sat_neg_mov_sat)
{
const fs_builder &bld = v->bld;
fs_reg dst0 = v->vgrf(glsl_type::float_type);
fs_reg dst1 = v->vgrf(glsl_type::float_type);
fs_reg dst2 = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
bld.MUL(dst0, src0, src1);
set_saturate(true, bld.MOV(dst1, dst0));
dst0.negate = true;
set_saturate(true, bld.MOV(dst2, dst0));
/* = Before =
*
* 0: mul(8) dst0 src0 src1
* 1: mov.sat(8) dst1 dst0
* 2: mov.sat(8) dst2 -dst0
*
* = After =
* (no changes)
*/
v->calculate_cfg();
bblock_t *block0 = v->cfg->blocks[0];
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(2, block0->end_ip);
EXPECT_FALSE(saturate_propagation(v));
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(2, block0->end_ip);
EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode);
EXPECT_FALSE(instruction(block0, 0)->saturate);
EXPECT_FALSE(instruction(block0, 0)->src[1].negate);
EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 1)->opcode);
EXPECT_TRUE(instruction(block0, 1)->saturate);
EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 2)->opcode);
EXPECT_TRUE(instruction(block0, 2)->src[0].negate);
EXPECT_TRUE(instruction(block0, 2)->saturate);
}
TEST_F(saturate_propagation_test, mul_neg_mov_sat_neg_mov_sat)
{
const fs_builder &bld = v->bld;
fs_reg dst0 = v->vgrf(glsl_type::float_type);
fs_reg dst1 = v->vgrf(glsl_type::float_type);
fs_reg dst2 = v->vgrf(glsl_type::float_type);
fs_reg src0 = v->vgrf(glsl_type::float_type);
fs_reg src1 = v->vgrf(glsl_type::float_type);
bld.MUL(dst0, src0, src1);
dst0.negate = true;
set_saturate(true, bld.MOV(dst1, dst0));
set_saturate(true, bld.MOV(dst2, dst0));
/* = Before =
*
* 0: mul(8) dst0 src0 src1
* 1: mov.sat(8) dst1 -dst0
* 2: mov.sat(8) dst2 -dst0
*
* = After =
* (no changes)
*/
v->calculate_cfg();
bblock_t *block0 = v->cfg->blocks[0];
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(2, block0->end_ip);
EXPECT_FALSE(saturate_propagation(v));
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(2, block0->end_ip);
EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode);
EXPECT_FALSE(instruction(block0, 0)->saturate);
EXPECT_FALSE(instruction(block0, 0)->src[1].negate);
EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 1)->opcode);
EXPECT_TRUE(instruction(block0, 1)->src[0].negate);
EXPECT_TRUE(instruction(block0, 1)->saturate);
EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 2)->opcode);
EXPECT_TRUE(instruction(block0, 2)->src[0].negate);
EXPECT_TRUE(instruction(block0, 2)->saturate);
}
TEST_F(saturate_propagation_test, abs_mov_sat)
{
const fs_builder &bld = v->bld;