intel/brw: Copy prop from raw integer moves with mismatched types
The specific pattern from the unit test was observed in ray tracing trampoline shaders. v2: Refactor the is_raw_move tests out to a utility function. Suggested by Ken. v3: Fix a regression caused by being too picky about source modifiers. This was introduced somewhere between when I did initial shader-db runs an v2. v4: Fix typo in comment. Noticed by Caio. shader-db: All Intel platforms had similar results. (Meteor Lake shown) total instructions in shared programs: 19734086 -> 19733997 (<.01%) instructions in affected programs: 135388 -> 135299 (-0.07%) helped: 76 / HURT: 2 total cycles in shared programs: 916290451 -> 916264968 (<.01%) cycles in affected programs: 41046002 -> 41020519 (-0.06%) helped: 32 / HURT: 29 fossil-db: Meteor Lake, DG2, and Skylake had similar results. (Meteor Lake shown) Totals: Instrs: 151531355 -> 151513669 (-0.01%); split: -0.01%, +0.00% Cycle count: 17209372399 -> 17208178205 (-0.01%); split: -0.01%, +0.00% Max live registers: 32016490 -> 32016493 (+0.00%) Totals from 17361 (2.75% of 630198) affected shaders: Instrs: 2642048 -> 2624362 (-0.67%); split: -0.67%, +0.00% Cycle count: 79803066 -> 78608872 (-1.50%); split: -1.75%, +0.25% Max live registers: 421668 -> 421671 (+0.00%) Tiger Lake and Ice Lake had similar results. (Tiger Lake shown) Totals: Instrs: 149995644 -> 149977326 (-0.01%); split: -0.01%, +0.00% Cycle count: 15567293770 -> 15566524840 (-0.00%); split: -0.02%, +0.01% Spill count: 61241 -> 61238 (-0.00%) Fill count: 107304 -> 107301 (-0.00%) Max live registers: 31993109 -> 31993112 (+0.00%) Totals from 17813 (2.83% of 629912) affected shaders: Instrs: 3738236 -> 3719918 (-0.49%); split: -0.49%, +0.00% Cycle count: 4251157049 -> 4250388119 (-0.02%); split: -0.06%, +0.04% Spill count: 28268 -> 28265 (-0.01%) Fill count: 50377 -> 50374 (-0.01%) Max live registers: 470648 -> 470651 (+0.00%) Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30251>
This commit is contained in:
@@ -1001,6 +1001,29 @@ fs_inst::has_sampler_residency() const
|
||||
}
|
||||
}
|
||||
|
||||
/* \sa inst_is_raw_move in brw_eu_validate. */
|
||||
bool
|
||||
fs_inst::is_raw_move() const
|
||||
{
|
||||
if (opcode != BRW_OPCODE_MOV)
|
||||
return false;
|
||||
|
||||
if (src[0].file == IMM) {
|
||||
if (brw_type_is_vector_imm(src[0].type))
|
||||
return false;
|
||||
} else if (src[0].negate || src[0].abs) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (saturate)
|
||||
return false;
|
||||
|
||||
return src[0].type == dst.type ||
|
||||
(brw_type_is_int(src[0].type) &&
|
||||
brw_type_is_int(dst.type) &&
|
||||
brw_type_size_bits(src[0].type) == brw_type_size_bits(dst.type));
|
||||
}
|
||||
|
||||
/* For SIMD16, we need to follow from the uniform setup of SIMD8 dispatch.
|
||||
* This brings in those uniform definitions
|
||||
*/
|
||||
|
@@ -1274,8 +1274,12 @@ can_propagate_from(fs_inst *inst)
|
||||
inst->src[0].file == IMM ||
|
||||
(inst->src[0].file == FIXED_GRF &&
|
||||
inst->src[0].is_contiguous())) &&
|
||||
inst->src[0].type == inst->dst.type &&
|
||||
!inst->saturate &&
|
||||
/* is_raw_move also rejects source modifiers, but copy propagation
|
||||
* can handle that if the types are the same.
|
||||
*/
|
||||
((inst->src[0].type == inst->dst.type &&
|
||||
!inst->saturate) ||
|
||||
inst->is_raw_move()) &&
|
||||
/* Subset of !is_partial_write() conditions. */
|
||||
!inst->predicate && inst->dst.is_contiguous()) ||
|
||||
is_identity_payload(FIXED_GRF, inst);
|
||||
@@ -1752,7 +1756,11 @@ find_value_for_offset(fs_inst *def, const brw_reg &src, unsigned src_size)
|
||||
|
||||
switch (def->opcode) {
|
||||
case BRW_OPCODE_MOV:
|
||||
if (def->dst.type == def->src[0].type && def->src[0].stride <= 1) {
|
||||
/* is_raw_move also rejects source modifiers, but copy propagation
|
||||
* can handle that if the tyeps are the same.
|
||||
*/
|
||||
if ((def->dst.type == def->src[0].type || def->is_raw_move()) &&
|
||||
def->src[0].stride <= 1) {
|
||||
val = def->src[0];
|
||||
|
||||
unsigned rel_offset = src.offset - def->dst.offset;
|
||||
|
@@ -70,6 +70,7 @@ public:
|
||||
bool is_control_flow_end() const;
|
||||
bool is_control_flow() const;
|
||||
bool is_commutative() const;
|
||||
bool is_raw_move() const;
|
||||
bool can_do_saturate() const;
|
||||
bool reads_accumulator_implicitly() const;
|
||||
bool writes_accumulator_implicitly(const struct intel_device_info *devinfo) const;
|
||||
|
@@ -228,3 +228,89 @@ TEST_F(copy_propagation_test, maxmax_sat_imm)
|
||||
v->cfg = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(copy_propagation_test, mixed_integer_sign)
|
||||
{
|
||||
brw_reg vgrf0 = bld.vgrf(BRW_TYPE_UD);
|
||||
brw_reg vgrf1 = bld.vgrf(BRW_TYPE_D);
|
||||
brw_reg vgrf2 = bld.vgrf(BRW_TYPE_UD);
|
||||
brw_reg vgrf3 = bld.vgrf(BRW_TYPE_UD);
|
||||
brw_reg vgrf4 = bld.vgrf(BRW_TYPE_UD);
|
||||
|
||||
bld.MOV(vgrf1, vgrf0);
|
||||
bld.BFE(vgrf2, vgrf3, vgrf4, retype(vgrf1, BRW_TYPE_UD));
|
||||
|
||||
/* = Before =
|
||||
*
|
||||
* 0: mov(8) vgrf1:D vgrf0:UD
|
||||
* 1: bfe(8) vgrf2:UD vgrf3:UD vgrf4:UD vgrf1:UD
|
||||
*
|
||||
* = After =
|
||||
* 0: mov(8) vgrf1:D vgrf0:UD
|
||||
* 1: bfe(8) vgrf2:UD vgrf3:UD vgrf4:UD vgrf0:UD
|
||||
*/
|
||||
|
||||
brw_calculate_cfg(*v);
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
|
||||
EXPECT_EQ(0, block0->start_ip);
|
||||
EXPECT_EQ(1, block0->end_ip);
|
||||
|
||||
EXPECT_TRUE(copy_propagation(v));
|
||||
EXPECT_EQ(0, block0->start_ip);
|
||||
EXPECT_EQ(1, block0->end_ip);
|
||||
|
||||
fs_inst *mov = instruction(block0, 0);
|
||||
EXPECT_EQ(BRW_OPCODE_MOV, mov->opcode);
|
||||
EXPECT_TRUE(mov->dst.equals(vgrf1));
|
||||
EXPECT_TRUE(mov->src[0].equals(vgrf0));
|
||||
|
||||
fs_inst *bfe = instruction(block0, 1);
|
||||
EXPECT_EQ(BRW_OPCODE_BFE, bfe->opcode);
|
||||
EXPECT_TRUE(bfe->dst.equals(vgrf2));
|
||||
EXPECT_TRUE(bfe->src[0].equals(vgrf3));
|
||||
EXPECT_TRUE(bfe->src[1].equals(vgrf4));
|
||||
EXPECT_TRUE(bfe->src[2].equals(vgrf0));
|
||||
}
|
||||
|
||||
TEST_F(copy_propagation_test, mixed_integer_sign_with_vector_imm)
|
||||
{
|
||||
brw_reg vgrf0 = bld.vgrf(BRW_TYPE_W);
|
||||
brw_reg vgrf1 = bld.vgrf(BRW_TYPE_UD);
|
||||
brw_reg vgrf2 = bld.vgrf(BRW_TYPE_UD);
|
||||
|
||||
bld.MOV(vgrf0, brw_imm_uv(0xffff));
|
||||
bld.ADD(vgrf1, vgrf2, retype(vgrf0, BRW_TYPE_UW));
|
||||
|
||||
/* = Before =
|
||||
*
|
||||
* 0: mov(8) vgrf0:W ...:UV
|
||||
* 1: add(8) vgrf1:UD vgrf2:UD vgrf0:UW
|
||||
*
|
||||
* = After =
|
||||
* No change
|
||||
*/
|
||||
|
||||
brw_calculate_cfg(*v);
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
|
||||
const brw_reg src1 = instruction(block0, 1)->src[1];
|
||||
|
||||
EXPECT_EQ(0, block0->start_ip);
|
||||
EXPECT_EQ(1, block0->end_ip);
|
||||
|
||||
EXPECT_FALSE(copy_propagation(v));
|
||||
EXPECT_EQ(0, block0->start_ip);
|
||||
EXPECT_EQ(1, block0->end_ip);
|
||||
|
||||
fs_inst *mov = instruction(block0, 0);
|
||||
EXPECT_EQ(BRW_OPCODE_MOV, mov->opcode);
|
||||
EXPECT_TRUE(mov->dst.equals(vgrf0));
|
||||
EXPECT_TRUE(mov->src[0].file == IMM);
|
||||
|
||||
fs_inst *add = instruction(block0, 1);
|
||||
EXPECT_EQ(BRW_OPCODE_ADD, add->opcode);
|
||||
EXPECT_TRUE(add->dst.equals(vgrf1));
|
||||
EXPECT_TRUE(add->src[0].equals(vgrf2));
|
||||
EXPECT_TRUE(add->src[1].equals(src1));
|
||||
}
|
||||
|
Reference in New Issue
Block a user