intel/brw: Copy prop from raw integer moves with mismatched types

The specific pattern from the unit test was observed in ray tracing
trampoline shaders.

v2: Refactor the is_raw_move tests out to a utility function. Suggested
by Ken.

v3: Fix a regression caused by being too picky about source
modifiers. This was introduced somewhere between when I did initial
shader-db runs and v2.

v4: Fix typo in comment. Noticed by Caio.

shader-db:

All Intel platforms had similar results. (Meteor Lake shown)
total instructions in shared programs: 19734086 -> 19733997 (<.01%)
instructions in affected programs: 135388 -> 135299 (-0.07%)
helped: 76 / HURT: 2

total cycles in shared programs: 916290451 -> 916264968 (<.01%)
cycles in affected programs: 41046002 -> 41020519 (-0.06%)
helped: 32 / HURT: 29

fossil-db:

Meteor Lake, DG2, and Skylake had similar results. (Meteor Lake shown)
Totals:
Instrs: 151531355 -> 151513669 (-0.01%); split: -0.01%, +0.00%
Cycle count: 17209372399 -> 17208178205 (-0.01%); split: -0.01%, +0.00%
Max live registers: 32016490 -> 32016493 (+0.00%)

Totals from 17361 (2.75% of 630198) affected shaders:
Instrs: 2642048 -> 2624362 (-0.67%); split: -0.67%, +0.00%
Cycle count: 79803066 -> 78608872 (-1.50%); split: -1.75%, +0.25%
Max live registers: 421668 -> 421671 (+0.00%)

Tiger Lake and Ice Lake had similar results. (Tiger Lake shown)
Totals:
Instrs: 149995644 -> 149977326 (-0.01%); split: -0.01%, +0.00%
Cycle count: 15567293770 -> 15566524840 (-0.00%); split: -0.02%, +0.01%
Spill count: 61241 -> 61238 (-0.00%)
Fill count: 107304 -> 107301 (-0.00%)
Max live registers: 31993109 -> 31993112 (+0.00%)

Totals from 17813 (2.83% of 629912) affected shaders:
Instrs: 3738236 -> 3719918 (-0.49%); split: -0.49%, +0.00%
Cycle count: 4251157049 -> 4250388119 (-0.02%); split: -0.06%, +0.04%
Spill count: 28268 -> 28265 (-0.01%)
Fill count: 50377 -> 50374 (-0.01%)
Max live registers: 470648 -> 470651 (+0.00%)

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30251>
This commit is contained in:
Ian Romanick
2024-07-16 16:04:38 -07:00
committed by Marge Bot
parent c160ed212e
commit 572e00dd66
4 changed files with 121 additions and 3 deletions

View File

@@ -1001,6 +1001,29 @@ fs_inst::has_sampler_residency() const
}
}
/**
 * Decide whether this instruction qualifies as a "raw" MOV.
 *
 * The instruction must be a BRW_OPCODE_MOV without saturate.  An immediate
 * source must not be a vector immediate; any other source must carry neither
 * a negate nor an abs modifier.  Finally, the source and destination types
 * must either be identical or both be integer types of the same bit size.
 *
 * \sa inst_is_raw_move in brw_eu_validate.
 */
bool
fs_inst::is_raw_move() const
{
   if (opcode != BRW_OPCODE_MOV)
      return false;

   /* Immediates are only disqualified when they are vector immediates;
    * every other kind of source is disqualified by a source modifier.
    */
   const bool bad_source = src[0].file == IMM
      ? brw_type_is_vector_imm(src[0].type)
      : (src[0].negate || src[0].abs);

   if (bad_source || saturate)
      return false;

   /* Identical types need no further inspection. */
   if (src[0].type == dst.type)
      return true;

   /* Differing types are acceptable only between equally sized integers. */
   const bool both_int = brw_type_is_int(src[0].type) &&
                         brw_type_is_int(dst.type);

   return both_int &&
          brw_type_size_bits(src[0].type) == brw_type_size_bits(dst.type);
}
/* For SIMD16, we need to follow from the uniform setup of SIMD8 dispatch.
* This brings in those uniform definitions
*/

View File

@@ -1274,8 +1274,12 @@ can_propagate_from(fs_inst *inst)
inst->src[0].file == IMM ||
(inst->src[0].file == FIXED_GRF &&
inst->src[0].is_contiguous())) &&
inst->src[0].type == inst->dst.type &&
!inst->saturate &&
/* is_raw_move also rejects source modifiers, but copy propagation
* can handle that if the types are the same.
*/
((inst->src[0].type == inst->dst.type &&
!inst->saturate) ||
inst->is_raw_move()) &&
/* Subset of !is_partial_write() conditions. */
!inst->predicate && inst->dst.is_contiguous()) ||
is_identity_payload(FIXED_GRF, inst);
@@ -1752,7 +1756,11 @@ find_value_for_offset(fs_inst *def, const brw_reg &src, unsigned src_size)
switch (def->opcode) {
case BRW_OPCODE_MOV:
if (def->dst.type == def->src[0].type && def->src[0].stride <= 1) {
/* is_raw_move also rejects source modifiers, but copy propagation
* can handle that if the types are the same.
*/
if ((def->dst.type == def->src[0].type || def->is_raw_move()) &&
def->src[0].stride <= 1) {
val = def->src[0];
unsigned rel_offset = src.offset - def->dst.offset;

View File

@@ -70,6 +70,7 @@ public:
bool is_control_flow_end() const;
bool is_control_flow() const;
bool is_commutative() const;
bool is_raw_move() const;
bool can_do_saturate() const;
bool reads_accumulator_implicitly() const;
bool writes_accumulator_implicitly(const struct intel_device_info *devinfo) const;

View File

@@ -228,3 +228,89 @@ TEST_F(copy_propagation_test, maxmax_sat_imm)
v->cfg = NULL;
}
}
/* Copy propagation should see through a MOV whose source (UD) and
 * destination (D) differ only in integer signedness, and rewrite a later
 * read of the MOV destination to read the MOV source instead.
 */
TEST_F(copy_propagation_test, mixed_integer_sign)
{
/* Only vgrf1 is signed; it is the destination of the type-mismatched MOV. */
brw_reg vgrf0 = bld.vgrf(BRW_TYPE_UD);
brw_reg vgrf1 = bld.vgrf(BRW_TYPE_D);
brw_reg vgrf2 = bld.vgrf(BRW_TYPE_UD);
brw_reg vgrf3 = bld.vgrf(BRW_TYPE_UD);
brw_reg vgrf4 = bld.vgrf(BRW_TYPE_UD);
bld.MOV(vgrf1, vgrf0);
/* The consumer reads the MOV destination retyped back to UD. */
bld.BFE(vgrf2, vgrf3, vgrf4, retype(vgrf1, BRW_TYPE_UD));
/* = Before =
*
* 0: mov(8) vgrf1:D vgrf0:UD
* 1: bfe(8) vgrf2:UD vgrf3:UD vgrf4:UD vgrf1:UD
*
* = After =
* 0: mov(8) vgrf1:D vgrf0:UD
* 1: bfe(8) vgrf2:UD vgrf3:UD vgrf4:UD vgrf0:UD
*/
brw_calculate_cfg(*v);
bblock_t *block0 = v->cfg->blocks[0];
/* The block holds exactly the two instructions emitted above. */
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(1, block0->end_ip);
/* The pass must report that it made progress. */
EXPECT_TRUE(copy_propagation(v));
/* The instruction range of the block is expected to be unchanged. */
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(1, block0->end_ip);
/* The MOV itself is expected to be left untouched. */
fs_inst *mov = instruction(block0, 0);
EXPECT_EQ(BRW_OPCODE_MOV, mov->opcode);
EXPECT_TRUE(mov->dst.equals(vgrf1));
EXPECT_TRUE(mov->src[0].equals(vgrf0));
/* Only the third BFE source changes: it now reads vgrf0 rather than vgrf1. */
fs_inst *bfe = instruction(block0, 1);
EXPECT_EQ(BRW_OPCODE_BFE, bfe->opcode);
EXPECT_TRUE(bfe->dst.equals(vgrf2));
EXPECT_TRUE(bfe->src[0].equals(vgrf3));
EXPECT_TRUE(bfe->src[1].equals(vgrf4));
EXPECT_TRUE(bfe->src[2].equals(vgrf0));
}
/* Negative counterpart of the mixed-sign case: when the MOV source is a
 * vector immediate, a type-mismatched read of the MOV destination must not
 * be copy propagated, so the pass is expected to change nothing.
 */
TEST_F(copy_propagation_test, mixed_integer_sign_with_vector_imm)
{
brw_reg vgrf0 = bld.vgrf(BRW_TYPE_W);
brw_reg vgrf1 = bld.vgrf(BRW_TYPE_UD);
brw_reg vgrf2 = bld.vgrf(BRW_TYPE_UD);
/* Load a UV vector immediate into a W register, then read it back as UW. */
bld.MOV(vgrf0, brw_imm_uv(0xffff));
bld.ADD(vgrf1, vgrf2, retype(vgrf0, BRW_TYPE_UW));
/* = Before =
*
* 0: mov(8) vgrf0:W ...:UV
* 1: add(8) vgrf1:UD vgrf2:UD vgrf0:UW
*
* = After =
* No change
*/
brw_calculate_cfg(*v);
bblock_t *block0 = v->cfg->blocks[0];
/* Snapshot the ADD's second source so it can be compared after the pass. */
const brw_reg src1 = instruction(block0, 1)->src[1];
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(1, block0->end_ip);
/* The pass must report that it made no progress. */
EXPECT_FALSE(copy_propagation(v));
EXPECT_EQ(0, block0->start_ip);
EXPECT_EQ(1, block0->end_ip);
/* The MOV still writes vgrf0 from an immediate. */
fs_inst *mov = instruction(block0, 0);
EXPECT_EQ(BRW_OPCODE_MOV, mov->opcode);
EXPECT_TRUE(mov->dst.equals(vgrf0));
EXPECT_TRUE(mov->src[0].file == IMM);
/* The ADD operands are unchanged; src[1] matches the earlier snapshot. */
fs_inst *add = instruction(block0, 1);
EXPECT_EQ(BRW_OPCODE_ADD, add->opcode);
EXPECT_TRUE(add->dst.equals(vgrf1));
EXPECT_TRUE(add->src[0].equals(vgrf2));
EXPECT_TRUE(add->src[1].equals(src1));
}