intel/brw: Relax is_partial_write check in cmod propagation
The is_partial_write check is too strict because it tests two separate things. It tests whether or not the instruction always writes a value (i.e., is it predicated), and it tests whether or not the instruction writes a complete register. This latter check is problematic as it perevents cmod propagation in SIMD1, and it prevents cmod propagation in SIMD8 when the destination size is 16 bits. This check is unnecessary. Cmod propagation already checks that the region written and region read overlap. It also already checks that the execution sizes of the instructions match. Further restriction based on the specific parts of the register written only generates false negatives. v2: Relax all of the calls to is_partial_write. Suggested by Caio. No shader-db changes on any Intel platform. fossil-db: Meteor Lake Totals: Instrs: 151505520 -> 151502923 (-0.00%); split: -0.00%, +0.00% Cycle count: 17201385104 -> 17194901423 (-0.04%); split: -0.06%, +0.02% Spill count: 80827 -> 80837 (+0.01%) Fill count: 152693 -> 152692 (-0.00%); split: -0.01%, +0.01% Totals from 346 (0.05% of 630198) affected shaders: Instrs: 1257205 -> 1254608 (-0.21%); split: -0.21%, +0.00% Cycle count: 5532845647 -> 5526361966 (-0.12%); split: -0.18%, +0.06% Spill count: 32903 -> 32913 (+0.03%) Fill count: 64338 -> 64337 (-0.00%); split: -0.03%, +0.03% DG2 Totals: Instrs: 151531440 -> 151528055 (-0.00%); split: -0.00%, +0.00% Cycle count: 17200238927 -> 17197996676 (-0.01%); split: -0.03%, +0.02% Spill count: 81003 -> 80971 (-0.04%); split: -0.04%, +0.00% Fill count: 152975 -> 152912 (-0.04%); split: -0.05%, +0.01% Totals from 346 (0.05% of 630198) affected shaders: Instrs: 1260363 -> 1256978 (-0.27%); split: -0.27%, +0.00% Cycle count: 5532019670 -> 5529777419 (-0.04%); split: -0.09%, +0.05% Spill count: 33046 -> 33014 (-0.10%); split: -0.11%, +0.01% Fill count: 64581 -> 64518 (-0.10%); split: -0.13%, +0.03% Tiger Lake and Ice Lake had similar results. (Tiger Lake shown) Totals: Instrs: 149972324 -> 149972289 (-0.00%) Cycle count: 15566495293 -> 15565151171 (-0.01%); split: -0.01%, +0.00% Totals from 16 (0.00% of 629912) affected shaders: Instrs: 351194 -> 351159 (-0.01%) Cycle count: 3922227030 -> 3920882908 (-0.03%); split: -0.04%, +0.00% Skylake Totals: Instrs: 140787999 -> 140787983 (-0.00%); split: -0.00%, +0.00% Cycle count: 14665614947 -> 14665515855 (-0.00%); split: -0.00%, +0.00% Spill count: 58500 -> 58501 (+0.00%) Fill count: 102097 -> 102100 (+0.00%) Totals from 16 (0.00% of 625685) affected shaders: Instrs: 343560 -> 343544 (-0.00%); split: -0.01%, +0.01% Cycle count: 3354997898 -> 3354898806 (-0.00%); split: -0.01%, +0.01% Spill count: 16864 -> 16865 (+0.01%) Fill count: 27479 -> 27482 (+0.01%) Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30251>
This commit is contained in:
@@ -59,7 +59,8 @@ cmod_propagate_cmp_to_add(const intel_device_info *devinfo, bblock_t *block,
|
||||
|
||||
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
|
||||
if (scan_inst->opcode == BRW_OPCODE_ADD &&
|
||||
!scan_inst->is_partial_write() &&
|
||||
!scan_inst->predicate &&
|
||||
scan_inst->dst.is_contiguous() &&
|
||||
scan_inst->exec_size == inst->exec_size) {
|
||||
bool negate;
|
||||
|
||||
@@ -184,7 +185,8 @@ cmod_propagate_not(const intel_device_info *devinfo, bblock_t *block,
|
||||
scan_inst->opcode != BRW_OPCODE_AND)
|
||||
break;
|
||||
|
||||
if (scan_inst->is_partial_write() ||
|
||||
if (scan_inst->predicate ||
|
||||
!scan_inst->dst.is_contiguous() ||
|
||||
scan_inst->dst.offset != inst->src[0].offset ||
|
||||
scan_inst->exec_size != inst->exec_size)
|
||||
break;
|
||||
@@ -298,7 +300,8 @@ opt_cmod_propagation_local(const intel_device_info *devinfo, bblock_t *block)
|
||||
scan_inst->flags_written(devinfo) != flags_written)
|
||||
break;
|
||||
|
||||
if (scan_inst->is_partial_write() ||
|
||||
if (scan_inst->predicate ||
|
||||
!scan_inst->dst.is_contiguous() ||
|
||||
scan_inst->dst.offset != inst->src[0].offset ||
|
||||
scan_inst->exec_size != inst->exec_size)
|
||||
break;
|
||||
|
@@ -3136,3 +3136,33 @@ TEST_F(cmod_propagation_test, prop_across_sel)
|
||||
EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, Boolean_size_conversion)
|
||||
{
|
||||
brw_reg dest1 = bld.vgrf(BRW_TYPE_W);
|
||||
brw_reg src0 = bld.vgrf(BRW_TYPE_W);
|
||||
brw_reg zero(brw_imm_w(0));
|
||||
|
||||
bld.CMP(dest1, src0, zero, BRW_CONDITIONAL_NZ);
|
||||
set_condmod(BRW_CONDITIONAL_NZ, bld.MOV(bld.null_reg_d(), dest1));
|
||||
|
||||
/* = Before =
|
||||
* 0: cmp.nz.f0 dest1:W src0:W 0W
|
||||
* 1: mov.nz.f0 null:D dest1:W
|
||||
*
|
||||
* = After =
|
||||
* 0: cmp.nz.f0 dest1:W src0:W 0W
|
||||
*/
|
||||
|
||||
brw_calculate_cfg(*v);
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
|
||||
EXPECT_EQ(0, block0->start_ip);
|
||||
EXPECT_EQ(1, block0->end_ip);
|
||||
|
||||
EXPECT_TRUE(cmod_propagation(v));
|
||||
EXPECT_EQ(0, block0->start_ip);
|
||||
EXPECT_EQ(0, block0->end_ip);
|
||||
|
||||
EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
|
||||
EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod);
|
||||
}
|
||||
|
Reference in New Issue
Block a user