intel/fs: Emit better code for bfi(..., 0)
DG2, Tiger Lake, Ice Lake, and Skylake had similar results (Ice Lake shown)
total instructions in shared programs: 20570141 -> 20570063 (<.01%)
instructions in affected programs: 30679 -> 30601 (-0.25%)
helped: 77 / HURT: 0
total cycles in shared programs: 902113977 -> 902118723 (<.01%)
cycles in affected programs: 3255958 -> 3260704 (0.15%)
helped: 60 / HURT: 19

Broadwell
total instructions in shared programs: 18524633 -> 18524547 (<.01%)
instructions in affected programs: 34095 -> 34009 (-0.25%)
helped: 75 / HURT: 2
total cycles in shared programs: 949532394 -> 949543761 (<.01%)
cycles in affected programs: 3419107 -> 3430474 (0.33%)
helped: 57 / HURT: 24
total spills in shared programs: 22484 -> 22484 (0.00%)
spills in affected programs: 516 -> 516 (0.00%)
helped: 2 / HURT: 2
total fills in shared programs: 29346 -> 29338 (-0.03%)
fills in affected programs: 572 -> 564 (-1.40%)
helped: 4 / HURT: 0

Haswell
total instructions in shared programs: 17331356 -> 17331523 (<.01%)
instructions in affected programs: 27920 -> 28087 (0.60%)
helped: 41 / HURT: 4
total cycles in shared programs: 936603192 -> 936574664 (<.01%)
cycles in affected programs: 3417695 -> 3389167 (-0.83%)
helped: 28 / HURT: 21
total spills in shared programs: 19718 -> 19756 (0.19%)
spills in affected programs: 436 -> 474 (8.72%)
helped: 0 / HURT: 4
total fills in shared programs: 22547 -> 22607 (0.27%)
fills in affected programs: 444 -> 504 (13.51%)
helped: 0 / HURT: 4

Ivy Bridge
total cycles in shared programs: 463451277 -> 463451273 (<.01%)
cycles in affected programs: 95870 -> 95866 (<.01%)
helped: 3 / HURT: 2

DG2, Tiger Lake, Ice Lake, and Skylake had similar results (Ice Lake shown)
Totals:
Instrs: 152825278 -> 152819969 (-0.00%); split: -0.00%, +0.00%
Cycles: 15014075626 -> 15014628652 (+0.00%); split: -0.01%, +0.01%
Subgroup size: 8528536 -> 8528560 (+0.00%)
Send messages: 7711431 -> 7711464 (+0.00%)
Spill count: 99907 -> 99509 (-0.40%); split: -0.40%, +0.00%
Fill count: 202459 -> 201598 (-0.43%); split: -0.43%, +0.00%
Scratch Memory Size: 4376576 -> 4371456 (-0.12%)

Totals from 2915 (0.44% of 662497) affected shaders:
Instrs: 2288842 -> 2283533 (-0.23%); split: -0.24%, +0.01%
Cycles: 471633295 -> 472186321 (+0.12%); split: -0.27%, +0.39%
Subgroup size: 27488 -> 27512 (+0.09%)
Send messages: 151344 -> 151377 (+0.02%)
Spill count: 48091 -> 47693 (-0.83%); split: -0.83%, +0.00%
Fill count: 59053 -> 58192 (-1.46%); split: -1.46%, +0.00%
Scratch Memory Size: 1827840 -> 1822720 (-0.28%)

Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19968>
This commit is contained in:
@@ -936,6 +936,12 @@ can_fuse_fmul_fsign(nir_alu_instr *instr, unsigned fsign_src)
|
||||
is_used_once(fsign_instr);
|
||||
}
|
||||
|
||||
static bool
|
||||
is_const_zero(const nir_src &src)
|
||||
{
|
||||
return nir_src_is_const(src) && nir_src_as_int(src) == 0;
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
|
||||
bool need_dest)
|
||||
@@ -1733,7 +1739,16 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
|
||||
break;
|
||||
case nir_op_bfi:
|
||||
assert(nir_dest_bit_size(instr->dest.dest) < 64);
|
||||
bld.BFI2(result, op[0], op[1], op[2]);
|
||||
|
||||
/* bfi is ((...) | (~src0 & src2)). The second part is zero when src2 is
|
||||
* either 0 or src0. Replacing the 0 with another value can eliminate a
|
||||
* temporary register.
|
||||
*/
|
||||
if (is_const_zero(instr->src[2].src))
|
||||
bld.BFI2(result, op[0], op[1], op[0]);
|
||||
else
|
||||
bld.BFI2(result, op[0], op[1], op[2]);
|
||||
|
||||
break;
|
||||
|
||||
case nir_op_bitfield_insert:
|
||||
|
Reference in New Issue
Block a user