From 96cde9cc01dc0cfa4d4edf70d47df118ca57e5c0 Mon Sep 17 00:00:00 2001
From: Ian Romanick
Date: Wed, 16 Nov 2022 13:12:50 -0800
Subject: [PATCH] intel/fs: Emit better code for bfi(..., 0)

DG2, Tiger Lake, Ice Lake, and Skylake had similar results (Ice Lake shown)
total instructions in shared programs: 20570141 -> 20570063 (<.01%)
instructions in affected programs: 30679 -> 30601 (-0.25%)
helped: 77 / HURT: 0

total cycles in shared programs: 902113977 -> 902118723 (<.01%)
cycles in affected programs: 3255958 -> 3260704 (0.15%)
helped: 60 / HURT: 19

Broadwell
total instructions in shared programs: 18524633 -> 18524547 (<.01%)
instructions in affected programs: 34095 -> 34009 (-0.25%)
helped: 75 / HURT: 2

total cycles in shared programs: 949532394 -> 949543761 (<.01%)
cycles in affected programs: 3419107 -> 3430474 (0.33%)
helped: 57 / HURT: 24

total spills in shared programs: 22484 -> 22484 (0.00%)
spills in affected programs: 516 -> 516 (0.00%)
helped: 2 / HURT: 2

total fills in shared programs: 29346 -> 29338 (-0.03%)
fills in affected programs: 572 -> 564 (-1.40%)
helped: 4 / HURT: 0

Haswell
total instructions in shared programs: 17331356 -> 17331523 (<.01%)
instructions in affected programs: 27920 -> 28087 (0.60%)
helped: 41 / HURT: 4

total cycles in shared programs: 936603192 -> 936574664 (<.01%)
cycles in affected programs: 3417695 -> 3389167 (-0.83%)
helped: 28 / HURT: 21

total spills in shared programs: 19718 -> 19756 (0.19%)
spills in affected programs: 436 -> 474 (8.72%)
helped: 0 / HURT: 4

total fills in shared programs: 22547 -> 22607 (0.27%)
fills in affected programs: 444 -> 504 (13.51%)
helped: 0 / HURT: 4

Ivy Bridge
total cycles in shared programs: 463451277 -> 463451273 (<.01%)
cycles in affected programs: 95870 -> 95866 (<.01%)
helped: 3 / HURT: 2

DG2, Tiger Lake, Ice Lake, and Skylake had similar results (Ice Lake shown)
Totals:
Instrs: 152825278 -> 152819969 (-0.00%); split: -0.00%, +0.00%
Cycles: 15014075626 -> 15014628652 (+0.00%); split: -0.01%, +0.01%
Subgroup size: 8528536 -> 8528560 (+0.00%)
Send messages: 7711431 -> 7711464 (+0.00%)
Spill count: 99907 -> 99509 (-0.40%); split: -0.40%, +0.00%
Fill count: 202459 -> 201598 (-0.43%); split: -0.43%, +0.00%
Scratch Memory Size: 4376576 -> 4371456 (-0.12%)

Totals from 2915 (0.44% of 662497) affected shaders:
Instrs: 2288842 -> 2283533 (-0.23%); split: -0.24%, +0.01%
Cycles: 471633295 -> 472186321 (+0.12%); split: -0.27%, +0.39%
Subgroup size: 27488 -> 27512 (+0.09%)
Send messages: 151344 -> 151377 (+0.02%)
Spill count: 48091 -> 47693 (-0.83%); split: -0.83%, +0.00%
Fill count: 59053 -> 58192 (-1.46%); split: -1.46%, +0.00%
Scratch Memory Size: 1827840 -> 1822720 (-0.28%)

Reviewed-by: Matt Turner
Part-of:
---
 src/intel/compiler/brw_fs_nir.cpp | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 073a1ad391c..0c48d10daf6 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -936,6 +936,12 @@ can_fuse_fmul_fsign(nir_alu_instr *instr, unsigned fsign_src)
           is_used_once(fsign_instr);
 }
 
+static bool
+is_const_zero(const nir_src &src)
+{
+   return nir_src_is_const(src) && nir_src_as_int(src) == 0;
+}
+
 void
 fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
                          bool need_dest)
@@ -1733,7 +1739,16 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
       break;
    case nir_op_bfi:
       assert(nir_dest_bit_size(instr->dest.dest) < 64);
-      bld.BFI2(result, op[0], op[1], op[2]);
+
+      /* bfi is ((...) | (~src0 & src2)). The second part is zero when src2 is
+       * either 0 or src0. Replacing the 0 with another value can eliminate a
+       * temporary register.
+       */
+      if (is_const_zero(instr->src[2].src))
+         bld.BFI2(result, op[0], op[1], op[0]);
+      else
+         bld.BFI2(result, op[0], op[1], op[2]);
+
       break;
 
    case nir_op_bitfield_insert: