From 086ed1e54b86ba05b57d0a93ba8392e31c6a0311 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Thu, 1 Aug 2024 11:22:37 +0200 Subject: [PATCH] broadcom/compiler: emit instructions producing flags earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We usually emit flags right before consuming them but this is suboptimal from the point of view of register pressure: if an instruction is only used to generate flags then waiting to emit it right before reading the flags extends the liveness of the sources used to generate the flags for no gain. This pass will check for such instructions and try to move them as early as possible. Shader-db results below show this is effective to reduce register pressure, allowing a few shaders to increase thread counts and/or reduce spilling: total instructions in shared programs: 11057173 -> 11057076 (<.01%) instructions in affected programs: 1955543 -> 1955446 (<.01%) helped: 4214 HURT: 3905 Inconclusive result (value mean confidence interval includes 0). total threads in shared programs: 425096 -> 425170 (0.02%) threads in affected programs: 74 -> 148 (100.00%) helped: 37 HURT: 0 Threads are helped. total uniforms in shared programs: 3846275 -> 3845674 (-0.02%) uniforms in affected programs: 23574 -> 22973 (-2.55%) helped: 217 HURT: 30 Uniforms are helped. total max-temps in shared programs: 2222910 -> 2220488 (-0.11%) max-temps in affected programs: 61904 -> 59482 (-3.91%) helped: 2145 HURT: 113 Max-temps are helped. total spills in shared programs: 4294 -> 4280 (-0.33%) spills in affected programs: 148 -> 134 (-9.46%) helped: 8 HURT: 0 total fills in shared programs: 6497 -> 6468 (-0.45%) fills in affected programs: 291 -> 262 (-9.97%) helped: 8 HURT: 0 total sfu-stalls in shared programs: 14344 -> 14611 (1.86%) sfu-stalls in affected programs: 1308 -> 1575 (20.41%) helped: 217 HURT: 335 Inconclusive result (%-change mean confidence interval includes 0). 
total inst-and-stalls in shared programs: 11071517 -> 11071687 (<.01%) inst-and-stalls in affected programs: 1946767 -> 1946937 (<.01%) helped: 4191 HURT: 3909 Inconclusive result (value mean confidence interval includes 0). total nops in shared programs: 270628 -> 269829 (-0.30%) nops in affected programs: 22032 -> 21233 (-3.63%) helped: 1213 HURT: 571 Inconclusive result (%-change mean confidence interval includes 0). Reviewed-by: Alejandro Piñeiro Part-of: --- src/broadcom/compiler/nir_to_vir.c | 66 ++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index f20e498cbdf..9384888f290 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -4621,6 +4621,71 @@ ntq_emit_impl(struct v3d_compile *c, nir_function_impl *impl) ntq_emit_cf_list(c, &impl->body); } +static bool +vir_inst_reads_reg(struct qinst *inst, struct qreg r) +{ + for (int i = 0; i < vir_get_nsrc(inst); i++) { + if (inst->src[i].file == r.file && inst->src[i].index == r.index) + return true; + } + return false; +} + +static void +sched_flags_in_block(struct v3d_compile *c, struct qblock *block) +{ + struct qinst *flags_inst = NULL; + list_for_each_entry_safe_rev(struct qinst, inst, &block->instructions, link) { + /* Check for cases that would prevent us from moving a flags + * instruction any earlier than this instruction: + * + * - The flags instruction reads the result of this instr. + * - The instruction reads or writes flags. 
+ */ + if (flags_inst) { + if (vir_inst_reads_reg(flags_inst, inst->dst) || + v3d_qpu_writes_flags(&inst->qpu) || + v3d_qpu_reads_flags(&inst->qpu)) { + list_move_to(&flags_inst->link, &inst->link); + flags_inst = NULL; + } + } + + /* Skip if this instruction does more than just write flags */ + if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU || + inst->dst.file != QFILE_NULL || + !v3d_qpu_writes_flags(&inst->qpu)) { + continue; + } + + /* If we already had a flags_inst we should've moved it after + * this instruction in the if (flags_inst) above. + */ + assert(!flags_inst); + flags_inst = inst; + } + + /* If we reached the beginning of the block and we still have a flags + * instruction selected we can put it at the top of the block. + */ + if (flags_inst) { + list_move_to(&flags_inst->link, &block->instructions); + flags_inst = NULL; + } +} + +/** + * The purpose of this pass is to emit instructions that are only concerned + * with producing flags as early as possible to hopefully reduce liveness + * of their source arguments. + */ +static void +sched_flags(struct v3d_compile *c) +{ + vir_for_each_block(block, c) + sched_flags_in_block(c, block); +} + static void nir_to_vir(struct v3d_compile *c) { @@ -4894,6 +4959,7 @@ v3d_nir_to_vir(struct v3d_compile *c) } vir_optimize(c); + sched_flags(c); vir_check_payload_w(c);