diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index c4e774e4f76..6e5cc66b573 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -1182,7 +1182,8 @@ agx_emit_logical_end(agx_builder *b)
       agx_logical_end(b);
 }
 
-/* NIR loops are treated as a pair of AGX loops:
+/*
+ * NIR loops are treated as a pair of AGX loops:
  *
  * do {
  *    do {
@@ -1190,15 +1191,14 @@ agx_emit_logical_end(agx_builder *b)
  *    } while (0);
  * } while (cond);
  *
- * By manipulating the nesting counter (r0l), we may break out of nested loops,
- * so under the model, both break and continue may be implemented as breaks,
- * where break breaks out of the outer loop (2 layers) and continue breaks out
- * of the inner loop (1 layer).
+ * By manipulating the nesting counter, we may break out of nested loops, so
+ * under the model, both break and continue may be implemented as breaks, where
+ * break breaks out of the outer loop (2 layers) and continue breaks out of the
+ * inner loop (1 layer).
  *
  * After manipulating the nesting counter directly, pop_exec #0 must be used to
  * flush the update to the execution mask.
  */
-
 static void
 agx_emit_jump(agx_builder *b, nir_jump_instr *instr)
 {
@@ -1217,8 +1217,7 @@ agx_emit_jump(agx_builder *b, nir_jump_instr *instr)
    }
 
    /* Update the counter and flush */
-   agx_index r0l = agx_register(0, false);
-   agx_mov_to(b, r0l, agx_immediate(nestings));
+   agx_nest(b, agx_immediate(nestings));
 
    /* Jumps must come at the end of a block */
    agx_emit_logical_end(b);
@@ -1440,8 +1439,8 @@ emit_loop(agx_context *ctx, nir_loop *nloop)
    ctx->loop_nesting = pushed_nesting;
 }
 
-/* Before the first control flow structure, the nesting counter (r0l) needs to
- * be zeroed for correct operation. This only happens at most once, since by
+/* Before the first control flow structure, the nesting counter needs to be
+ * zeroed for correct operation. This only happens at most once, since by
  * definition this occurs at the end of the first block, which dominates the
  * rest of the program.
  */
@@ -1452,9 +1451,7 @@ emit_first_cf(agx_context *ctx)
       return;
 
    agx_builder _b = agx_init_builder(ctx, agx_after_block(ctx->current_block));
-   agx_index r0l = agx_register(0, false);
-
-   agx_mov_to(&_b, r0l, agx_immediate(0));
+   agx_nest(&_b, agx_immediate(0));
 
    ctx->any_cf = true;
 }
diff --git a/src/asahi/compiler/agx_opcodes.py b/src/asahi/compiler/agx_opcodes.py
index a0e13f5bd49..e8cf206dd27 100644
--- a/src/asahi/compiler/agx_opcodes.py
+++ b/src/asahi/compiler/agx_opcodes.py
@@ -285,3 +285,6 @@ op("unit_test", _, dests = 0, srcs = 1, can_eliminate = False)
 # Like mov, but takes a register and can only appear at the start. Gauranteed
 # to be coalesced during RA, rather than lowered to a real move.
 op("preload", _, srcs = 1)
+
+# Set the nesting counter. Lowers to mov r0l, x after RA.
+op("nest", _, dests = 0, srcs = 1, can_eliminate = False)
diff --git a/src/asahi/compiler/agx_register_allocate.c b/src/asahi/compiler/agx_register_allocate.c
index 971d96f69ba..4211f71085d 100644
--- a/src/asahi/compiler/agx_register_allocate.c
+++ b/src/asahi/compiler/agx_register_allocate.c
@@ -101,7 +101,7 @@ agx_assign_regs(BITSET_WORD *used_regs, unsigned count, unsigned align, unsigned
 
 /** Assign registers to SSA values in a block. */
 
 static void
-agx_ra_assign_local(agx_block *block, uint8_t *ssa_to_reg, uint8_t *ncomps)
+agx_ra_assign_local(agx_context *ctx, agx_block *block, uint8_t *ssa_to_reg, uint8_t *ncomps)
 {
    BITSET_DECLARE(used_regs, AGX_NUM_REGS) = { 0 };
@@ -110,7 +110,11 @@ agx_ra_assign_local(agx_block *block, uint8_t *ssa_to_reg, uint8_t *ncomps)
       used_regs[i] |= (*pred)->regs_out[i];
    }
 
-   BITSET_SET(used_regs, 0); // control flow writes r0l
+   /* Force the nesting counter r0l live throughout shaders using control flow.
+    * This could be optimized (sync with agx_calc_register_demand).
+    */
+   if (ctx->any_cf)
+      BITSET_SET(used_regs, 0);
 
    agx_foreach_instr_in_block(block, I) {
       /* Optimization: if a split contains the last use of a vector, the split
@@ -294,7 +298,7 @@ agx_ra(agx_context *ctx)
     * to a NIR invariant, so we do not need special handling for this. */
 
    agx_foreach_block(ctx, block) {
-      agx_ra_assign_local(block, ssa_to_reg, ncomps);
+      agx_ra_assign_local(ctx, block, ssa_to_reg, ncomps);
    }
 
    agx_foreach_instr_global(ctx, ins) {
@@ -394,6 +398,14 @@ agx_ra(agx_context *ctx)
          }
          break;
 
+      /* Writes to the nesting counter are lowered to the real register */
+      case AGX_OPCODE_NEST: {
+         agx_builder b = agx_init_builder(ctx, agx_before_instr(I));
+         agx_mov_to(&b, agx_register(0, AGX_SIZE_16), I->src[0]);
+         agx_remove_instruction(I);
+         break;
+      }
+
       default:
          break;
       }
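
For readers unfamiliar with the divergence model described in the first hunk's comment, here is a minimal CPU-side sketch of it. This is illustrative code, not anything from the tree: the lane count and per-lane workload are made up, plain C loops stand in for the hardware's do/do pair, and the explicit counter-reset pass at the loop boundary is a simplified stand-in for what pop_exec #0 does to the execution mask. It only shows why writing 1 to the counter behaves as a continue while writing 2 behaves as a break.

#include <stdbool.h>
#include <stdio.h>

#define LANES 4

int
main(void)
{
   unsigned nest[LANES] = {0};  /* per-lane stand-in for the r0l counter */
   int x[LANES] = {0, 3, 6, 9}; /* arbitrary per-lane state */

   bool any_active = true;
   while (any_active) {         /* outer do { } while (cond) */
      /* Inner do { } while (0): run the body once for each active lane,
       * where a lane is active iff its counter is zero. */
      for (unsigned l = 0; l < LANES; ++l) {
         if (nest[l] != 0)
            continue;           /* lane is masked off */

         x[l] += 4;
         if (x[l] > 16)
            nest[l] = 2;        /* NIR break: exit 2 layers */
         else if (x[l] & 1)
            nest[l] = 1;        /* NIR continue: exit 1 layer */
      }

      /* Boundary of the inner loop, standing in for pop_exec #0: lanes
       * that continued (counter 1) rejoin for the next iteration, while
       * lanes that broke (counter 2) stay dead until the outer loop
       * finishes. */
      any_active = false;
      for (unsigned l = 0; l < LANES; ++l) {
         if (nest[l] == 1)
            nest[l] = 0;
         if (nest[l] == 0)
            any_active = true;
      }
   }

   for (unsigned l = 0; l < LANES; ++l)
      printf("lane %u: x = %d\n", l, x[l]);
   return 0;
}

The payoff of routing these counter writes through the new nest pseudo-op shows up in the last hunk: since the mov to r0l is only materialized after register allocation, the allocator no longer has to reserve r0l unconditionally and can hand it out to ordinary values whenever ctx->any_cf is false, i.e. in shaders with no control flow at all.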