diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 93acf03009a..167db6ba8e2 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -691,6 +691,12 @@ ir3_start_block(struct ir3 *ir)
    return list_first_entry(&ir->block_list, struct ir3_block, node);
 }
 
+static inline struct ir3_block *
+ir3_end_block(struct ir3 *ir)
+{
+   return list_last_entry(&ir->block_list, struct ir3_block, node);
+}
+
 static inline struct ir3_block *
 ir3_after_preamble(struct ir3 *ir)
 {
@@ -1074,6 +1080,53 @@ is_input(struct ir3_instruction *instr)
    }
 }
 
+/* Whether non-helper invocations can read the value of helper invocations. We
+ * cannot insert (eq) before these instructions.
+ */
+static inline bool
+uses_helpers(struct ir3_instruction *instr)
+{
+   switch (instr->opc) {
+   /* These require helper invocations to be present */
+   case OPC_SAM:
+   case OPC_SAMB:
+   case OPC_GETLOD:
+   case OPC_DSX:
+   case OPC_DSY:
+   case OPC_DSXPP_1:
+   case OPC_DSYPP_1:
+   case OPC_DSXPP_MACRO:
+   case OPC_DSYPP_MACRO:
+   case OPC_QUAD_SHUFFLE_BRCST:
+   case OPC_QUAD_SHUFFLE_HORIZ:
+   case OPC_QUAD_SHUFFLE_VERT:
+   case OPC_QUAD_SHUFFLE_DIAG:
+   case OPC_META_TEX_PREFETCH:
+      return true;
+
+   /* Subgroup operations don't require helper invocations to be present, but
+    * will use helper invocations if they are present.
+    */
+   case OPC_BALLOT_MACRO:
+   case OPC_ANY_MACRO:
+   case OPC_ALL_MACRO:
+   case OPC_ELECT_MACRO:
+   case OPC_READ_FIRST_MACRO:
+   case OPC_READ_COND_MACRO:
+   case OPC_MOVMSK:
+   case OPC_BRCST_ACTIVE:
+      return true;
+
+   /* Catch lowered READ_FIRST/READ_COND. */
+   case OPC_MOV:
+      return (instr->dsts[0]->flags & IR3_REG_SHARED) &&
+             !(instr->srcs[0]->flags & IR3_REG_SHARED);
+
+   default:
+      return false;
+   }
+}
+
 static inline bool
 is_bool(struct ir3_instruction *instr)
 {
@@ -1704,6 +1757,9 @@ __ssa_srcp_n(struct ir3_instruction *instr, unsigned n)
 /* iterators for instructions: */
 #define foreach_instr(__instr, __list) \
    list_for_each_entry (struct ir3_instruction, __instr, __list, node)
+#define foreach_instr_from(__instr, __start, __list) \
+   list_for_each_entry_from(struct ir3_instruction, __instr, &(__start)->node, \
+                            __list, node)
 #define foreach_instr_rev(__instr, __list) \
    list_for_each_entry_rev (struct ir3_instruction, __instr, __list, node)
 #define foreach_instr_safe(__instr, __list) \
diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c
index a344a009ea1..0626113d7a0 100644
--- a/src/freedreno/ir3/ir3_legalize.c
+++ b/src/freedreno/ir3/ir3_legalize.c
@@ -908,6 +908,215 @@ nop_sched(struct ir3 *ir, struct ir3_shader_variant *so)
    }
 }
 
+struct ir3_helper_block_data {
+   /* Whether helper invocations may be used on any path starting at the
+    * beginning of the block.
+    */
+   bool uses_helpers_beginning;
+
+   /* Whether helper invocations may be used by the end of the block. Branch
+    * instructions are considered to be "between" blocks, because (eq) has to be
+    * inserted after them in the successor blocks, so branch instructions using
+    * helpers will result in uses_helpers_end = true for their block.
+    */
+   bool uses_helpers_end;
+};
+
+/* Insert (eq) after the last instruction using the results of helper
+ * invocations. Use a backwards dataflow analysis to determine at which points
+ * in the program helper invocations are definitely never used, and then insert
+ * (eq) at the point where we cross from a point where they may be used to a
+ * point where they are never used.
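+ *
+ * In dataflow terms, the fixpoint loop below computes, for every block b and
+ * each of its successors s:
+ *
+ *    uses_helpers_end(b)       |= uses_helpers_beginning(s)
+ *    uses_helpers_beginning(b) |= uses_helpers_beginning(s)
+ *
+ * seeded with uses_helpers_beginning for blocks that contain a helper-using
+ * instruction, and with uses_helpers_end for blocks whose branch uses helpers.
+ * Preambles get both flags, since (eq) is not allowed there.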
+ */
+static void
+helper_sched(struct ir3_legalize_ctx *ctx, struct ir3 *ir,
+             struct ir3_shader_variant *so)
+{
+   bool non_prefetch_helpers = false;
+
+   foreach_block (block, &ir->block_list) {
+      struct ir3_helper_block_data *bd =
+         rzalloc(ctx, struct ir3_helper_block_data);
+      foreach_instr (instr, &block->instr_list) {
+         if (uses_helpers(instr)) {
+            bd->uses_helpers_beginning = true;
+            if (instr->opc != OPC_META_TEX_PREFETCH) {
+               non_prefetch_helpers = true;
+               break;
+            }
+         }
+
+         if (instr->opc == OPC_SHPE) {
+            /* (eq) is not allowed in preambles, mark the whole preamble as
+             * requiring helpers to avoid putting it there.
+             */
+            bd->uses_helpers_beginning = true;
+            bd->uses_helpers_end = true;
+         }
+      }
+
+      if (block->brtype == IR3_BRANCH_ALL ||
+          block->brtype == IR3_BRANCH_ANY ||
+          block->brtype == IR3_BRANCH_GETONE) {
+         bd->uses_helpers_end = true;
+      }
+
+      block->data = bd;
+   }
+
+   /* If only prefetches use helpers then we can disable them in the shader via
+    * a register setting.
+    */
+   if (!non_prefetch_helpers) {
+      so->prefetch_end_of_quad = true;
+      return;
+   }
+
+   bool progress;
+   do {
+      progress = false;
+      foreach_block_rev (block, &ir->block_list) {
+         struct ir3_helper_block_data *bd = block->data;
+
+         if (!bd->uses_helpers_beginning)
+            continue;
+
+         for (unsigned i = 0; i < block->predecessors_count; i++) {
+            struct ir3_block *pred = block->predecessors[i];
+            struct ir3_helper_block_data *pred_bd = pred->data;
+            if (!pred_bd->uses_helpers_end) {
+               pred_bd->uses_helpers_end = true;
+            }
+            if (!pred_bd->uses_helpers_beginning) {
+               pred_bd->uses_helpers_beginning = true;
+               progress = true;
+            }
+         }
+      }
+   } while (progress);
+
+   /* Now, we need to determine the points where helper invocations become
+    * unused.
+    */
+   foreach_block (block, &ir->block_list) {
+      struct ir3_helper_block_data *bd = block->data;
+      if (bd->uses_helpers_end)
+         continue;
+
+      /* We need to check the predecessors because of situations with critical
+       * edges like this that can occur after optimizing jumps:
+       *
+       *    br p0.x, #endif
+       *    ...
+       *    sam ...
+       *    ...
+       *    endif:
+       *    ...
+       *    end
+       *
+       * The endif block will have uses_helpers_beginning = false and
+       * uses_helpers_end = false, but because we jump to it from the
+       * beginning of the if where uses_helpers_end = true, we still want to
+       * add an (eq) at the beginning of the block:
+       *
+       *    br p0.x, #endif
+       *    ...
+       *    sam ...
+       *    (eq)nop
+       *    ...
+       *    endif:
+       *    (eq)nop
+       *    ...
+       *    end
+       *
+       * This adds an extra nop in the case where the branch isn't taken, but
+       * that's probably preferable to adding an extra jump instruction, which
+       * is what would happen if we ran this pass before optimizing jumps:
+       *
+       *    br p0.x, #else
+       *    ...
+       *    sam ...
+       *    (eq)nop
+       *    ...
+       *    jump #endif
+       *    else:
+       *    (eq)nop
+       *    endif:
+       *    ...
+       *    end
+       *
+       * We also need this to make sure we insert (eq) after branches which use
+       * helper invocations.
+       */
+      bool pred_uses_helpers = bd->uses_helpers_beginning;
+      for (unsigned i = 0; i < block->predecessors_count; i++) {
+         struct ir3_block *pred = block->predecessors[i];
+         struct ir3_helper_block_data *pred_bd = pred->data;
+         if (pred_bd->uses_helpers_end) {
+            pred_uses_helpers = true;
+            break;
+         }
+      }
+
+      if (!pred_uses_helpers)
+         continue;
+
+      /* The last use of helpers is somewhere between the beginning and the
+       * end. first_instr will be the first instruction where helpers are no
+       * longer required, or NULL if helpers are required until the very end
+       * of the block.
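+       *
+       * For example, in a block like this (hypothetical instructions, for
+       * illustration):
+       *
+       *    sam ...       <- last instruction that uses helpers
+       *    add.f ...     <- first_instr
+       *    stg ...
+       *
+       * the backwards scan below stops at the sam and leaves first_instr
+       * pointing at the add.f. The (eq) then goes on an existing nop found
+       * while scanning forward from first_instr, or on a fresh nop inserted
+       * before first_instr.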
+       */
+      struct ir3_instruction *first_instr = NULL;
+      foreach_instr_rev (instr, &block->instr_list) {
+         /* Skip prefetches because they actually execute before the block
+          * starts and at this stage they aren't guaranteed to be at the start
+          * of the block.
+          */
+         if (uses_helpers(instr) && instr->opc != OPC_META_TEX_PREFETCH)
+            break;
+         first_instr = instr;
+      }
+
+      bool killed = false;
+      bool expensive_instruction_in_block = false;
+      if (first_instr) {
+         foreach_instr_from (instr, first_instr, &block->instr_list) {
+            /* If there's already a nop, we don't have to worry about whether to
+             * insert one.
+             */
+            if (instr->opc == OPC_NOP) {
+               instr->flags |= IR3_INSTR_EQ;
+               killed = true;
+               break;
+            }
+
+            /* ALU and SFU instructions probably aren't going to benefit much
+             * from killing helper invocations, because they complete at least
+             * an entire quad in a cycle and don't access any quad-divergent
+             * memory, so delay emitting (eq) in the hopes that we find a nop
+             * afterwards.
+             */
+            if (is_alu(instr) || is_sfu(instr))
+               continue;
+
+            expensive_instruction_in_block = true;
+            break;
+         }
+      }
+
+      /* If this block isn't the last block before the end instruction, assume
+       * that there may be expensive instructions in later blocks so it's worth
+       * it to insert a nop.
+       */
+      if (!killed && (expensive_instruction_in_block ||
+                      block->successors[0] != ir3_end_block(ir))) {
+         struct ir3_instruction *nop = ir3_NOP(block);
+         nop->flags |= IR3_INSTR_EQ;
+         if (first_instr)
+            ir3_instr_move_before(nop, first_instr);
+      }
+   }
+}
+
 bool
 ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
 {
@@ -976,6 +1185,11 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
    while (opt_jump(ir))
       ;
 
+   /* TODO: does (eq) exist before a6xx? */
+   if (so->type == MESA_SHADER_FRAGMENT && so->need_pixlod &&
+       so->compiler->gen >= 6)
+      helper_sched(ctx, ir, so);
+
    ir3_count_instructions(ir);
    resolve_jumps(ir);