diff --git a/src/compiler/nir/nir_opt_rematerialize_compares.c b/src/compiler/nir/nir_opt_rematerialize_compares.c
index 39a480c9f83..429dd15a14e 100644
--- a/src/compiler/nir/nir_opt_rematerialize_compares.c
+++ b/src/compiler/nir/nir_opt_rematerialize_compares.c
@@ -57,6 +57,49 @@ is_two_src_comparison(const nir_alu_instr *instr)
    }
 }
 
+/**
+ * Check whether the components of ALU source \p src selected by the first
+ * \p num_components entries of \p swizzle are all constant zero.
+ *
+ * Returns false when the source is not constant.  While checking a
+ * component, any declared base type other than int, uint, or float also
+ * yields false.
+ */
+static inline bool
+is_zero(const nir_alu_instr *instr, unsigned src, unsigned num_components,
+        const uint8_t *swizzle)
+{
+   /* only constant srcs: */
+   if (!nir_src_is_const(instr->src[src].src))
+      return false;
+
+   /* The declared input type depends only on the opcode and source index, so
+    * look it up once instead of on every component.
+    */
+   const nir_alu_type base_type =
+      nir_alu_type_get_base_type(nir_op_infos[instr->op].input_types[src]);
+
+   for (unsigned i = 0; i < num_components; i++) {
+      switch (base_type) {
+      case nir_type_int:
+      case nir_type_uint: {
+         if (nir_src_comp_as_int(instr->src[src].src, swizzle[i]) != 0)
+            return false;
+         break;
+      }
+      case nir_type_float: {
+         if (nir_src_comp_as_float(instr->src[src].src, swizzle[i]) != 0)
+            return false;
+         break;
+      }
+      default:
+         return false;
+      }
+   }
+
+   return true;
+}
+
 static bool
 all_uses_are_bcsel(const nir_alu_instr *instr)
 {
@@ -79,6 +122,36 @@ all_uses_are_bcsel(const nir_alu_instr *instr)
    return true;
 }
 
+/**
+ * Check whether every use of \p instr is a two-source comparison that has a
+ * constant zero as one of its operands and that itself satisfies
+ * all_uses_are_bcsel().
+ *
+ * Only the first swizzled component of each comparison source is tested
+ * against zero.
+ */
+static bool
+all_uses_are_compare_with_zero(const nir_alu_instr *instr)
+{
+   nir_foreach_use(use, &instr->def) {
+      if (use->parent_instr->type != nir_instr_type_alu)
+         return false;
+
+      nir_alu_instr *const alu = nir_instr_as_alu(use->parent_instr);
+      if (!is_two_src_comparison(alu))
+         return false;
+
+      if (!is_zero(alu, 0, 1, alu->src[0].swizzle) &&
+          !is_zero(alu, 1, 1, alu->src[1].swizzle))
+         return false;
+
+      if (!all_uses_are_bcsel(alu))
+         return false;
+   }
+
+   return true;
+}
+
 static bool
 nir_opt_rematerialize_compares_impl(nir_shader *shader, nir_function_impl *impl)
 {
@@ -160,6 +233,119 @@ nir_opt_rematerialize_compares_impl(nir_shader *shader, nir_function_impl *impl)
    return progress;
 }
 
+/**
+ * Rematerialize selected ALU instructions next to their uses.
+ *
+ * For each ALU instruction whose opcode is in the list below, whose sources
+ * pass the constant-source check, and whose uses all satisfy
+ * all_uses_are_compare_with_zero(), a clone is inserted immediately before
+ * every use that lives in a different block and that use is rewritten to
+ * read the clone.  Returns true if any source was rewritten.
+ */
+static bool
+nir_opt_rematerialize_alu_impl(nir_shader *shader, nir_function_impl *impl)
+{
+   bool progress = false;
+
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr(instr, block) {
+         if (instr->type != nir_instr_type_alu)
+            continue;
+
+         nir_alu_instr *const alu = nir_instr_as_alu(instr);
+
+         /* This list only includes ALU ops that are likely to be able to have
+          * cmod propagation on Intel GPUs.
+          */
+         switch (alu->op) {
+         case nir_op_ineg:
+         case nir_op_iabs:
+         case nir_op_fneg:
+         case nir_op_fabs:
+         case nir_op_fadd:
+         case nir_op_iadd:
+         case nir_op_iadd_sat:
+         case nir_op_uadd_sat:
+         case nir_op_isub_sat:
+         case nir_op_usub_sat:
+         case nir_op_irhadd:
+         case nir_op_urhadd:
+         case nir_op_fmul:
+         case nir_op_inot:
+         case nir_op_iand:
+         case nir_op_ior:
+         case nir_op_ixor:
+         case nir_op_ffloor:
+         case nir_op_ffract:
+         case nir_op_uclz:
+         case nir_op_ishl:
+         case nir_op_ishr:
+         case nir_op_ushr:
+         case nir_op_urol:
+         case nir_op_uror:
+            break; /* ... from switch. */
+         default:
+            continue; /* ... with loop. */
+         }
+
+         /* To help prevent increasing live ranges, require that one of the
+          * sources be a constant.
+          */
+         if (nir_op_infos[alu->op].num_inputs == 2 &&
+             !nir_src_is_const(alu->src[0].src) &&
+             !nir_src_is_const(alu->src[1].src))
+            continue;
+
+         if (!all_uses_are_compare_with_zero(alu))
+            continue;
+
+         /* At this point it is known that the alu is only used by a
+          * comparison with zero that is used by nir_op_bcsel and possibly by
+          * if-statements (though the latter has not been explicitly checked).
+          *
+          * Iterate through each use of the ALU.  For every use that is in a
+          * different block, emit a copy of the ALU.  Care must be taken here.
+          * The original instruction must be duplicated only once in each
+          * block because CSE cannot be run after this pass.
+          *
+          * NOTE(review): a clone is emitted for every use instruction in
+          * another block; clones are not shared between uses that land in
+          * the same block.  Confirm this meets the requirement above.
+          */
+         nir_foreach_use_safe(use, &alu->def) {
+            nir_instr *const use_instr = use->parent_instr;
+
+            /* If the use is in the same block as the def, don't
+             * rematerialize.
+             */
+            if (use_instr->block == alu->instr.block)
+               continue;
+
+            nir_alu_instr *clone = nir_alu_instr_clone(shader, alu);
+
+            nir_instr_insert_before(use_instr, &clone->instr);
+
+            nir_alu_instr *const use_alu = nir_instr_as_alu(use_instr);
+            for (unsigned i = 0; i < nir_op_infos[use_alu->op].num_inputs; i++) {
+               if (use_alu->src[i].src.ssa == &alu->def) {
+                  nir_src_rewrite(&use_alu->src[i].src, &clone->def);
+                  progress = true;
+               }
+            }
+         }
+      }
+   }
+
+   if (progress) {
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                     nir_metadata_dominance);
+   } else {
+      nir_metadata_preserve(impl, nir_metadata_all);
+   }
+
+   return progress;
+}
+
 bool
 nir_opt_rematerialize_compares(nir_shader *shader)
 {
@@ -167,6 +353,8 @@ nir_opt_rematerialize_compares(nir_shader *shader)
 
    nir_foreach_function_impl(impl, shader) {
       progress = nir_opt_rematerialize_compares_impl(shader, impl) || progress;
+
+      progress = nir_opt_rematerialize_alu_impl(shader, impl) || progress;
    }
 
    return progress;