diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index fb096bdee26..0418507866d 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -6472,6 +6472,7 @@ typedef struct nir_opt_access_options {
 bool nir_opt_access(nir_shader *shader, const nir_opt_access_options *options);
 bool nir_opt_algebraic(nir_shader *shader);
 bool nir_opt_algebraic_before_ffma(nir_shader *shader);
+bool nir_opt_algebraic_before_lower_int64(nir_shader *shader);
 bool nir_opt_algebraic_late(nir_shader *shader);
 bool nir_opt_algebraic_distribute_src_mods(nir_shader *shader);
 bool nir_opt_constant_folding(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 69edcada0f9..3d3be0702d1 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -3447,9 +3447,48 @@ distribute_src_mods = [
    (('fabs', ('fsign(is_used_once)', a)), ('fsign', ('fabs', a))),
 ]
 
+before_lower_int64_optimizations = [
+   # The i2i64(a) implies that 'a' has at most 32 bits of data.
+   (('ishl', ('i2i64', a), b),
+    # Effective shift count of zero, just return 'a'.
+    ('bcsel', ('ieq', ('iand', b, 63), 0), ('i2i64', a),
+     ('bcsel', ('ilt', ('iand', b, 63), 32),
+      # Shifting by less than 32 bits, so both 32-bit halves will have
+      # some data. These (and the else case) shift counts are of 32-bit
+      # values, so the shift counts are implicitly modulo 32.
+      ('pack_64_2x32_split', ('ishl', ('i2i32', a), b), ('ishr', ('i2i32', a), ('iadd', ('ineg', b), 32))),
+      # Shifting by 32 bits or more, so the lower 32 bits must be zero.
+      ('pack_64_2x32_split', 0, ('ishl', ('i2i32', a), ('iabs', ('iadd', ('ineg', b), 32)))))),
+    '(options->lower_int64_options & nir_lower_shift64) != 0'),
+
+   (('ishl', ('u2u64', a), b),
+    ('bcsel', ('ieq', ('iand', b, 63), 0), ('u2u64', a),
+     ('bcsel', ('ilt', ('iand', b, 63), 32),
+      ('pack_64_2x32_split', ('ishl', ('u2u32', a), b), ('ushr', ('u2u32', a), ('iadd', ('ineg', b), 32))),
+      ('pack_64_2x32_split', 0, ('ishl', ('u2u32', a), ('iabs', ('iadd', ('ineg', b), 32)))))),
+    '(options->lower_int64_options & nir_lower_shift64) != 0'),
+
+   # If ineg64 is lowered, then the negation is not free. Try to eliminate
+   # some of the negations.
+   (('iadd@64', ('ineg', a), ('ineg(is_used_once)', b)), ('isub', ('ineg', a), b), '(options->lower_int64_options & nir_lower_ineg64) != 0'),
+   (('iadd@64', a, ('ineg', b)), ('isub', a, b), '(options->lower_int64_options & nir_lower_ineg64) != 0'),
+   (('isub@64', a, ('ineg', b)), ('iadd', a, b), '(options->lower_int64_options & nir_lower_ineg64) != 0'),
+   (('isub@64', ('ineg', a), ('ineg', b)), ('isub', b, a), '(options->lower_int64_options & nir_lower_ineg64) != 0'),
+
+   (('imul@64', ('ineg', a), ('ineg', b)), ('imul', a, b)),
+   (('idiv@64', ('ineg', a), ('ineg', b)), ('idiv', a, b)),
+
+   # If the hardware can do int64, the shift is the same cost as the add. It
+   # should be fine to do this transformation unconditionally.
+   (('iadd', ('i2i64', a), ('i2i64', a)), ('ishl', ('i2i64', a), 1)),
+   (('iadd', ('u2u64', a), ('u2u64', a)), ('ishl', ('u2u64', a), 1)),
+]
+
 print(nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render())
 print(nir_algebraic.AlgebraicPass("nir_opt_algebraic_before_ffma",
                                   before_ffma_optimizations).render())
+print(nir_algebraic.AlgebraicPass("nir_opt_algebraic_before_lower_int64",
+                                  before_lower_int64_optimizations).render())
 print(nir_algebraic.AlgebraicPass("nir_opt_algebraic_late",
                                   late_optimizations).render())
 print(nir_algebraic.AlgebraicPass("nir_opt_algebraic_distribute_src_mods",
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 8ef19301236..40ef84d5379 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -1678,6 +1678,12 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
 
    brw_vectorize_lower_mem_access(nir, compiler, robust_flags);
 
+   /* Potentially perform this optimization pass twice because it can create
+    * additional opportunities for itself.
+    */
+   if (OPT(nir_opt_algebraic_before_lower_int64))
+      OPT(nir_opt_algebraic_before_lower_int64);
+
    if (OPT(nir_lower_int64))
       brw_nir_optimize(nir, devinfo);
 
@@ -1762,6 +1768,8 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
    if (OPT(nir_opt_uniform_atomics)) {
       OPT(nir_lower_subgroups, &subgroups_options);
 
+      OPT(nir_opt_algebraic_before_lower_int64);
+
       if (OPT(nir_lower_int64))
          brw_nir_optimize(nir, devinfo);
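
Note (not part of the patch): the bcsel tree in the new ishl patterns follows the usual split of a 64-bit shift of a sign- or zero-extended 32-bit value into two 32-bit halves. The standalone C sketch below is only an illustration of that reasoning for the signed (i2i64) case; the function name ishl_i2i64_model is made up for this note, it takes the shift count modulo 64 up front (mirroring the ('iand', b, 63) guard), and it assumes signed right shift behaves arithmetically, as NIR's ishr does.

/* Hypothetical model of the i2i64 shift lowering; not part of the patch. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static int64_t
ishl_i2i64_model(int32_t a, uint32_t b)
{
   const uint32_t shift = b & 63;   /* mirrors the ('iand', b, 63) guard */

   if (shift == 0)
      return (int64_t)a;            /* effective shift count of zero */

   uint32_t lo, hi;
   if (shift < 32) {
      /* Both 32-bit halves hold data; 32 - shift is in [1, 31].  The NIR
       * pattern computes 32 - b instead and relies on 32-bit shift counts
       * being taken implicitly modulo 32.
       */
      lo = (uint32_t)a << shift;
      hi = (uint32_t)((int32_t)a >> (32 - shift));   /* ishr: arithmetic shift */
   } else {
      /* Shifting by 32 bits or more, so the low half must be zero. */
      lo = 0;
      hi = (uint32_t)a << (shift - 32);
   }

   /* pack_64_2x32_split(lo, hi) */
   return (int64_t)(((uint64_t)hi << 32) | lo);
}

int
main(void)
{
   for (int32_t a = -3; a <= 3; a++) {
      for (uint32_t b = 0; b < 64; b++)
         assert(ishl_i2i64_model(a, b) == (int64_t)((uint64_t)(int64_t)a << b));
   }
   puts("model matches the native 64-bit shift");
   return 0;
}

Running the sketch checks the decomposition against a native 64-bit shift for a few small positive and negative values and every shift count; the unsigned (u2u64) pattern is the same construction with ushr and zero extension.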