nir/idiv_const: optimize imod/irem

fossil-db changes (Sienna Cichlid):
Totals from 223 (0.15% of 150170) affected shaders:
CodeSize: 384564 -> 370824 (-3.57%)
Instrs: 74518 -> 71961 (-3.43%)
Latency: 351620 -> 344640 (-1.99%)
InvThroughput: 80122 -> 74846 (-6.58%)
VClause: 919 -> 920 (+0.11%)
SClause: 2879 -> 2877 (-0.07%); split: -0.10%, +0.03%
Copies: 3099 -> 3103 (+0.13%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12039>
This commit is contained in:
Rhys Perry
2021-07-21 17:02:11 +01:00
committed by Marge Bot
parent 96168301f9
commit b627b9fcec

View File

@@ -100,6 +100,53 @@ build_idiv(nir_builder *b, nir_ssa_def *n, int64_t d)
}
}
static nir_ssa_def *
build_irem(nir_builder *b, nir_ssa_def *n, int64_t d)
{
int64_t int_min = u_intN_min(n->bit_size);
if (d == 0) {
return nir_imm_intN_t(b, 0, n->bit_size);
} else if (d == int_min) {
return nir_bcsel(b, nir_ieq_imm(b, n, int_min), nir_imm_intN_t(b, 0, n->bit_size), n);
} else {
d = d < 0 ? -d : d;
if (util_is_power_of_two_or_zero64(d)) {
nir_ssa_def *tmp = nir_bcsel(b, nir_ilt(b, n, nir_imm_intN_t(b, 0, n->bit_size)),
nir_iadd_imm(b, n, d - 1), n);
return nir_isub(b, n, nir_iand_imm(b, tmp, -d));
} else {
return nir_isub(b, n, nir_imul(b, build_idiv(b, n, d),
nir_imm_intN_t(b, d, n->bit_size)));
}
}
}
static nir_ssa_def *
build_imod(nir_builder *b, nir_ssa_def *n, int64_t d)
{
int64_t int_min = u_intN_min(n->bit_size);
if (d == 0) {
return nir_imm_intN_t(b, 0, n->bit_size);
} else if (d == int_min) {
nir_ssa_def *int_min_def = nir_imm_intN_t(b, int_min, n->bit_size);
nir_ssa_def *is_neg_not_int_min = nir_ult(b, int_min_def, n);
nir_ssa_def *is_zero = nir_ieq_imm(b, n, 0);
return nir_bcsel(b, nir_ior(b, is_neg_not_int_min, is_zero), n, nir_iadd(b, int_min_def, n));
} else if (d > 0 && util_is_power_of_two_or_zero64(d)) {
return nir_iand(b, n, nir_imm_intN_t(b, d - 1, n->bit_size));
} else if (d < 0 && util_is_power_of_two_or_zero64(-d)) {
nir_ssa_def *d_def = nir_imm_intN_t(b, d, n->bit_size);
nir_ssa_def *res = nir_ior(b, n, d_def);
return nir_bcsel(b, nir_ieq(b, res, d_def), nir_imm_intN_t(b, 0, n->bit_size), res);
} else {
nir_ssa_def *rem = build_irem(b, n, d);
nir_ssa_def *zero = nir_imm_intN_t(b, 0, n->bit_size);
nir_ssa_def *sign_same = d < 0 ? nir_ilt(b, n, zero) : nir_ige(b, n, zero);
nir_ssa_def *rem_zero = nir_ieq(b, rem, zero);
return nir_bcsel(b, nir_ior(b, rem_zero, sign_same), rem, nir_iadd_imm(b, rem, d));
}
}
static bool
nir_opt_idiv_const_instr(nir_builder *b, nir_alu_instr *alu)
{
@@ -143,6 +190,12 @@ nir_opt_idiv_const_instr(nir_builder *b, nir_alu_instr *alu)
case nir_op_umod:
q[comp] = build_umod(b, n, d);
break;
case nir_op_imod:
q[comp] = build_imod(b, n, d);
break;
case nir_op_irem:
q[comp] = build_irem(b, n, d);
break;
default:
unreachable("Unknown integer division op");
}
@@ -171,7 +224,9 @@ nir_opt_idiv_const_impl(nir_function_impl *impl, unsigned min_bit_size)
nir_alu_instr *alu = nir_instr_as_alu(instr);
if (alu->op != nir_op_udiv &&
alu->op != nir_op_idiv &&
alu->op != nir_op_umod)
alu->op != nir_op_umod &&
alu->op != nir_op_imod &&
alu->op != nir_op_irem)
continue;
assert(alu->dest.dest.is_ssa);