nir/algebraic: Eliminate useless fsat() on operand of comparison w/value in (0, 1)
v2: Fix copy-and-paste bug in a cmp b vs b cmp a cases. All Gen7+ platforms had similar results. (Ice Lake shown) total instructions in shared programs: 17224337 -> 17224269 (<.01%) instructions in affected programs: 13578 -> 13510 (-0.50%) helped: 68 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 0.31% max: 3.12% x̄: 0.84% x̃: 0.42% 95% mean confidence interval for instructions value: -1.00 -1.00 95% mean confidence interval for instructions %-change: -1.05% -0.63% Instructions are helped. total cycles in shared programs: 360826090 -> 360825137 (<.01%) cycles in affected programs: 94867 -> 93914 (-1.00%) helped: 58 HURT: 1 helped stats (abs) min: 2 max: 28 x̄: 17.74 x̃: 18 helped stats (rel) min: 0.08% max: 3.17% x̄: 1.39% x̃: 1.22% HURT stats (abs) min: 76 max: 76 x̄: 76.00 x̃: 76 HURT stats (rel) min: 2.86% max: 2.86% x̄: 2.86% x̃: 2.86% 95% mean confidence interval for cycles value: -19.53 -12.78 95% mean confidence interval for cycles %-change: -1.56% -1.08% Cycles are helped. No changes on any other Intel platform. Reviewed-by: Matt Turner <mattst88@gmail.com> Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
This commit is contained in:
@@ -213,6 +213,18 @@ optimizations = [
|
||||
(('fne', ('fneg', a), -1.0), ('fne', 1.0, a)),
|
||||
(('feq', -1.0, ('fneg', a)), ('feq', a, 1.0)),
|
||||
|
||||
(('flt', ('fsat(is_used_once)', a), '#b(is_gt_0_and_lt_1)'), ('flt', a, b)),
|
||||
(('flt', '#b(is_gt_0_and_lt_1)', ('fsat(is_used_once)', a)), ('flt', b, a)),
|
||||
(('fge', ('fsat(is_used_once)', a), '#b(is_gt_0_and_lt_1)'), ('fge', a, b)),
|
||||
(('fge', '#b(is_gt_0_and_lt_1)', ('fsat(is_used_once)', a)), ('fge', b, a)),
|
||||
(('feq', ('fsat(is_used_once)', a), '#b(is_gt_0_and_lt_1)'), ('feq', a, b)),
|
||||
(('fne', ('fsat(is_used_once)', a), '#b(is_gt_0_and_lt_1)'), ('fne', a, b)),
|
||||
|
||||
(('fge', ('fsat(is_used_once)', a), 1.0), ('fge', a, 1.0)),
|
||||
(('flt', ('fsat(is_used_once)', a), 1.0), ('flt', a, 1.0)),
|
||||
(('fge', 0.0, ('fsat(is_used_once)', a)), ('fge', 0.0, a)),
|
||||
(('flt', 0.0, ('fsat(is_used_once)', a)), ('flt', 0.0, a)),
|
||||
|
||||
# 0.0 >= b2f(a)
|
||||
# b2f(a) <= 0.0
|
||||
# b2f(a) == 0.0 because b2f(a) can only be 0 or 1
|
||||
@@ -1136,6 +1148,21 @@ late_optimizations = [
|
||||
# optimization loop can prevent other optimizations.
|
||||
(('fneg', ('fneg', a)), a),
|
||||
|
||||
# These are duplicated from the main optimizations table. The late
|
||||
# patterns that rearrange expressions like x - .5 < 0 to x < .5 can create
|
||||
# new patterns like these. The patterns that compare with zero are removed
|
||||
# because they are unlikely to be created by anything in
|
||||
# late_optimizations.
|
||||
(('flt', ('fsat(is_used_once)', a), '#b(is_gt_0_and_lt_1)'), ('flt', a, b)),
|
||||
(('flt', '#b(is_gt_0_and_lt_1)', ('fsat(is_used_once)', a)), ('flt', b, a)),
|
||||
(('fge', ('fsat(is_used_once)', a), '#b(is_gt_0_and_lt_1)'), ('fge', a, b)),
|
||||
(('fge', '#b(is_gt_0_and_lt_1)', ('fsat(is_used_once)', a)), ('fge', b, a)),
|
||||
(('feq', ('fsat(is_used_once)', a), '#b(is_gt_0_and_lt_1)'), ('feq', a, b)),
|
||||
(('fne', ('fsat(is_used_once)', a), '#b(is_gt_0_and_lt_1)'), ('fne', a, b)),
|
||||
|
||||
(('fge', ('fsat(is_used_once)', a), 1.0), ('fge', a, 1.0)),
|
||||
(('flt', ('fsat(is_used_once)', a), 1.0), ('flt', a, 1.0)),
|
||||
|
||||
(('~fge', ('fmin(is_used_once)', ('fadd(is_used_once)', a, b), ('fadd', c, d)), 0.0), ('iand', ('fge', a, ('fneg', b)), ('fge', c, ('fneg', d)))),
|
||||
|
||||
(('flt', ('fneg', a), ('fneg', b)), ('flt', b, a)),
|
||||
|
@@ -109,6 +109,36 @@ is_zero_to_one(nir_alu_instr *instr, unsigned src, unsigned num_components,
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Exclusive compare with (0, 1).
|
||||
*
|
||||
* This differs from \c is_zero_to_one because that function tests 0 <= src <=
|
||||
* 1 while this function tests 0 < src < 1.
|
||||
*/
|
||||
static inline bool
|
||||
is_gt_0_and_lt_1(nir_alu_instr *instr, unsigned src, unsigned num_components,
|
||||
const uint8_t *swizzle)
|
||||
{
|
||||
/* only constant srcs: */
|
||||
if (!nir_src_is_const(instr->src[src].src))
|
||||
return false;
|
||||
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
switch (nir_op_infos[instr->op].input_types[src]) {
|
||||
case nir_type_float: {
|
||||
double val = nir_src_comp_as_float(instr->src[src].src, swizzle[i]);
|
||||
if (isnan(val) || val <= 0.0f || val >= 1.0f)
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
is_not_const_zero(nir_alu_instr *instr, unsigned src, unsigned num_components,
|
||||
const uint8_t *swizzle)
|
||||
|
Reference in New Issue
Block a user