nir/range-analysis: Range tracking for fpow
One shader from Metro Last Light and the rest from Rochard. In the Rochard cases, something like: min(1.0, max(pow(saturate(x), y), z)) was transformed to saturate(max(pow(saturate(x), y), z)) because the result of the pow must be >= 0. The Metro Last Light case was similar. An instance of min(pow(abs(x), y), 1.0) became saturate(pow(abs(x), y)). v2: Fix some comments. Suggested by Caio. v3: Fix setting is_integral when the exponent might be negative. See also Mesa MR !1778. Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com> All Intel platforms had similar results. (Ice Lake shown) total instructions in shared programs: 16280670 -> 16280659 (<.01%) instructions in affected programs: 1130 -> 1119 (-0.97%) helped: 11 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 0.72% max: 1.43% x̄: 1.03% x̃: 0.97% 95% mean confidence interval for instructions value: -1.00 -1.00 95% mean confidence interval for instructions %-change: -1.19% -0.86% Instructions are helped. total cycles in shared programs: 367168430 -> 367168270 (<.01%) cycles in affected programs: 10281 -> 10121 (-1.56%) helped: 10 HURT: 1 helped stats (abs) min: 16 max: 18 x̄: 17.00 x̃: 17 helped stats (rel) min: 1.31% max: 2.43% x̄: 1.79% x̃: 1.70% HURT stats (abs) min: 10 max: 10 x̄: 10.00 x̃: 10 HURT stats (rel) min: 3.10% max: 3.10% x̄: 3.10% x̃: 3.10% 95% mean confidence interval for cycles value: -20.06 -9.04 95% mean confidence interval for cycles %-change: -2.36% -0.32% Cycles are helped.
This commit is contained in:
@@ -31,6 +31,12 @@
|
||||
* the result.
|
||||
*/
|
||||
|
||||
static bool
|
||||
is_not_negative(enum ssa_ranges r)
|
||||
{
|
||||
return r == gt_zero || r == ge_zero || r == eq_zero;
|
||||
}
|
||||
|
||||
static void *
|
||||
pack_data(const struct ssa_result_range r)
|
||||
{
|
||||
@@ -722,6 +728,66 @@ analyze_expression(const nir_alu_instr *instr, unsigned src,
|
||||
r = (struct ssa_result_range){le_zero, false};
|
||||
break;
|
||||
|
||||
case nir_op_fpow: {
|
||||
/* Due to flush-to-zero semantics of floating-point numbers with very
|
||||
* small magnitudes, we can never really be sure a result will be
|
||||
* non-zero.
|
||||
*
|
||||
* NIR uses pow() and powf() to constant evaluate nir_op_fpow. The man
|
||||
* page for that function says:
|
||||
*
|
||||
* If y is 0, the result is 1.0 (even if x is a NaN).
|
||||
*
|
||||
* gt_zero: pow(*, eq_zero)
|
||||
* | pow(eq_zero, lt_zero) # 0^-y = +inf
|
||||
* | pow(eq_zero, le_zero) # 0^-y = +inf or 0^0 = 1.0
|
||||
* ;
|
||||
*
|
||||
* eq_zero: pow(eq_zero, gt_zero)
|
||||
* ;
|
||||
*
|
||||
* ge_zero: pow(gt_zero, gt_zero)
|
||||
* | pow(gt_zero, ge_zero)
|
||||
* | pow(gt_zero, lt_zero)
|
||||
* | pow(gt_zero, le_zero)
|
||||
* | pow(gt_zero, ne_zero)
|
||||
* | pow(gt_zero, unknown)
|
||||
* | pow(ge_zero, gt_zero)
|
||||
* | pow(ge_zero, ge_zero)
|
||||
* | pow(ge_zero, lt_zero)
|
||||
* | pow(ge_zero, le_zero)
|
||||
* | pow(ge_zero, ne_zero)
|
||||
* | pow(ge_zero, unknown)
|
||||
* | pow(eq_zero, ge_zero) # 0^0 = 1.0 or 0^+y = 0.0
|
||||
* | pow(eq_zero, ne_zero) # 0^-y = +inf or 0^+y = 0.0
|
||||
* | pow(eq_zero, unknown) # union of all other y cases
|
||||
* ;
|
||||
*
|
||||
* All other cases are unknown.
|
||||
*
|
||||
* We could do better if the right operand is a constant, integral
|
||||
* value.
|
||||
*/
|
||||
static const enum ssa_ranges table[last_range + 1][last_range + 1] = {
|
||||
/* left\right unknown lt_zero le_zero gt_zero ge_zero ne_zero eq_zero */
|
||||
/* unknown */ { _______, _______, _______, _______, _______, _______, gt_zero },
|
||||
/* lt_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
|
||||
/* le_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
|
||||
/* gt_zero */ { ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, gt_zero },
|
||||
/* ge_zero */ { ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, ge_zero, gt_zero },
|
||||
/* ne_zero */ { _______, _______, _______, _______, _______, _______, gt_zero },
|
||||
/* eq_zero */ { ge_zero, gt_zero, gt_zero, eq_zero, ge_zero, ge_zero, gt_zero },
|
||||
};
|
||||
|
||||
const struct ssa_result_range left = analyze_expression(alu, 0, ht);
|
||||
const struct ssa_result_range right = analyze_expression(alu, 1, ht);
|
||||
|
||||
r.is_integral = left.is_integral && right.is_integral &&
|
||||
is_not_negative(right.range);
|
||||
r.range = table[left.range][right.range];
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_op_ffma: {
|
||||
const struct ssa_result_range first = analyze_expression(alu, 0, ht);
|
||||
const struct ssa_result_range second = analyze_expression(alu, 1, ht);
|
||||
|
Reference in New Issue
Block a user