nir/algebraic: Rearrange bcsel sequences generated by nir_opt_peephole_select
Reviewed-by: Matt Turner <mattst88@gmail.com> All Intel platforms had similar results. (Ice Lake shown) total instructions in shared programs: 14660366 -> 14653437 (-0.05%) instructions in affected programs: 316166 -> 309237 (-2.19%) helped: 905 HURT: 10 helped stats (abs) min: 1 max: 36 x̄: 7.67 x̃: 6 helped stats (rel) min: 0.13% max: 18.75% x̄: 4.28% x̃: 3.60% HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 HURT stats (rel) min: 0.10% max: 1.33% x̄: 0.70% x̃: 0.97% 95% mean confidence interval for instructions value: -7.91 -7.23 95% mean confidence interval for instructions %-change: -4.46% -3.99% Instructions are helped. total cycles in shared programs: 228571646 -> 228549759 (<.01%) cycles in affected programs: 56239919 -> 56218032 (-0.04%) helped: 681 HURT: 216 helped stats (abs) min: 1 max: 5156 x̄: 45.49 x̃: 10 helped stats (rel) min: <.01% max: 10.45% x̄: 1.29% x̃: 0.65% HURT stats (abs) min: 1 max: 320 x̄: 42.09 x̃: 14 HURT stats (rel) min: <.01% max: 37.04% x̄: 1.38% x̃: 0.49% 95% mean confidence interval for cycles value: -41.51 -7.29 95% mean confidence interval for cycles %-change: -0.80% -0.49% Cycles are helped. LOST: 1 GAINED: 0
This commit is contained in:
@@ -1419,6 +1419,44 @@ optimizations += [
|
||||
(('imadsh_mix16', 'a@32', '#b@32(is_upper_half_zero)', 'c@32'), ('c')),
|
||||
]
|
||||
|
||||
# These kinds of sequences can occur after nir_opt_peephole_select.
|
||||
#
|
||||
# NOTE: fadd is not handled here because that gets in the way of ffma
|
||||
# generation in the i965 driver. Instead, fadd and ffma are handled in
|
||||
# late_optimizations.
|
||||
|
||||
for op in ['flrp']:
|
||||
optimizations += [
|
||||
(('bcsel', a, (op + '(is_used_once)', b, c, d), (op, b, c, e)), (op, b, c, ('bcsel', a, d, e))),
|
||||
(('bcsel', a, (op, b, c, d), (op + '(is_used_once)', b, c, e)), (op, b, c, ('bcsel', a, d, e))),
|
||||
(('bcsel', a, (op + '(is_used_once)', b, c, d), (op, b, e, d)), (op, b, ('bcsel', a, c, e), d)),
|
||||
(('bcsel', a, (op, b, c, d), (op + '(is_used_once)', b, e, d)), (op, b, ('bcsel', a, c, e), d)),
|
||||
(('bcsel', a, (op + '(is_used_once)', b, c, d), (op, e, c, d)), (op, ('bcsel', a, b, e), c, d)),
|
||||
(('bcsel', a, (op, b, c, d), (op + '(is_used_once)', e, c, d)), (op, ('bcsel', a, b, e), c, d)),
|
||||
]
|
||||
|
||||
for op in ['fmul', 'iadd', 'imul', 'iand', 'ior', 'ixor', 'fmin', 'fmax', 'imin', 'imax', 'umin', 'umax']:
|
||||
optimizations += [
|
||||
(('bcsel', a, (op + '(is_used_once)', b, c), (op, b, 'd(is_not_const)')), (op, b, ('bcsel', a, c, d))),
|
||||
(('bcsel', a, (op + '(is_used_once)', b, 'c(is_not_const)'), (op, b, d)), (op, b, ('bcsel', a, c, d))),
|
||||
(('bcsel', a, (op, b, 'c(is_not_const)'), (op + '(is_used_once)', b, d)), (op, b, ('bcsel', a, c, d))),
|
||||
(('bcsel', a, (op, b, c), (op + '(is_used_once)', b, 'd(is_not_const)')), (op, b, ('bcsel', a, c, d))),
|
||||
]
|
||||
|
||||
for op in ['fpow']:
|
||||
optimizations += [
|
||||
(('bcsel', a, (op + '(is_used_once)', b, c), (op, b, d)), (op, b, ('bcsel', a, c, d))),
|
||||
(('bcsel', a, (op, b, c), (op + '(is_used_once)', b, d)), (op, b, ('bcsel', a, c, d))),
|
||||
(('bcsel', a, (op + '(is_used_once)', b, c), (op, d, c)), (op, ('bcsel', a, b, d), c)),
|
||||
(('bcsel', a, (op, b, c), (op + '(is_used_once)', d, c)), (op, ('bcsel', a, b, d), c)),
|
||||
]
|
||||
|
||||
for op in ['frcp', 'frsq', 'fsqrt', 'fexp2', 'flog2', 'fsign', 'fsin', 'fcos']:
|
||||
optimizations += [
|
||||
(('bcsel', a, (op + '(is_used_once)', b), (op, c)), (op, ('bcsel', a, b, c))),
|
||||
(('bcsel', a, (op, b), (op + '(is_used_once)', c)), (op, ('bcsel', a, b, c))),
|
||||
]
|
||||
|
||||
# This section contains "late" optimizations that should be run before
|
||||
# creating ffmas and calling regular optimizations for the final time.
|
||||
# Optimizations should go here if they help code generation and conflict
|
||||
@@ -1581,6 +1619,21 @@ late_optimizations = [
|
||||
('ffma', a, b, ('ffma', c, d, e)), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'),
|
||||
]
|
||||
|
||||
for op in ['fadd']:
|
||||
late_optimizations += [
|
||||
(('bcsel', a, (op + '(is_used_once)', b, c), (op, b, d)), (op, b, ('bcsel', a, c, d))),
|
||||
(('bcsel', a, (op, b, c), (op + '(is_used_once)', b, d)), (op, b, ('bcsel', a, c, d))),
|
||||
]
|
||||
|
||||
for op in ['ffma']:
|
||||
late_optimizations += [
|
||||
(('bcsel', a, (op + '(is_used_once)', b, c, d), (op, b, c, e)), (op, b, c, ('bcsel', a, d, e))),
|
||||
(('bcsel', a, (op, b, c, d), (op + '(is_used_once)', b, c, e)), (op, b, c, ('bcsel', a, d, e))),
|
||||
|
||||
(('bcsel', a, (op + '(is_used_once)', b, c, d), (op, b, e, d)), (op, b, ('bcsel', a, c, e), d)),
|
||||
(('bcsel', a, (op, b, c, d), (op + '(is_used_once)', b, e, d)), (op, b, ('bcsel', a, c, e), d)),
|
||||
]
|
||||
|
||||
print(nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render())
|
||||
print(nir_algebraic.AlgebraicPass("nir_opt_algebraic_before_ffma",
|
||||
before_ffma_optimizations).render())
|
||||
|
Reference in New Issue
Block a user