nir/i965: add before ffma algebraic opts

This shuffles constants down, the reverse of what the previous
patch does, and applies some simplifications that may be made
possible by doing so.

Shader-db results BDW:

total instructions in shared programs: 12980814 -> 12977822 (-0.02%)
instructions in affected programs: 281889 -> 278897 (-1.06%)
helped: 1231
HURT: 128

total cycles in shared programs: 246562852 -> 246567288 (0.00%)
cycles in affected programs: 11271524 -> 11275960 (0.04%)
helped: 1630
HURT: 1378

V2: mark float opts as inexact

Reviewed-by: Elie Tournier <elie.tournier@collabora.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Timothy Arceri
2017-01-13 17:25:11 +11:00
committed by Timothy Arceri
parent fb2269fed1
commit 7a7ee40c2d
3 changed files with 30 additions and 0 deletions

View File

@@ -2598,6 +2598,7 @@ bool nir_lower_phis_to_regs_block(nir_block *block);
bool nir_lower_ssa_defs_to_regs_block(nir_block *block); bool nir_lower_ssa_defs_to_regs_block(nir_block *block);
bool nir_opt_algebraic(nir_shader *shader); bool nir_opt_algebraic(nir_shader *shader);
bool nir_opt_algebraic_before_ffma(nir_shader *shader);
bool nir_opt_algebraic_late(nir_shader *shader); bool nir_opt_algebraic_late(nir_shader *shader);
bool nir_opt_constant_folding(nir_shader *shader); bool nir_opt_constant_folding(nir_shader *shader);

View File

@@ -530,6 +530,27 @@ for op in ['flt', 'fge', 'feq', 'fne',
('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))), ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
] ]
# This section contains "late" optimizations that should be run before
# creating ffmas and calling regular optimizations for the final time.
# Optimizations should go here if they help code generation and conflict
# with the regular optimizations.
#
# Each entry is a (search, replace) pair. The float patterns carry a '~'
# prefix (they are marked inexact); their integer counterparts do not.
before_ffma_optimizations = [
# Propagate constants down multiplication chains
# (a * #b) * c  ->  (a * c) * b, with a and c tagged is_not_const and both
# the inner and outer multiply tagged is_used_once.
# NOTE(review): '#b' presumably denotes a constant operand -- confirm
# against the pattern syntax in nir_algebraic.py.
(('~fmul(is_used_once)', ('fmul(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('fmul', ('fmul', a, c), b)),
(('imul(is_used_once)', ('imul(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('imul', ('imul', a, c), b)),
# Same reassociation for addition chains: (a + #b) + c  ->  (a + c) + b
(('~fadd(is_used_once)', ('fadd(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('fadd', ('fadd', a, c), b)),
(('iadd(is_used_once)', ('iadd(is_used_once)', 'a(is_not_const)', '#b'), 'c(is_not_const)'), ('iadd', ('iadd', a, c), b)),
# Factor out a shared multiplicand: a * b + a * c  ->  a * (b + c)
(('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
(('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
# A value added to its own negation cancels: -a + a  ->  0
(('~fadd', ('fneg', a), a), 0.0),
(('iadd', ('ineg', a), a), 0),
# Cancellation through a nested add:
#   -a + (a + b)  ->  b   and   a + (-a + b)  ->  b
(('iadd', ('ineg', a), ('iadd', a, b)), b),
(('iadd', a, ('iadd', ('ineg', a), b)), b),
(('~fadd', ('fneg', a), ('fadd', a, b)), b),
(('~fadd', a, ('fadd', ('fneg', a), b)), b),
]
# This section contains "late" optimizations that should be run after the # This section contains "late" optimizations that should be run after the
# regular optimizations have finished. Optimizations should go here if # regular optimizations have finished. Optimizations should go here if
# they help code generation but do not necessarily produce code that is # they help code generation but do not necessarily produce code that is
@@ -556,5 +577,7 @@ late_optimizations = [
] ]
print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render() print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
print nir_algebraic.AlgebraicPass("nir_opt_algebraic_before_ffma",
before_ffma_optimizations).render()
print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late", print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late",
late_optimizations).render() late_optimizations).render()

View File

@@ -605,6 +605,12 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
UNUSED bool progress; /* Written by OPT */ UNUSED bool progress; /* Written by OPT */
do {
progress = false;
OPT(nir_opt_algebraic_before_ffma);
} while (progress);
nir = nir_optimize(nir, compiler, is_scalar); nir = nir_optimize(nir, compiler, is_scalar);
if (devinfo->gen >= 6) { if (devinfo->gen >= 6) {