nir/algebraic: support for power-of-two optimizations
Some optimizations, like converting integer multiply/divide into left/right shifts, have additional constraints on the search expression, such as requiring that a variable is a constant power of two. Support these cases by allowing a condition function name to be appended to the search variable expression (e.g. "#a@32(is_pos_power_of_two)"). Signed-off-by: Rob Clark <robclark@freedesktop.org> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
This commit is contained in:
@@ -1651,6 +1651,9 @@ typedef struct nir_shader_compiler_options {
|
|||||||
/* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
|
/* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
|
||||||
bool lower_scmp;
|
bool lower_scmp;
|
||||||
|
|
||||||
|
/** enables rules to lower idiv by power-of-two: */
|
||||||
|
bool lower_idiv;
|
||||||
|
|
||||||
/* Does the native fdot instruction replicate its result for four
|
/* Does the native fdot instruction replicate its result for four
|
||||||
* components? If so, then opt_algebraic_late will turn all fdotN
|
* components? If so, then opt_algebraic_late will turn all fdotN
|
||||||
* instructions into fdot_replicatedN instructions.
|
* instructions into fdot_replicatedN instructions.
|
||||||
|
@@ -76,6 +76,7 @@ class Value(object):
|
|||||||
return Constant(val, name_base)
|
return Constant(val, name_base)
|
||||||
|
|
||||||
__template = mako.template.Template("""
|
__template = mako.template.Template("""
|
||||||
|
#include "compiler/nir/nir_search_helpers.h"
|
||||||
static const ${val.c_type} ${val.name} = {
|
static const ${val.c_type} ${val.name} = {
|
||||||
{ ${val.type_enum}, ${val.bit_size} },
|
{ ${val.type_enum}, ${val.bit_size} },
|
||||||
% if isinstance(val, Constant):
|
% if isinstance(val, Constant):
|
||||||
@@ -84,6 +85,7 @@ static const ${val.c_type} ${val.name} = {
|
|||||||
${val.index}, /* ${val.var_name} */
|
${val.index}, /* ${val.var_name} */
|
||||||
${'true' if val.is_constant else 'false'},
|
${'true' if val.is_constant else 'false'},
|
||||||
${val.type() or 'nir_type_invalid' },
|
${val.type() or 'nir_type_invalid' },
|
||||||
|
${val.cond if val.cond else 'NULL'},
|
||||||
% elif isinstance(val, Expression):
|
% elif isinstance(val, Expression):
|
||||||
${'true' if val.inexact else 'false'},
|
${'true' if val.inexact else 'false'},
|
||||||
nir_op_${val.opcode},
|
nir_op_${val.opcode},
|
||||||
@@ -113,7 +115,7 @@ static const ${val.c_type} ${val.name} = {
|
|||||||
Variable=Variable,
|
Variable=Variable,
|
||||||
Expression=Expression)
|
Expression=Expression)
|
||||||
|
|
||||||
_constant_re = re.compile(r"(?P<value>[^@]+)(?:@(?P<bits>\d+))?")
|
_constant_re = re.compile(r"(?P<value>[^@\(]+)(?:@(?P<bits>\d+))?")
|
||||||
|
|
||||||
class Constant(Value):
|
class Constant(Value):
|
||||||
def __init__(self, val, name):
|
def __init__(self, val, name):
|
||||||
@@ -150,7 +152,8 @@ class Constant(Value):
|
|||||||
return "nir_type_float"
|
return "nir_type_float"
|
||||||
|
|
||||||
_var_name_re = re.compile(r"(?P<const>#)?(?P<name>\w+)"
|
_var_name_re = re.compile(r"(?P<const>#)?(?P<name>\w+)"
|
||||||
r"(?:@(?P<type>int|uint|bool|float)?(?P<bits>\d+)?)?")
|
r"(?:@(?P<type>int|uint|bool|float)?(?P<bits>\d+)?)?"
|
||||||
|
r"(?P<cond>\([^\)]+\))?")
|
||||||
|
|
||||||
class Variable(Value):
|
class Variable(Value):
|
||||||
def __init__(self, val, name, varset):
|
def __init__(self, val, name, varset):
|
||||||
@@ -161,6 +164,7 @@ class Variable(Value):
|
|||||||
|
|
||||||
self.var_name = m.group('name')
|
self.var_name = m.group('name')
|
||||||
self.is_constant = m.group('const') is not None
|
self.is_constant = m.group('const') is not None
|
||||||
|
self.cond = m.group('cond')
|
||||||
self.required_type = m.group('type')
|
self.required_type = m.group('type')
|
||||||
self.bit_size = int(m.group('bits')) if m.group('bits') else 0
|
self.bit_size = int(m.group('bits')) if m.group('bits') else 0
|
||||||
|
|
||||||
|
@@ -45,10 +45,11 @@ d = 'd'
|
|||||||
# however, be used for backend-requested lowering operations as those need to
|
# however, be used for backend-requested lowering operations as those need to
|
||||||
# happen regardless of precision.
|
# happen regardless of precision.
|
||||||
#
|
#
|
||||||
# Variable names are specified as "[#]name[@type]" where "#" inicates that
|
# Variable names are specified as "[#]name[@type][(cond)]" where "#" indicates
|
||||||
# the given variable will only match constants and the type indicates that
|
# that the given variable will only match constants and the type indicates that
|
||||||
# the given variable will only match values from ALU instructions with the
|
# the given variable will only match values from ALU instructions with the
|
||||||
# given output type.
|
# given output type, and (cond) specifies an additional condition function
|
||||||
|
# (see nir_search_helpers.h).
|
||||||
#
|
#
|
||||||
# For constants, you have to be careful to make sure that it is the right
|
# For constants, you have to be careful to make sure that it is the right
|
||||||
# type because python is unaware of the source and destination types of the
|
# type because python is unaware of the source and destination types of the
|
||||||
@@ -62,6 +63,14 @@ d = 'd'
|
|||||||
# constructed value should have that bit-size.
|
# constructed value should have that bit-size.
|
||||||
|
|
||||||
optimizations = [
|
optimizations = [
|
||||||
|
|
||||||
|
(('imul', a, '#b@32(is_pos_power_of_two)'), ('ishl', a, ('find_lsb', b))),
|
||||||
|
(('imul', a, '#b@32(is_neg_power_of_two)'), ('ineg', ('ishl', a, ('find_lsb', ('iabs', b))))),
|
||||||
|
(('udiv', a, '#b@32(is_pos_power_of_two)'), ('ushr', a, ('find_lsb', b))),
|
||||||
|
(('idiv', a, '#b@32(is_pos_power_of_two)'), ('imul', ('isign', a), ('ushr', ('iabs', a), ('find_lsb', b))), 'options->lower_idiv'),
|
||||||
|
(('idiv', a, '#b@32(is_neg_power_of_two)'), ('ineg', ('imul', ('isign', a), ('ushr', ('iabs', a), ('find_lsb', ('iabs', b))))), 'options->lower_idiv'),
|
||||||
|
(('umod', a, '#b(is_pos_power_of_two)'), ('iand', a, ('isub', b, 1))),
|
||||||
|
|
||||||
(('fneg', ('fneg', a)), a),
|
(('fneg', ('fneg', a)), a),
|
||||||
(('ineg', ('ineg', a)), a),
|
(('ineg', ('ineg', a)), a),
|
||||||
(('fabs', ('fabs', a)), ('fabs', a)),
|
(('fabs', ('fabs', a)), ('fabs', a)),
|
||||||
|
@@ -127,6 +127,9 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
|
|||||||
instr->src[src].src.ssa->parent_instr->type != nir_instr_type_load_const)
|
instr->src[src].src.ssa->parent_instr->type != nir_instr_type_load_const)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
if (var->cond && !var->cond(instr, src, num_components, new_swizzle))
|
||||||
|
return false;
|
||||||
|
|
||||||
if (var->type != nir_type_invalid) {
|
if (var->type != nir_type_invalid) {
|
||||||
if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
|
if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
|
||||||
return false;
|
return false;
|
||||||
|
@@ -68,6 +68,16 @@ typedef struct {
|
|||||||
* never match anything.
|
* never match anything.
|
||||||
*/
|
*/
|
||||||
nir_alu_type type;
|
nir_alu_type type;
|
||||||
|
|
||||||
|
/** Optional condition fxn ptr
|
||||||
|
*
|
||||||
|
* This is only allowed in search expressions, and allows additional
|
||||||
|
* constraints to be placed on the match. Typically used for 'is_constant'
|
||||||
|
* variables to require, for example, power-of-two in order for the search
|
||||||
|
* to match.
|
||||||
|
*/
|
||||||
|
bool (*cond)(nir_alu_instr *instr, unsigned src,
|
||||||
|
unsigned num_components, const uint8_t *swizzle);
|
||||||
} nir_search_variable;
|
} nir_search_variable;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
94
src/compiler/nir/nir_search_helpers.h
Normal file
94
src/compiler/nir/nir_search_helpers.h
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
/*
|
||||||
|
* Copyright © 2016 Red Hat
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
* IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
* Authors:
|
||||||
|
* Rob Clark <robclark@freedesktop.org>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _NIR_SEARCH_HELPERS_
|
||||||
|
#define _NIR_SEARCH_HELPERS_
|
||||||
|
|
||||||
|
#include "nir.h"
|
||||||
|
|
||||||
|
/* True when exactly one bit of x is set, i.e. x is a non-zero power of two. */
static inline bool
__is_power_of_two(unsigned int x)
{
   /* Zero has no bits set and is not a power of two. */
   if (x == 0)
      return false;

   /* x & (x - 1) clears the lowest set bit; nothing remains only when that
    * bit was the sole one set.
    */
   return (x & (x - 1)) == 0;
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
is_pos_power_of_two(nir_alu_instr *instr, unsigned src, unsigned num_components,
|
||||||
|
const uint8_t *swizzle)
|
||||||
|
{
|
||||||
|
nir_const_value *val = nir_src_as_const_value(instr->src[src].src);
|
||||||
|
|
||||||
|
/* only constant src's: */
|
||||||
|
if (!val)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < num_components; i++) {
|
||||||
|
switch (nir_op_infos[instr->op].input_types[src]) {
|
||||||
|
case nir_type_int:
|
||||||
|
if (val->i32[swizzle[i]] < 0)
|
||||||
|
return false;
|
||||||
|
if (!__is_power_of_two(val->i32[swizzle[i]]))
|
||||||
|
return false;
|
||||||
|
break;
|
||||||
|
case nir_type_uint:
|
||||||
|
if (!__is_power_of_two(val->u32[swizzle[i]]))
|
||||||
|
return false;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
is_neg_power_of_two(nir_alu_instr *instr, unsigned src, unsigned num_components,
|
||||||
|
const uint8_t *swizzle)
|
||||||
|
{
|
||||||
|
nir_const_value *val = nir_src_as_const_value(instr->src[src].src);
|
||||||
|
|
||||||
|
/* only constant src's: */
|
||||||
|
if (!val)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < num_components; i++) {
|
||||||
|
switch (nir_op_infos[instr->op].input_types[src]) {
|
||||||
|
case nir_type_int:
|
||||||
|
if (val->i32[swizzle[i]] > 0)
|
||||||
|
return false;
|
||||||
|
if (!__is_power_of_two(abs(val->i32[swizzle[i]])))
|
||||||
|
return false;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* _NIR_SEARCH_HELPERS_ */
|
Reference in New Issue
Block a user