Files
third_party_mesa3d/src/compiler/nir/nir_search_helpers.h

292 lines
8.1 KiB
C
Raw Normal View History

/*
* Copyright © 2016 Red Hat
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
#ifndef _NIR_SEARCH_HELPERS_
#define _NIR_SEARCH_HELPERS_
#include "nir.h"
#include "util/bitscan.h"
#include <math.h>
static inline bool
is_pos_power_of_two(nir_alu_instr *instr, unsigned src, unsigned num_components,
const uint8_t *swizzle)
{
/* only constant srcs: */
if (!nir_src_is_const(instr->src[src].src))
return false;
for (unsigned i = 0; i < num_components; i++) {
switch (nir_op_infos[instr->op].input_types[src]) {
case nir_type_int: {
int64_t val = nir_src_comp_as_int(instr->src[src].src, swizzle[i]);
if (val <= 0 || !util_is_power_of_two_or_zero64(val))
return false;
break;
}
case nir_type_uint: {
uint64_t val = nir_src_comp_as_uint(instr->src[src].src, swizzle[i]);
if (val == 0 || !util_is_power_of_two_or_zero64(val))
return false;
break;
}
default:
return false;
}
}
return true;
}
static inline bool
is_neg_power_of_two(nir_alu_instr *instr, unsigned src, unsigned num_components,
const uint8_t *swizzle)
{
/* only constant srcs: */
if (!nir_src_is_const(instr->src[src].src))
return false;
for (unsigned i = 0; i < num_components; i++) {
switch (nir_op_infos[instr->op].input_types[src]) {
case nir_type_int: {
int64_t val = nir_src_comp_as_int(instr->src[src].src, swizzle[i]);
if (val >= 0 || !util_is_power_of_two_or_zero64(-val))
return false;
break;
}
default:
return false;
}
}
return true;
}
static inline bool
is_zero_to_one(nir_alu_instr *instr, unsigned src, unsigned num_components,
const uint8_t *swizzle)
{
/* only constant srcs: */
if (!nir_src_is_const(instr->src[src].src))
return false;
for (unsigned i = 0; i < num_components; i++) {
switch (nir_op_infos[instr->op].input_types[src]) {
case nir_type_float: {
double val = nir_src_comp_as_float(instr->src[src].src, swizzle[i]);
if (isnan(val) || val < 0.0f || val > 1.0f)
return false;
break;
}
default:
return false;
}
}
return true;
}
/**
* Exclusive compare with (0, 1).
*
* This differs from \c is_zero_to_one because that function tests 0 <= src <=
* 1 while this function tests 0 < src < 1.
*/
static inline bool
is_gt_0_and_lt_1(nir_alu_instr *instr, unsigned src, unsigned num_components,
const uint8_t *swizzle)
{
/* only constant srcs: */
if (!nir_src_is_const(instr->src[src].src))
return false;
for (unsigned i = 0; i < num_components; i++) {
switch (nir_op_infos[instr->op].input_types[src]) {
case nir_type_float: {
double val = nir_src_comp_as_float(instr->src[src].src, swizzle[i]);
if (isnan(val) || val <= 0.0f || val >= 1.0f)
return false;
break;
}
default:
return false;
}
}
return true;
}
static inline bool
is_not_const_zero(nir_alu_instr *instr, unsigned src, unsigned num_components,
const uint8_t *swizzle)
{
if (nir_src_as_const_value(instr->src[src].src) == NULL)
return true;
for (unsigned i = 0; i < num_components; i++) {
switch (nir_op_infos[instr->op].input_types[src]) {
case nir_type_float:
if (nir_src_comp_as_float(instr->src[src].src, swizzle[i]) == 0.0)
return false;
break;
case nir_type_bool:
case nir_type_int:
case nir_type_uint:
if (nir_src_comp_as_uint(instr->src[src].src, swizzle[i]) == 0)
return false;
break;
default:
return false;
}
}
return true;
}
nir: shuffle constants to the top V2: mark float opts as inexact If one of the inputs to an mul/add is the result of another mul/add there is a chance that we can reuse the result of that mul/add in other calls if we do the multiplication in the right order. Also by attempting to move all constants to the top we increase the chance of constant folding. For example it is a fairly common pattern for shaders to do something similar to this: const float a = 0.5; in vec4 b; in float c; ... b.x = b.x * c; b.y = b.y * c; ... b.x = b.x * a + a; b.y = b.y * a + a; So by simply detecting that constant a is part of the multiplication in ffma and switching it with previous fmul that updates b we end up with: ... c = a * c; ... b.x = b.x * c + a; b.y = b.y * c + a; Shader-db results BDW: total instructions in shared programs: 13011050 -> 12967888 (-0.33%) instructions in affected programs: 4118366 -> 4075204 (-1.05%) helped: 17739 HURT: 1343 total cycles in shared programs: 246717952 -> 246410716 (-0.12%) cycles in affected programs: 166870802 -> 166563566 (-0.18%) helped: 18493 HURT: 7965 total spills in shared programs: 14937 -> 14560 (-2.52%) spills in affected programs: 9331 -> 8954 (-4.04%) helped: 284 HURT: 33 total fills in shared programs: 20211 -> 19671 (-2.67%) fills in affected programs: 12586 -> 12046 (-4.29%) helped: 286 HURT: 33 LOST: 39 GAINED: 33 Some of the hurt will go away when we shuffle things back down to the bottom in the following patch. It's also noteworthy that almost all of the spill changes are in Deus Ex both hurt and helped. Reviewed-by: Elie Tournier <elie.tournier@collabora.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-12 13:10:55 +11:00
static inline bool
is_not_const(nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components,
UNUSED const uint8_t *swizzle)
nir: shuffle constants to the top V2: mark float opts as inexact If one of the inputs to an mul/add is the result of another mul/add there is a chance that we can reuse the result of that mul/add in other calls if we do the multiplication in the right order. Also by attempting to move all constants to the top we increase the chance of constant folding. For example it is a fairly common pattern for shaders to do something similar to this: const float a = 0.5; in vec4 b; in float c; ... b.x = b.x * c; b.y = b.y * c; ... b.x = b.x * a + a; b.y = b.y * a + a; So by simply detecting that constant a is part of the multiplication in ffma and switching it with previous fmul that updates b we end up with: ... c = a * c; ... b.x = b.x * c + a; b.y = b.y * c + a; Shader-db results BDW: total instructions in shared programs: 13011050 -> 12967888 (-0.33%) instructions in affected programs: 4118366 -> 4075204 (-1.05%) helped: 17739 HURT: 1343 total cycles in shared programs: 246717952 -> 246410716 (-0.12%) cycles in affected programs: 166870802 -> 166563566 (-0.18%) helped: 18493 HURT: 7965 total spills in shared programs: 14937 -> 14560 (-2.52%) spills in affected programs: 9331 -> 8954 (-4.04%) helped: 284 HURT: 33 total fills in shared programs: 20211 -> 19671 (-2.67%) fills in affected programs: 12586 -> 12046 (-4.29%) helped: 286 HURT: 33 LOST: 39 GAINED: 33 Some of the hurt will go away when we shuffle things back down to the bottom in the following patch. It's also noteworthy that almost all of the spill changes are in Deus Ex both hurt and helped. Reviewed-by: Elie Tournier <elie.tournier@collabora.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-12 13:10:55 +11:00
{
return !nir_src_is_const(instr->src[src].src);
nir: shuffle constants to the top V2: mark float opts as inexact If one of the inputs to an mul/add is the result of another mul/add there is a chance that we can reuse the result of that mul/add in other calls if we do the multiplication in the right order. Also by attempting to move all constants to the top we increase the chance of constant folding. For example it is a fairly common pattern for shaders to do something similar to this: const float a = 0.5; in vec4 b; in float c; ... b.x = b.x * c; b.y = b.y * c; ... b.x = b.x * a + a; b.y = b.y * a + a; So by simply detecting that constant a is part of the multiplication in ffma and switching it with previous fmul that updates b we end up with: ... c = a * c; ... b.x = b.x * c + a; b.y = b.y * c + a; Shader-db results BDW: total instructions in shared programs: 13011050 -> 12967888 (-0.33%) instructions in affected programs: 4118366 -> 4075204 (-1.05%) helped: 17739 HURT: 1343 total cycles in shared programs: 246717952 -> 246410716 (-0.12%) cycles in affected programs: 166870802 -> 166563566 (-0.18%) helped: 18493 HURT: 7965 total spills in shared programs: 14937 -> 14560 (-2.52%) spills in affected programs: 9331 -> 8954 (-4.04%) helped: 284 HURT: 33 total fills in shared programs: 20211 -> 19671 (-2.67%) fills in affected programs: 12586 -> 12046 (-4.29%) helped: 286 HURT: 33 LOST: 39 GAINED: 33 Some of the hurt will go away when we shuffle things back down to the bottom in the following patch. It's also noteworthy that almost all of the spill changes are in Deus Ex both hurt and helped. Reviewed-by: Elie Tournier <elie.tournier@collabora.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2017-01-12 13:10:55 +11:00
}
nir/algebraic: Commute 1-fsat(a) to fsat(1-a) for all non-fmul instructions The goal is to avoid having an extra MOV instruction to perform the saturate. Doing the subtraction first allows the saturate to be applied to the ADD instruction making the MOV unnecessary. Values generated in different block and values from non-ALU instructions (e.g., texture instructions) almost always need the extra MOV. Multiply instructions are restricted because doing this rearrangement can interfere with the generation of flrp and ffma instructions. v2: Now that the final method has been selected, squash three commits into one. All Intel platforms has similar results. (Ice Lake shown) total instructions in shared programs: 17223214 -> 17219386 (-0.02%) instructions in affected programs: 1524376 -> 1520548 (-0.25%) helped: 2686 HURT: 26 helped stats (abs) min: 1 max: 32 x̄: 1.44 x̃: 1 helped stats (rel) min: 0.03% max: 16.67% x̄: 0.54% x̃: 0.37% HURT stats (abs) min: 1 max: 2 x̄: 1.69 x̃: 2 HURT stats (rel) min: 0.33% max: 1.67% x̄: 0.54% x̃: 0.35% 95% mean confidence interval for instructions value: -1.46 -1.36 95% mean confidence interval for instructions %-change: -0.56% -0.50% Instructions are helped. total cycles in shared programs: 360811571 -> 360791896 (<.01%) cycles in affected programs: 103650214 -> 103630539 (-0.02%) helped: 1557 HURT: 675 helped stats (abs) min: 1 max: 1773 x̄: 41.44 x̃: 16 helped stats (rel) min: <.01% max: 26.77% x̄: 1.37% x̃: 0.64% HURT stats (abs) min: 1 max: 1513 x̄: 66.44 x̃: 14 HURT stats (rel) min: <.01% max: 46.16% x̄: 2.00% x̃: 0.49% 95% mean confidence interval for cycles value: -14.82 -2.81 95% mean confidence interval for cycles %-change: -0.50% -0.20% Cycles are helped. LOST: 2 GAINED: 0 Reviewed-by: Matt Turner <mattst88@gmail.com> [v1] Reviewed-by: Thomas Helland <thomashelland90@gmail.com>
2018-03-27 22:57:07 -07:00
static inline bool
is_not_fmul(nir_alu_instr *instr, unsigned src,
UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)
{
nir_alu_instr *src_alu =
nir_src_as_alu_instr(instr->src[src].src);
if (src_alu == NULL)
return true;
if (src_alu->op == nir_op_fneg)
return is_not_fmul(src_alu, 0, 0, NULL);
return src_alu->op != nir_op_fmul;
}
static inline bool
is_used_once(nir_alu_instr *instr)
{
bool zero_if_use = list_empty(&instr->dest.dest.ssa.if_uses);
bool zero_use = list_empty(&instr->dest.dest.ssa.uses);
if (zero_if_use && zero_use)
return false;
if (!zero_if_use && list_is_singular(&instr->dest.dest.ssa.uses))
return false;
if (!zero_use && list_is_singular(&instr->dest.dest.ssa.if_uses))
return false;
if (!list_is_singular(&instr->dest.dest.ssa.if_uses) &&
!list_is_singular(&instr->dest.dest.ssa.uses))
return false;
return true;
}
nir/algebraic: Replace i2b used by bcsel or if-statement with comparison All of the helped shaders are in Deus Ex. I looked at a couple shaders, and they have a pattern like: vec1 32 ssa_373 = i2b32 ssa_345.w vec1 32 ssa_374 = bcsel ssa_373, ssa_20, ssa_0 ... vec1 32 ssa_377 = ine ssa_345.w, ssa_0 if ssa_377 { ... vec1 32 ssa_416 = i2b32 ssa_385.w vec1 32 ssa_417 = bcsel ssa_416, ssa_386, ssa_374 ... } The massive help occurs because the i2b32 is removed, then other passes determine that ssa_374 must be ssa_20 inside the if-statement allowing the first bcsel to also be deleted. v2: Rebase on 1-bit Boolean changes. v3: Fix i2b32 vs ine problem in if-statement replacement. Noticed by Bas. Skylake total instructions in shared programs: 15241394 -> 15186287 (-0.36%) instructions in affected programs: 890583 -> 835476 (-6.19%) helped: 355 HURT: 0 helped stats (abs) min: 1 max: 497 x̄: 155.23 x̃: 149 helped stats (rel) min: 0.09% max: 16.49% x̄: 6.10% x̃: 6.59% 95% mean confidence interval for instructions value: -165.07 -145.39 95% mean confidence interval for instructions %-change: -6.42% -5.77% Instructions are helped. total cycles in shared programs: 373846583 -> 371023357 (-0.76%) cycles in affected programs: 118972102 -> 116148876 (-2.37%) helped: 343 HURT: 14 helped stats (abs) min: 45 max: 118284 x̄: 8332.32 x̃: 6089 helped stats (rel) min: 0.03% max: 38.19% x̄: 2.48% x̃: 1.77% HURT stats (abs) min: 120 max: 4126 x̄: 2482.79 x̃: 3019 HURT stats (rel) min: 0.16% max: 17.37% x̄: 2.13% x̃: 1.11% 95% mean confidence interval for cycles value: -8723.28 -7093.12 95% mean confidence interval for cycles %-change: -2.57% -2.02% Cycles are helped. total spills in shared programs: 32401 -> 23465 (-27.58%) spills in affected programs: 24457 -> 15521 (-36.54%) helped: 343 HURT: 0 total fills in shared programs: 37866 -> 31765 (-16.11%) fills in affected programs: 18889 -> 12788 (-32.30%) helped: 343 HURT: 0 Broadwell and Haswell had similar results. (Haswell shown) Haswell total instructions in shared programs: 13764783 -> 13750679 (-0.10%) instructions in affected programs: 1176256 -> 1162152 (-1.20%) helped: 334 HURT: 21 helped stats (abs) min: 1 max: 358 x̄: 42.59 x̃: 47 helped stats (rel) min: 0.09% max: 11.81% x̄: 1.30% x̃: 1.37% HURT stats (abs) min: 1 max: 61 x̄: 5.76 x̃: 1 HURT stats (rel) min: 0.03% max: 1.84% x̄: 0.17% x̃: 0.03% 95% mean confidence interval for instructions value: -43.99 -35.47 95% mean confidence interval for instructions %-change: -1.35% -1.08% Instructions are helped. total cycles in shared programs: 386511910 -> 385402528 (-0.29%) cycles in affected programs: 143831110 -> 142721728 (-0.77%) helped: 327 HURT: 39 helped stats (abs) min: 16 max: 25219 x̄: 3519.74 x̃: 3570 helped stats (rel) min: <.01% max: 10.26% x̄: 0.95% x̃: 0.96% HURT stats (abs) min: 16 max: 4881 x̄: 1065.95 x̃: 997 HURT stats (rel) min: <.01% max: 16.67% x̄: 0.70% x̃: 0.24% 95% mean confidence interval for cycles value: -3375.59 -2686.60 95% mean confidence interval for cycles %-change: -0.92% -0.64% Cycles are helped. total spills in shared programs: 100480 -> 97846 (-2.62%) spills in affected programs: 84702 -> 82068 (-3.11%) helped: 316 HURT: 21 total fills in shared programs: 96877 -> 94369 (-2.59%) fills in affected programs: 69167 -> 66659 (-3.63%) helped: 316 HURT: 9 No changes on Ivy Bridge or earlier platforms. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2018-12-03 16:30:44 -08:00
static inline bool
is_used_by_if(nir_alu_instr *instr)
{
return !list_empty(&instr->dest.dest.ssa.if_uses);
}
static inline bool
is_not_used_by_if(nir_alu_instr *instr)
{
return list_empty(&instr->dest.dest.ssa.if_uses);
}
nir/algebraic: Push unary operations into source operands of fsat source Pushing a unary operation, like fneg, into the operation that generates its operand allows the fsat to be applied to the inner instruction instead of on a separate instruction that performs the unary operation. This changes fmul ssa_100, ssa_99, ssa_98 fmov.sat ssa_101, -ssa_100 into fmul.sat ssa_100, -ssa_99, ssa_98 Ice Lake, Skylake, and Broadwell had similar results. (Ice Lake shown) total instructions in shared programs: 17228658 -> 17228584 (<.01%) instructions in affected programs: 3163 -> 3089 (-2.34%) helped: 49 HURT: 0 helped stats (abs) min: 1 max: 2 x̄: 1.51 x̃: 2 helped stats (rel) min: 0.58% max: 9.09% x̄: 3.69% x̃: 3.51% 95% mean confidence interval for instructions value: -1.66 -1.37 95% mean confidence interval for instructions %-change: -4.37% -3.00% Instructions are helped. total cycles in shared programs: 360937144 -> 360936431 (<.01%) cycles in affected programs: 24029 -> 23316 (-2.97%) helped: 47 HURT: 2 helped stats (abs) min: 4 max: 18 x̄: 15.34 x̃: 16 helped stats (rel) min: 0.69% max: 6.18% x̄: 3.78% x̃: 4.27% HURT stats (abs) min: 4 max: 4 x̄: 4.00 x̃: 4 HURT stats (rel) min: 0.34% max: 0.67% x̄: 0.50% x̃: 0.50% 95% mean confidence interval for cycles value: -16.05 -13.05 95% mean confidence interval for cycles %-change: -4.07% -3.15% Cycles are helped. All Gen7 and earlier platforms had similar results. (Haswell shown) total instructions in shared programs: 13536059 -> 13535884 (<.01%) instructions in affected programs: 8797 -> 8622 (-1.99%) helped: 150 HURT: 0 helped stats (abs) min: 1 max: 2 x̄: 1.17 x̃: 1 helped stats (rel) min: 0.40% max: 11.11% x̄: 3.51% x̃: 1.96% 95% mean confidence interval for instructions value: -1.23 -1.11 95% mean confidence interval for instructions %-change: -3.97% -3.05% Instructions are helped. total cycles in shared programs: 357696119 -> 357694193 (<.01%) cycles in affected programs: 50216 -> 48290 (-3.84%) helped: 109 HURT: 14 helped stats (abs) min: 2 max: 92 x̄: 18.97 x̃: 16 helped stats (rel) min: 0.26% max: 19.09% x̄: 7.37% x̃: 5.37% HURT stats (abs) min: 2 max: 26 x̄: 10.14 x̃: 5 HURT stats (rel) min: 0.18% max: 4.73% x̄: 1.84% x̃: 0.92% 95% mean confidence interval for cycles value: -19.27 -12.05 95% mean confidence interval for cycles %-change: -7.34% -5.31% Cycles are helped. Reviewed-by: Matt Turner <mattst88@gmail.com>
2018-10-23 14:30:41 -07:00
static inline bool
is_used_by_non_fsat(nir_alu_instr *instr)
{
nir_foreach_use(src, &instr->dest.dest.ssa) {
const nir_instr *const user_instr = src->parent_instr;
if (user_instr->type != nir_instr_type_alu)
return true;
const nir_alu_instr *const user_alu = nir_instr_as_alu(user_instr);
assert(instr != user_alu);
if (user_alu->op != nir_op_fsat)
return true;
}
return false;
}
/**
* Returns true if a NIR ALU src represents a constant integer
* of either 32 or 64 bits, and the higher word (bit-size / 2)
* of all its components is zero.
*/
static inline bool
is_upper_half_zero(nir_alu_instr *instr, unsigned src,
unsigned num_components, const uint8_t *swizzle)
{
if (nir_src_as_const_value(instr->src[src].src) == NULL)
return false;
for (unsigned i = 0; i < num_components; i++) {
unsigned half_bit_size = nir_src_bit_size(instr->src[src].src) / 2;
uint32_t high_bits = ((1 << half_bit_size) - 1) << half_bit_size;
if ((nir_src_comp_as_uint(instr->src[src].src,
swizzle[i]) & high_bits) != 0) {
return false;
}
}
return true;
}
/**
* Returns true if a NIR ALU src represents a constant integer
* of either 32 or 64 bits, and the lower word (bit-size / 2)
* of all its components is zero.
*/
static inline bool
is_lower_half_zero(nir_alu_instr *instr, unsigned src,
unsigned num_components, const uint8_t *swizzle)
{
if (nir_src_as_const_value(instr->src[src].src) == NULL)
return false;
for (unsigned i = 0; i < num_components; i++) {
uint32_t low_bits =
(1 << (nir_src_bit_size(instr->src[src].src) / 2)) - 1;
if ((nir_src_comp_as_int(instr->src[src].src, swizzle[i]) & low_bits) != 0)
return false;
}
return true;
}
#endif /* _NIR_SEARCH_ */