intel/fs: Add constant propagation for ADD3

v2: Require that the constant value be representable as either uint16_t
or int16_t. Suggested by Matt.

v3: Remove redundant patterns. Noticed by Matt.

shader-db:

DG2
total instructions in shared programs: 23103767 -> 23103577 (<.01%)
instructions in affected programs: 51822 -> 51632 (-0.37%)
helped: 98 / HURT: 15

total cycles in shared programs: 842347714 -> 842380017 (<.01%)
cycles in affected programs: 1942595 -> 1974898 (1.66%)
helped: 97 / HURT: 32

Nearly all of the affected shaders (around 9,900) are shaders in
Cyberpunk 2077. It's about an even split between vertex and fragment
shaders. The majority of the remaining affected shaders (3,600) are
from Strange Brigade. This was also a nearly even split between
fragment and vertex.

All but two of the lost shaders are SIMD32 fragment shaders in
Cyberpunk 2077. The other two are SIMD32 fragment shaders in Dota2.

fossil-db:

DG2
Instructions in all programs: 196379107 -> 196248608 (-0.1%)
helped: 13467 / HURT: 1210

Cycles in all programs: 13931355281 -> 13929955971 (-0.0%)
helped: 11801 / HURT: 2922

Lost: 90

Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23262>
This commit is contained in:
Ian Romanick
2023-05-19 09:56:42 -07:00
committed by Marge Bot
parent 7b34808649
commit 7ef45e661f
3 changed files with 60 additions and 1 deletions

View File

@@ -2861,9 +2861,14 @@ late_optimizations.extend([
(('iadd', a, ('ineg', 'b')), ('isub', 'a', 'b'), 'options->has_isub || options->lower_ineg'),
(('ineg', a), ('isub', 0, a), 'options->lower_ineg'),
(('iabs', a), ('imax', a, ('ineg', a)), 'options->lower_iabs'),
# On Intel GPUs, the constant field for an ADD3 instruction must be either
# int16_t or uint16_t.
(('iadd', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, b, c), 'options->has_iadd3'),
(('iadd', ('iadd(is_used_once)', '#a(is_16_bits)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, b, c), 'options->has_iadd3'),
(('iadd', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_16_bits)'), ('iadd3', a, b, c), 'options->has_iadd3'),
(('iadd', ('ineg', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)')), 'c(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', b), c), 'options->has_iadd3'),
(('iadd', ('ineg', ('iadd(is_used_once)', '#a(is_16_bits)', 'b(is_not_const)')), 'c(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', b), c), 'options->has_iadd3'),
(('iadd', ('ineg', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)')), '#c(is_16_bits)'), ('iadd3', ('ineg', a), ('ineg', b), c), 'options->has_iadd3'),
# fneg_lo / fneg_hi
(('vec2(is_only_used_as_float)', ('fneg@16', a), b), ('fmul', ('vec2', a, b), ('vec2', -1.0, 1.0)), 'options->vectorize_vec2_16bit'),

View File

@@ -284,6 +284,27 @@ is_first_5_bits_uge_2(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
return true;
}
/** Is this a constant that could be either int16_t or uint16_t? */
static inline bool
is_16_bits(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
unsigned src, unsigned num_components,
const uint8_t *swizzle)
{
/* only constant srcs: */
if (!nir_src_is_const(instr->src[src].src))
return false;
for (unsigned i = 0; i < num_components; i++) {
const int64_t val =
nir_src_comp_as_int(instr->src[src].src, swizzle[i]);
if (val > 0xffff || val < -0x8000)
return false;
}
return true;
}
static inline bool
is_not_const(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
unsigned src, UNUSED unsigned num_components,

View File

@@ -983,6 +983,30 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
}
break;
case BRW_OPCODE_ADD3:
/* add3 can have a single imm16 source. Proceed if the source type is
* already W or UW or the value can be coerced to one of those types.
*/
if (val.type == BRW_REGISTER_TYPE_W || val.type == BRW_REGISTER_TYPE_UW)
; /* Nothing to do. */
else if (val.ud <= 0xffff)
val = brw_imm_uw(val.ud);
else if (val.d >= -0x8000 && val.d <= 0x7fff)
val = brw_imm_w(val.d);
else
break;
if (i == 2) {
inst->src[i] = val;
progress = true;
} else if (inst->src[2].file != IMM) {
inst->src[i] = inst->src[2];
inst->src[2] = val;
progress = true;
}
break;
case BRW_OPCODE_CMP:
case BRW_OPCODE_IF:
if (i == 1) {
@@ -1088,6 +1112,15 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
}
}
/* ADD3 can only have the immediate as src0. */
if (progress && inst->opcode == BRW_OPCODE_ADD3) {
if (inst->src[2].file == IMM) {
const auto src0 = inst->src[0];
inst->src[0] = inst->src[2];
inst->src[2] = src0;
}
}
return progress;
}