intel/fs: Add constant propagation for ADD3
v2: Require that the constant value be representable as either uint16_t or int16_t. Suggested by Matt. v3: Remove redundant patterns. Noticed by Matt. shader-db: DG2 total instructions in shared programs: 23103767 -> 23103577 (<.01%) instructions in affected programs: 51822 -> 51632 (-0.37%) helped: 98 / HURT: 15 total cycles in shared programs: 842347714 -> 842380017 (<.01%) cycles in affected programs: 1942595 -> 1974898 (1.66%) helped: 97 / HURT: 32 Nearly all of the affected shaders (around 9,900) are shaders in Cyberpunk 2077. It's about an even split between vertex and fragment shaders. The majority of the remaining affected shaders (3,600) are from Strange Brigade. This was also a nearly even split between fragment and vertex. All but two of the lost shaders are SIMD32 fragment shaders in Cyberpunk 2077. The other two are SIMD32 fragment shaders in Dota2. fossil-db: DG2 Instructions in all programs: 196379107 -> 196248608 (-0.1%) helped: 13467 / HURT: 1210 Cycles in all programs: 13931355281 -> 13929955971 (-0.0%) helped: 11801 / HURT: 2922 Lost: 90 Reviewed-by: Matt Turner <mattst88@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23262>
This commit is contained in:
@@ -2861,9 +2861,14 @@ late_optimizations.extend([
|
||||
(('iadd', a, ('ineg', 'b')), ('isub', 'a', 'b'), 'options->has_isub || options->lower_ineg'),
|
||||
(('ineg', a), ('isub', 0, a), 'options->lower_ineg'),
|
||||
(('iabs', a), ('imax', a, ('ineg', a)), 'options->lower_iabs'),
|
||||
|
||||
# On Intel GPUs, the constant field for an ADD3 instruction must be either
|
||||
# int16_t or uint16_t.
|
||||
(('iadd', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, b, c), 'options->has_iadd3'),
|
||||
(('iadd', ('iadd(is_used_once)', '#a(is_16_bits)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, b, c), 'options->has_iadd3'),
|
||||
(('iadd', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_16_bits)'), ('iadd3', a, b, c), 'options->has_iadd3'),
|
||||
(('iadd', ('ineg', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)')), 'c(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', b), c), 'options->has_iadd3'),
|
||||
(('iadd', ('ineg', ('iadd(is_used_once)', '#a(is_16_bits)', 'b(is_not_const)')), 'c(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', b), c), 'options->has_iadd3'),
|
||||
(('iadd', ('ineg', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)')), '#c(is_16_bits)'), ('iadd3', ('ineg', a), ('ineg', b), c), 'options->has_iadd3'),
|
||||
|
||||
# fneg_lo / fneg_hi
|
||||
(('vec2(is_only_used_as_float)', ('fneg@16', a), b), ('fmul', ('vec2', a, b), ('vec2', -1.0, 1.0)), 'options->vectorize_vec2_16bit'),
|
||||
|
@@ -284,6 +284,27 @@ is_first_5_bits_uge_2(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Is this a constant that could be either int16_t or uint16_t? */
|
||||
static inline bool
|
||||
is_16_bits(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
|
||||
unsigned src, unsigned num_components,
|
||||
const uint8_t *swizzle)
|
||||
{
|
||||
/* only constant srcs: */
|
||||
if (!nir_src_is_const(instr->src[src].src))
|
||||
return false;
|
||||
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
const int64_t val =
|
||||
nir_src_comp_as_int(instr->src[src].src, swizzle[i]);
|
||||
|
||||
if (val > 0xffff || val < -0x8000)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
is_not_const(UNUSED struct hash_table *ht, const nir_alu_instr *instr,
|
||||
unsigned src, UNUSED unsigned num_components,
|
||||
|
@@ -983,6 +983,30 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_ADD3:
|
||||
/* add3 can have a single imm16 source. Proceed if the source type is
|
||||
* already W or UW or the value can be coerced to one of those types.
|
||||
*/
|
||||
if (val.type == BRW_REGISTER_TYPE_W || val.type == BRW_REGISTER_TYPE_UW)
|
||||
; /* Nothing to do. */
|
||||
else if (val.ud <= 0xffff)
|
||||
val = brw_imm_uw(val.ud);
|
||||
else if (val.d >= -0x8000 && val.d <= 0x7fff)
|
||||
val = brw_imm_w(val.d);
|
||||
else
|
||||
break;
|
||||
|
||||
if (i == 2) {
|
||||
inst->src[i] = val;
|
||||
progress = true;
|
||||
} else if (inst->src[2].file != IMM) {
|
||||
inst->src[i] = inst->src[2];
|
||||
inst->src[2] = val;
|
||||
progress = true;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_CMP:
|
||||
case BRW_OPCODE_IF:
|
||||
if (i == 1) {
|
||||
@@ -1088,6 +1112,15 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
|
||||
}
|
||||
}
|
||||
|
||||
/* ADD3 can only have the immediate as src0. */
|
||||
if (progress && inst->opcode == BRW_OPCODE_ADD3) {
|
||||
if (inst->src[2].file == IMM) {
|
||||
const auto src0 = inst->src[0];
|
||||
inst->src[0] = inst->src[2];
|
||||
inst->src[2] = src0;
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user