nir/algebraic: Add nir_lower_int64_options::nir_lower_iadd3_64

This allows us to avoid generating 64-bit iadd3 on Intel while continuing
to generate it for NVIDIA.

No shader-db or fossil-db changes.

v2: Add nir_lower_iadd3_64 flag so we can continue to generate 64-bit
iadd3 on NVIDIA platforms.

v3: s/bit_size == 64/s == 64/. This cut-and-paste bug prevented any of
the optimizations from ever occurring.
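
As a rough illustration (a standalone sketch, not code from the patch), the
snippet below replays the gating logic this change adds to the algebraic pass
and prints the condition string each bit size ends up with. Only the
nir_lower_iadd3_64 and has_iadd3 names come from the diff; the print
scaffolding is purely illustrative.

    # Standalone sketch of the per-bit-size gating added below: print the
    # condition attached to each size's iadd3 rules.
    for s in [8, 16, 32, 64]:
        cond = 'options->has_iadd3'
        if s == 64:
            # v3 fix: the test is on the loop variable s, not bit_size.
            cond += ' && !(options->lower_int64_options & nir_lower_iadd3_64)'
        print('iadd@{}:'.format(s), cond)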

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29148>
Author: Ian Romanick
Date:   2023-08-14 12:58:51 -07:00
parent fdc483df25
commit 22095c60bc
4 changed files with 22 additions and 7 deletions

src/compiler/nir/nir.h

@@ -3602,6 +3602,7 @@ typedef enum {
    nir_lower_find_lsb64 = (1 << 22),
    nir_lower_conv64 = (1 << 23),
    nir_lower_uadd_sat64 = (1 << 24),
+   nir_lower_iadd3_64 = (1 << 25),
 } nir_lower_int64_options;
 
 typedef enum {

src/compiler/nir/nir_opt_algebraic.py

@@ -3081,15 +3081,27 @@ late_optimizations.extend([
    (('iadd', a, ('ineg', 'b')), ('isub', 'a', 'b'), 'options->has_isub || options->lower_ineg'),
    (('ineg', a), ('isub', 0, a), 'options->lower_ineg'),
    (('iabs', a), ('imax', a, ('ineg', a)), 'options->lower_iabs'),
+])
+
+for s in [8, 16, 32, 64]:
+   cond = 'options->has_iadd3'
+   if s == 64:
+      cond += ' && !(options->lower_int64_options & nir_lower_iadd3_64)'
+
+   iadd = "iadd@{}".format(s)
 
    # On Intel GPUs, the constant field for an ADD3 instruction must be either
    # int16_t or uint16_t.
-   (('iadd', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, b, c), 'options->has_iadd3'),
-   (('iadd', ('iadd(is_used_once)', '#a(is_16_bits)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, b, c), 'options->has_iadd3'),
-   (('iadd', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_16_bits)'), ('iadd3', a, b, c), 'options->has_iadd3'),
-   (('iadd', ('ineg', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)')), 'c(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', b), c), 'options->has_iadd3'),
-   (('iadd', ('ineg', ('iadd(is_used_once)', '#a(is_16_bits)', 'b(is_not_const)')), 'c(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', b), c), 'options->has_iadd3'),
-   (('iadd', ('ineg', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)')), '#c(is_16_bits)'), ('iadd3', ('ineg', a), ('ineg', b), c), 'options->has_iadd3'),
+   late_optimizations.extend([
+      ((iadd, ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, b, c), cond),
+      ((iadd, ('iadd(is_used_once)', '#a(is_16_bits)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, b, c), cond),
+      ((iadd, ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_16_bits)'), ('iadd3', a, b, c), cond),
+      ((iadd, ('ineg', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)')), 'c(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', b), c), cond),
+      ((iadd, ('ineg', ('iadd(is_used_once)', '#a(is_16_bits)', 'b(is_not_const)')), 'c(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', b), c), cond),
+      ((iadd, ('ineg', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)')), '#c(is_16_bits)'), ('iadd3', ('ineg', a), ('ineg', b), c), cond),
+   ])
+
+late_optimizations.extend([
    # fneg_lo / fneg_hi
    (('vec2(is_only_used_as_float)', ('fneg@16', a), b), ('fmul', ('vec2', a, b), ('vec2', -1.0, 1.0)), 'options->vectorize_vec2_16bit'),
    (('vec2(is_only_used_as_float)', a, ('fneg@16', b)), ('fmul', ('vec2', a, b), ('vec2', 1.0, -1.0)), 'options->vectorize_vec2_16bit'),
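
A rough sketch of the constant constraint behind the '#a(is_16_bits)' and
'#c(is_16_bits)' patterns above: per the comment in this hunk, an ADD3
immediate must be representable as either int16_t or uint16_t. The helper
name below is made up for illustration; it is not the in-tree is_16_bits
predicate.

    # Hypothetical range check (illustration only): an ADD3 constant must
    # fit in int16_t or uint16_t.
    def fits_add3_constant(value):
        return -(1 << 15) <= value < (1 << 16)

    assert fits_add3_constant(0xFFFF)        # largest uint16_t
    assert fits_add3_constant(-0x8000)       # smallest int16_t
    assert not fits_add3_constant(0x10000)   # needs more than 16 bits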

src/intel/compiler/brw_compiler.c

@@ -112,7 +112,8 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
       nir_lower_imul_high64 |
       nir_lower_find_lsb64 |
       nir_lower_ufind_msb64 |
-      nir_lower_bit_count64;
+      nir_lower_bit_count64 |
+      nir_lower_iadd3_64;
    nir_lower_doubles_options fp64_options =
       nir_lower_drcp |
       nir_lower_dsqrt |

src/intel/compiler/brw_fs_nir.cpp

@@ -1154,6 +1154,7 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr,
       break;
    case nir_op_iadd3:
+      assert(instr->def.bit_size < 64);
       bld.ADD3(result, op[0], op[1], op[2]);
       break;