ir3: some 8-bit subgroup intrinsics must execute as 16-bit instructions

The 8-bit variants of the ir3 quad-broadcast, quad-swap, scan and reduce
instructions only work correctly when executed in 16-bit space. A
nir_lower_bit_size pass is used to upcast the source value to 16 bits and
then downcast the result back to 8 bits.

Signed-off-by: Zan Dobersek <zdobersek@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29875>
This commit is contained in:
Zan Dobersek
2024-07-14 08:36:33 +02:00
committed by Marge Bot
parent 8b7beca572
commit f8602612ed

View File

@@ -158,6 +158,29 @@ ir3_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
return true;
}
/*
 * Bit-size callback handed to nir_lower_bit_size.
 *
 * Scan, reduce, quad-broadcast and quad-swap intrinsics whose destination
 * is 8 bits wide are reported as needing a 16-bit size. Every other
 * instruction (including the same intrinsics at any other destination
 * bit size) yields 0.
 *
 * instr: the instruction being inspected.
 * data:  unused.
 *
 * Returns 16 for the 8-bit intrinsics listed below, 0 otherwise.
 */
static unsigned
ir3_lower_bit_size(const nir_instr *instr, UNUSED void *data)
{
   if (instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_quad_broadcast:
      case nir_intrinsic_quad_swap_diagonal:
      case nir_intrinsic_quad_swap_horizontal:
      case nir_intrinsic_quad_swap_vertical:
      case nir_intrinsic_exclusive_scan:
      case nir_intrinsic_inclusive_scan:
      case nir_intrinsic_reduce:
         /* Only the 8-bit forms need widening. */
         if (intr->def.bit_size == 8)
            return 16;
         return 0;
      default:
         return 0;
      }
   }

   /* Not an intrinsic: nothing to report. */
   return 0;
}
#define OPT(nir, pass, ...) \
({ \
bool this_progress = false; \
@@ -221,6 +244,7 @@ ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s)
progress |= OPT(s, nir_opt_algebraic);
progress |= OPT(s, nir_lower_alu);
progress |= OPT(s, nir_lower_pack);
progress |= OPT(s, nir_lower_bit_size, ir3_lower_bit_size, NULL);
progress |= OPT(s, nir_opt_constant_folding);
const nir_opt_offsets_options offset_options = {