ir3: some 8-bit subgroup intrinsics must execute as 16-bit instructions
On ir3, 8-bit quad-broadcast, quad-swap, scan and reduce instructions only work correctly when executed in 16-bit space. A nir_lower_bit_size pass is used to upcast the source value to 16 bits and then downcast the result back to 8 bits.

Signed-off-by: Zan Dobersek <zdobersek@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29875>
This commit is contained in:
@@ -158,6 +158,29 @@ ir3_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
|
||||
return true;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
ir3_lower_bit_size(const nir_instr *instr, UNUSED void *data)
|
||||
{
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return 0;
|
||||
|
||||
nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
|
||||
switch (intrinsic->intrinsic) {
|
||||
case nir_intrinsic_exclusive_scan:
|
||||
case nir_intrinsic_inclusive_scan:
|
||||
case nir_intrinsic_quad_broadcast:
|
||||
case nir_intrinsic_quad_swap_diagonal:
|
||||
case nir_intrinsic_quad_swap_horizontal:
|
||||
case nir_intrinsic_quad_swap_vertical:
|
||||
case nir_intrinsic_reduce:
|
||||
return intrinsic->def.bit_size == 8 ? 16 : 0;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define OPT(nir, pass, ...) \
|
||||
({ \
|
||||
bool this_progress = false; \
|
||||
@@ -221,6 +244,7 @@ ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s)
|
||||
progress |= OPT(s, nir_opt_algebraic);
|
||||
progress |= OPT(s, nir_lower_alu);
|
||||
progress |= OPT(s, nir_lower_pack);
|
||||
progress |= OPT(s, nir_lower_bit_size, ir3_lower_bit_size, NULL);
|
||||
progress |= OPT(s, nir_opt_constant_folding);
|
||||
|
||||
const nir_opt_offsets_options offset_options = {
|
||||
|
Reference in New Issue
Block a user