nir/lower_subgroups: add lower_shuffle_to_swizzle_amd

masked_swizzle_amd can be much faster than shuffle.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5695>
This commit is contained in:
Rhys Perry
2020-06-23 17:37:37 +01:00
committed by Marge Bot
parent 9c317cb278
commit 7ba645d5cb
2 changed files with 38 additions and 0 deletions

View File

@@ -4153,6 +4153,7 @@ typedef struct nir_lower_subgroups_options {
bool lower_subgroup_masks:1;
bool lower_shuffle:1;
bool lower_shuffle_to_32bit:1;
bool lower_shuffle_to_swizzle_amd:1;
bool lower_quad:1;
bool lower_quad_broadcast_dynamic:1;
bool lower_quad_broadcast_dynamic_to_const:1;

View File

@@ -222,10 +222,47 @@ lower_vote_eq_to_ballot(nir_builder *b, nir_intrinsic_instr *intrin,
nir_imm_intN_t(b, 0, options->ballot_bit_size));
}
/*
 * Build a masked_swizzle_amd intrinsic that stands in for a shuffle whose
 * second source is a constant.
 *
 * src[1] of the incoming intrinsic is read as a constant; when its value is
 * 32 or larger nothing is emitted and NULL is returned so the caller can
 * fall back to another lowering.
 *
 * Otherwise the new intrinsic takes the data source, component count and
 * destination shape from the incoming intrinsic and is then either handed
 * to the scalar / 32-bit lowering helpers (depending on the options) or
 * inserted directly at the builder's cursor.
 *
 * Returns the SSA value produced by the replacement, or NULL when the
 * constant is out of range.
 */
static nir_ssa_def *
lower_shuffle_to_swizzle(nir_builder *b, nir_intrinsic_instr *intrin,
                         const nir_lower_subgroups_options *options)
{
   const unsigned imm_mask = nir_src_as_uint(intrin->src[1]);

   /* Only values that fit in five bits are handled here. */
   if (imm_mask > 31)
      return NULL;

   nir_intrinsic_instr *swz =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_masked_swizzle_amd);

   swz->num_components = intrin->num_components;
   nir_src_copy(&swz->src[0], &intrin->src[0], swz);

   /* The constant goes in at bit 10 and the low five bits are all set.
    * NOTE(review): presumably bits 0-4 are an AND mask and bits 10-14 an
    * XOR mask -- confirm against the masked_swizzle_amd definition.
    */
   nir_intrinsic_set_swizzle_mask(swz, 0x1f | (imm_mask << 10));

   nir_ssa_dest_init(&swz->instr, &swz->dest,
                     intrin->dest.ssa.num_components,
                     intrin->dest.ssa.bit_size, NULL);

   const bool to_32bit = options->lower_shuffle_to_32bit;

   /* Vector operation and scalarization requested: delegate to the helper. */
   if (options->lower_to_scalar && swz->num_components > 1)
      return lower_subgroup_op_to_scalar(b, swz, to_32bit);

   /* 64-bit data and 32-bit lowering requested: delegate to the helper. */
   if (to_32bit && swz->src[0].ssa->bit_size == 64)
      return lower_subgroup_op_to_32bit(b, swz);

   /* No further lowering needed: emit the swizzle as-is. */
   nir_builder_instr_insert(b, &swz->instr);
   return &swz->dest.ssa;
}
static nir_ssa_def *
lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
const nir_lower_subgroups_options *options)
{
if (intrin->intrinsic == nir_intrinsic_shuffle_xor &&
options->lower_shuffle_to_swizzle_amd &&
nir_src_is_const(intrin->src[1])) {
nir_ssa_def *result =
lower_shuffle_to_swizzle(b, intrin, options);
if (result)
return result;
}
nir_ssa_def *index = nir_load_subgroup_invocation(b);
bool is_shuffle = false;
switch (intrin->intrinsic) {