broadcom/compiler: fix incorrect flags update for subgroup elect

c->execute is 0 (not the block index) for lanes currently active
under non-uniform control flow.

Also this simplifies a bit the instructions we emit for flag
generation, both for uniform and non-uniform control flow.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27211>
(cherry picked from commit 7bdc8898b1)
This commit is contained in:
Iago Toral Quiroga
2024-01-24 09:34:41 +01:00
committed by Eric Engestrom
parent 7c9be70f96
commit 7b21191144
2 changed files with 17 additions and 32 deletions

View File

@@ -334,7 +334,7 @@
"description": "broadcom/compiler: fix incorrect flags update for subgroup elect",
"nominated": true,
"nomination_type": 0,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null

View File

@@ -3243,34 +3243,6 @@ emit_load_local_invocation_index(struct v3d_compile *c)
vir_uniform_ui(c, 32 - c->local_invocation_index_bits));
}
/* Various subgroup operations rely on the A flags, so this helper ensures that
* A flags represents currently active lanes in the subgroup.
*/
static void
set_a_flags_for_subgroup(struct v3d_compile *c)
{
/* MSF returns 0 for disabled lanes in compute shaders so
* PUSHZ will set A=1 for disabled lanes. We want the inverse
* of this but we don't have any means to negate the A flags
* directly, but we can do it by repeating the same operation
* with NORZ (A = ~A & ~Z).
*/
assert(c->s->info.stage == MESA_SHADER_COMPUTE);
vir_set_pf(c, vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_PF_PUSHZ);
vir_set_uf(c, vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_UF_NORZ);
/* If we are under non-uniform control flow we also need to
* AND the A flags with the current execute mask.
*/
if (vir_in_nonuniform_control_flow(c)) {
const uint32_t bidx = c->cur_block->index;
vir_set_uf(c, vir_XOR_dest(c, vir_nop_reg(),
c->execute,
vir_uniform_ui(c, bidx)),
V3D_QPU_UF_ANDZ);
}
}
static void
ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
{
@@ -3752,10 +3724,23 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
break;
case nir_intrinsic_elect: {
set_a_flags_for_subgroup(c);
struct qreg first = vir_FLAFIRST(c);
struct qreg first;
if (vir_in_nonuniform_control_flow(c)) {
/* Sets A=1 for lanes enabled in the execution mask */
vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute),
V3D_QPU_PF_PUSHZ);
/* Updates A ANDing with lanes enabled in MSF */
vir_set_uf(c, vir_MSF_dest(c, vir_nop_reg()),
V3D_QPU_UF_ANDNZ);
first = vir_FLAFIRST(c);
} else {
/* Sets A=1 for inactive lanes */
vir_set_pf(c, vir_MSF_dest(c, vir_nop_reg()),
V3D_QPU_PF_PUSHZ);
first = vir_FLNAFIRST(c);
}
/* Produce a boolean result from Flafirst */
/* Produce a boolean result */
vir_set_pf(c, vir_XOR_dest(c, vir_nop_reg(),
first, vir_uniform_ui(c, 1)),
V3D_QPU_PF_PUSHZ);