broadcom/compiler: support subgroup reduction operations from fragment shaders

In fragment shaders these instructions consider a lane active when
any lane in the same quad is active, which is not what we want. To
fix this, we include the current sample mask in the condition mask
used with these instructions, limiting lane selection to lanes that
are really active.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27211>
Author: Iago Toral Quiroga
Date:   2024-01-30 08:35:37 +01:00
Committed-by: Marge Bot
Parent: 3544113ee2
Commit: e5bfce6f46
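
To make the problem concrete, here is a standalone model (illustrative C,
not driver code) contrasting the quad-level notion of "active" that the
hardware reduction operations use with the per-lane sample-mask (MSF)
activity the fix selects on:

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
        /* Hypothetical sample flags for one quad: only lane 0 covers
         * any samples; lanes 1-3 are helper invocations. */
        unsigned msf[4] = { 0xf, 0x0, 0x0, 0x0 };

        /* What the reduction ops consider active: any lane in the quad
         * has sample flags set, so all four lanes would participate. */
        bool quad_active = msf[0] || msf[1] || msf[2] || msf[3];

        for (int lane = 0; lane < 4; lane++) {
                /* What we actually want: the lane's own sample flags. */
                bool lane_active = msf[lane] != 0;
                printf("lane %d: quad says %d, MSF says %d\n",
                       lane, quad_active, lane_active);
        }
        return 0;
}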

@@ -3280,6 +3280,49 @@ emit_load_local_invocation_index(struct v3d_compile *c)
                        vir_uniform_ui(c, 32 - c->local_invocation_index_bits));
 }
 
+/* For the purposes of reduction operations (ballot, alleq, allfeq, bcastf) in
+ * fragment shaders a lane is considered active if any sample flags are set
+ * for *any* lane in the same quad. This is not what we want. To fix this we
+ * can emit MSF to get the active lanes and produce a condition that we
+ * can then use with these operations to limit execution only to lanes that
+ * are really active in each quad. Further, we also need to disable lanes
+ * that may be disabled because of non-uniform control flow.
+ */
+static enum v3d_qpu_cond
+setup_subgroup_reduction_condition(struct v3d_compile *c)
+{
+        assert(c->s->info.stage == MESA_SHADER_FRAGMENT ||
+               c->s->info.stage == MESA_SHADER_COMPUTE);
+
+        enum v3d_qpu_cond cond = V3D_QPU_COND_NONE;
+
+        /* Produce condition for 'lane is active' from current sample flags.
+         * Only required for fragment shaders.
+         */
+        if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
+                vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), vir_MSF(c)),
+                           V3D_QPU_PF_PUSHZ);
+                cond = V3D_QPU_COND_IFNA;
+        }
+
+        /* If we are in non-uniform control-flow update the condition to
+         * also limit lanes to those in the current execution mask.
+         */
+        if (vir_in_nonuniform_control_flow(c)) {
+                if (cond == V3D_QPU_COND_IFNA) {
+                        vir_set_uf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute),
+                                   V3D_QPU_UF_NORNZ);
+                } else {
+                        assert(cond == V3D_QPU_COND_NONE);
+                        vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute),
+                                   V3D_QPU_PF_PUSHZ);
+                }
+                cond = V3D_QPU_COND_IFA;
+        }
+
+        return cond;
+}
+
 static void
 emit_compute_barrier(struct v3d_compile *c)
 {
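
A note on the flag plumbing in the helper above (my reading of the flag ops
used in this diff): PUSHZ on MSF sets the A flag to 'msf == 0', so IFNA
selects sample-active lanes; unifying the execute mask in with NORNZ leaves
A holding '(msf != 0) && (execute == 0)', hence the switch to IFA. A
host-side model of the resulting lane selection (illustrative C, not driver
code; lane_selected is a hypothetical name):

#include <stdbool.h>

/* Models the condition computed by setup_subgroup_reduction_condition()
 * for fragment shaders: a lane runs the reduction only if its own sample
 * flags are non-zero and, under non-uniform control flow, its execute
 * value marks it as enabled (execute == 0 in this backend). */
static bool
lane_selected(unsigned msf, unsigned execute, bool nonuniform_cf)
{
        bool sample_active = msf != 0;   /* IFNA after PUSHZ on MSF */
        bool cf_enabled = execute == 0;  /* lane enabled by control flow */
        return sample_active && (!nonuniform_cf || cf_enabled);
}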
@@ -3840,21 +3883,9 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
         case nir_intrinsic_ballot: {
                 assert(c->devinfo->ver >= 71);
                 struct qreg value = ntq_get_src(c, instr->src[0], 0);
+                enum v3d_qpu_cond cond = setup_subgroup_reduction_condition(c);
                 struct qreg res = vir_get_temp(c);
-                if (vir_in_nonuniform_control_flow(c)) {
-                        /* Ballot uses the MSF mask and the condition mask to
-                         * identify active lanes. Particularly, it uses the
-                         * condition mask to filter out lanes disabled by
-                         * control flow.
-                         */
-                        vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute),
-                                   V3D_QPU_PF_PUSHZ);
-                        vir_set_cond(vir_BALLOT_dest(c, res, value),
-                                     V3D_QPU_COND_IFA);
-                } else {
-                        vir_BALLOT_dest(c, res, value);
-                }
+                vir_set_cond(vir_BALLOT_dest(c, res, value), cond);
                 ntq_store_def(c, &instr->def, 0, vir_MOV(c, res));
                 break;
         }
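
With the shared helper, the lane selection that the conditional BALLOT
applies can be modeled as below (host-side sketch with a hypothetical
model_ballot helper; the hardware computes this across the subgroup using
MSF plus the condition flags):

#include <stdbool.h>

/* Only lanes passing the reduction condition contribute their
 * predicate bit to the ballot mask. */
static unsigned
model_ballot(int nlanes, const bool value[], const bool selected[])
{
        unsigned mask = 0;
        for (int i = 0; i < nlanes; i++) {
                if (selected[i] && value[i])
                        mask |= 1u << i;
        }
        return mask;
}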
@@ -3871,21 +3902,9 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
         case nir_intrinsic_read_first_invocation: {
                 assert(c->devinfo->ver >= 71);
                 struct qreg value = ntq_get_src(c, instr->src[0], 0);
+                enum v3d_qpu_cond cond = setup_subgroup_reduction_condition(c);
                 struct qreg res = vir_get_temp(c);
-                if (vir_in_nonuniform_control_flow(c)) {
-                        /* Bcastf uses the MSF mask and the condition mask to
-                         * identify active lanes. Particularly, it uses the
-                         * condition mask to filter out lanes disabled by
-                         * control flow.
-                         */
-                        vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute),
-                                   V3D_QPU_PF_PUSHZ);
-                        vir_set_cond(vir_BCASTF_dest(c, res, value),
-                                     V3D_QPU_COND_IFA);
-                } else {
-                        vir_BCASTF_dest(c, res, value);
-                }
+                vir_set_cond(vir_BCASTF_dest(c, res, value), cond);
                 ntq_store_def(c, &instr->def, 0, vir_MOV(c, res));
                 break;
         }
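
Similarly, read_first_invocation maps to a conditional BCASTF, which
broadcasts from the first lane the condition selects; a rough host-side
model (hypothetical helper, and the no-lane-selected fallback value is an
assumption):

#include <stdbool.h>

/* Returns the value of the first selected lane, which BCASTF then
 * broadcasts to the whole subgroup. */
static unsigned
model_read_first(int nlanes, const unsigned value[], const bool selected[])
{
        for (int i = 0; i < nlanes; i++) {
                if (selected[i])
                        return value[i];
        }
        return 0; /* assumed fallback: no lane selected */
}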
@@ -3903,25 +3922,12 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
         case nir_intrinsic_vote_ieq: {
                 assert(c->devinfo->ver >= 71);
                 struct qreg value = ntq_get_src(c, instr->src[0], 0);
+                enum v3d_qpu_cond cond = setup_subgroup_reduction_condition(c);
                 struct qreg res = vir_get_temp(c);
-                if (vir_in_nonuniform_control_flow(c)) {
-                        /* Alleq uses the MSF mask and the condition mask to
-                         * identify active lanes. Particularly, it uses the
-                         * condition mask to filter out lanes disabled by
-                         * control flow.
-                         */
-                        vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute),
-                                   V3D_QPU_PF_PUSHZ);
-                        vir_set_cond(instr->intrinsic == nir_intrinsic_vote_ieq ?
-                                     vir_ALLEQ_dest(c, res, value) :
-                                     vir_ALLFEQ_dest(c, res, value),
-                                     V3D_QPU_COND_IFA);
-                } else {
-                        if (instr->intrinsic == nir_intrinsic_vote_ieq)
-                                vir_ALLEQ_dest(c, res, value);
-                        else
-                                vir_ALLFEQ_dest(c, res, value);
-                }
+                vir_set_cond(instr->intrinsic == nir_intrinsic_vote_ieq ?
+                             vir_ALLEQ_dest(c, res, value) :
+                             vir_ALLFEQ_dest(c, res, value),
+                             cond);
 
                 /* Produce boolean result */
                 vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), res),
@@ -3934,20 +3940,9 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
         case nir_intrinsic_vote_all: {
                 assert(c->devinfo->ver >= 71);
                 struct qreg value = ntq_get_src(c, instr->src[0], 0);
+                enum v3d_qpu_cond cond = setup_subgroup_reduction_condition(c);
                 struct qreg res = vir_get_temp(c);
-                if (vir_in_nonuniform_control_flow(c)) {
-                        /* Alleq uses the MSF mask and the condition mask to
-                         * identify active lanes. Particularly, it uses the
-                         * condition mask to filter out lanes disabled by
-                         * control flow.
-                         */
-                        vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute),
-                                   V3D_QPU_PF_PUSHZ);
-                        vir_set_cond(vir_ALLEQ_dest(c, res, value),
-                                     V3D_QPU_COND_IFA);
-                } else {
-                        vir_ALLEQ_dest(c, res, value);
-                }
+                vir_set_cond(vir_ALLEQ_dest(c, res, value), cond);
 
                 /* We want to check if 'all lanes are equal (alleq != 0) and
                  * their value is True (value != 0)'.
@@ -3969,20 +3964,9 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
         case nir_intrinsic_vote_any: {
                 assert(c->devinfo->ver >= 71);
                 struct qreg value = ntq_get_src(c, instr->src[0], 0);
+                enum v3d_qpu_cond cond = setup_subgroup_reduction_condition(c);
                 struct qreg res = vir_get_temp(c);
-                if (vir_in_nonuniform_control_flow(c)) {
-                        /* Alleq uses the MSF mask and the condition mask to
-                         * identify active lanes. Particularly, it uses the
-                         * condition mask to filter out lanes disabled by
-                         * control flow.
-                         */
-                        vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), c->execute),
-                                   V3D_QPU_PF_PUSHZ);
-                        vir_set_cond(vir_ALLEQ_dest(c, res, value),
-                                     V3D_QPU_COND_IFA);
-                } else {
-                        vir_ALLEQ_dest(c, res, value);
-                }
+                vir_set_cond(vir_ALLEQ_dest(c, res, value), cond);
 
                 /* We want to check 'not (all lanes are equal (alleq != 0)'
                  * and their value is False (value == 0))'.
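
For reference, the boolean derivations described in the comments of the two
vote cases above reduce to the following (host-side sketch; once ALLEQ
reports all lanes equal, any single lane's value stands in for all of them):

#include <stdbool.h>

/* vote_all: all active lanes agree and the agreed value is true. */
static bool
model_vote_all(unsigned alleq, unsigned value)
{
        return alleq != 0 && value != 0;
}

/* vote_any: false only when all active lanes agree on false. */
static bool
model_vote_any(unsigned alleq, unsigned value)
{
        return !(alleq != 0 && value == 0);
}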