broadcom/compiler: be more careful with unifa in non-uniform control flow
If the lane from which the hardware writes the unifa address is disabled, then we may end up with a bogus address and invalid memory accesses from follow-up ldunifa. Instead of always disabling unifa loads in non-uniform control flow we can try to see if the address is prouced from a nir register (which is the only case where we do conditional writes under non-uniform control flow in ntq_store_def), and only disable it in that case. When enabling subgroups for graphics pipelines, this fixes a GMP violation in the simulator with the following test (which has non-uniform control flow writing unifa with lane 0 disabled, which is the lane from which the unifa takes the address): dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcastfirst_int Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com> Cc: mesa-stable Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27211>
This commit is contained in:

committed by
Marge Bot

parent
31e8740808
commit
5b269814fc
@@ -3026,6 +3026,46 @@ emit_ldunifa(struct v3d_compile *c, struct qreg *result)
|
|||||||
c->current_unifa_offset += 4;
|
c->current_unifa_offset += 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Checks if the value of a nir src is derived from a nir register */
|
||||||
|
static bool
|
||||||
|
nir_src_derived_from_reg(nir_src src)
|
||||||
|
{
|
||||||
|
nir_def *def = src.ssa;
|
||||||
|
if (nir_load_reg_for_def(def))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
nir_instr *parent = def->parent_instr;
|
||||||
|
switch (parent->type) {
|
||||||
|
case nir_instr_type_alu: {
|
||||||
|
nir_alu_instr *alu = nir_instr_as_alu(parent);
|
||||||
|
int num_srcs = nir_op_infos[alu->op].num_inputs;
|
||||||
|
for (int i = 0; i < num_srcs; i++) {
|
||||||
|
if (nir_src_derived_from_reg(alu->src[i].src))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
case nir_instr_type_intrinsic: {
|
||||||
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
|
||||||
|
int num_srcs = nir_intrinsic_infos[intr->intrinsic].num_srcs;
|
||||||
|
for (int i = 0; i < num_srcs; i++) {
|
||||||
|
if (nir_src_derived_from_reg(intr->src[i]))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
case nir_instr_type_load_const:
|
||||||
|
case nir_instr_type_undef:
|
||||||
|
return false;
|
||||||
|
default:
|
||||||
|
/* By default we assume it may come from a register, the above
|
||||||
|
* cases should be able to handle the majority of situations
|
||||||
|
* though.
|
||||||
|
*/
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
ntq_emit_load_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
ntq_emit_load_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
||||||
{
|
{
|
||||||
@@ -3048,6 +3088,24 @@ ntq_emit_load_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
|||||||
if (nir_src_is_divergent(offset))
|
if (nir_src_is_divergent(offset))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
/* Emitting loads from unifa may not be safe under non-uniform control
|
||||||
|
* flow. It seems the address that is used to write to the unifa
|
||||||
|
* register is taken from the first lane and if that lane is disabled
|
||||||
|
* by control flow then the value we read may be bogus and lead to
|
||||||
|
* invalid memory accesses on follow-up ldunifa instructions. However,
|
||||||
|
* ntq_store_def only emits conditional writes for nir registersas long
|
||||||
|
* we can be certain that the offset isn't derived from a load_reg we
|
||||||
|
* should be fine.
|
||||||
|
*
|
||||||
|
* The following CTS test can be used to trigger the problem, which
|
||||||
|
* causes a GMP violations in the sim without this check:
|
||||||
|
* dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcastfirst_int
|
||||||
|
*/
|
||||||
|
if (vir_in_nonuniform_control_flow(c) &&
|
||||||
|
nir_src_derived_from_reg(offset)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/* We can only use unifa with SSBOs if they are read-only. Otherwise
|
/* We can only use unifa with SSBOs if they are read-only. Otherwise
|
||||||
* ldunifa won't see the shader writes to that address (possibly
|
* ldunifa won't see the shader writes to that address (possibly
|
||||||
* because ldunifa doesn't read from the L2T cache).
|
* because ldunifa doesn't read from the L2T cache).
|
||||||
|
Reference in New Issue
Block a user