broadcom/compiler: be more careful with unifa in non-uniform control flow
If the lane from which the hardware writes the unifa address
is disabled, then we may end up with a bogus address and invalid
memory accesses from follow-up ldunifa.
Instead of always disabling unifa loads in non-uniform control
flow we can try to see if the address is prouced from a nir
register (which is the only case where we do conditional writes
under non-uniform control flow in ntq_store_def), and only
disable it in that case.
When enabling subgroups for graphics pipelines, this fixes a
GMP violation in the simulator with the following test
(which has non-uniform control flow writing unifa with lane 0
disabled, which is the lane from which the unifa takes the
address):
dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcastfirst_int
Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27211>
(cherry picked from commit 5b269814fc
)
This commit is contained in:

committed by
Eric Engestrom

parent
7b21191144
commit
c5fd24a1d4
@@ -254,7 +254,7 @@
|
||||
"description": "broadcom/compiler: be more careful with unifa in non-uniform control flow",
|
||||
"nominated": true,
|
||||
"nomination_type": 0,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null,
|
||||
"notes": null
|
||||
|
@@ -3047,6 +3047,46 @@ emit_ldunifa(struct v3d_compile *c, struct qreg *result)
|
||||
c->current_unifa_offset += 4;
|
||||
}
|
||||
|
||||
/* Checks if the value of a nir src is derived from a nir register */
|
||||
static bool
|
||||
nir_src_derived_from_reg(nir_src src)
|
||||
{
|
||||
nir_def *def = src.ssa;
|
||||
if (nir_load_reg_for_def(def))
|
||||
return true;
|
||||
|
||||
nir_instr *parent = def->parent_instr;
|
||||
switch (parent->type) {
|
||||
case nir_instr_type_alu: {
|
||||
nir_alu_instr *alu = nir_instr_as_alu(parent);
|
||||
int num_srcs = nir_op_infos[alu->op].num_inputs;
|
||||
for (int i = 0; i < num_srcs; i++) {
|
||||
if (nir_src_derived_from_reg(alu->src[i].src))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
case nir_instr_type_intrinsic: {
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent);
|
||||
int num_srcs = nir_intrinsic_infos[intr->intrinsic].num_srcs;
|
||||
for (int i = 0; i < num_srcs; i++) {
|
||||
if (nir_src_derived_from_reg(intr->src[i]))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
case nir_instr_type_load_const:
|
||||
case nir_instr_type_undef:
|
||||
return false;
|
||||
default:
|
||||
/* By default we assume it may come from a register, the above
|
||||
* cases should be able to handle the majority of situations
|
||||
* though.
|
||||
*/
|
||||
return true;
|
||||
};
|
||||
}
|
||||
|
||||
static bool
|
||||
ntq_emit_load_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
||||
{
|
||||
@@ -3069,6 +3109,24 @@ ntq_emit_load_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
||||
if (nir_src_is_divergent(offset))
|
||||
return false;
|
||||
|
||||
/* Emitting loads from unifa may not be safe under non-uniform control
|
||||
* flow. It seems the address that is used to write to the unifa
|
||||
* register is taken from the first lane and if that lane is disabled
|
||||
* by control flow then the value we read may be bogus and lead to
|
||||
* invalid memory accesses on follow-up ldunifa instructions. However,
|
||||
* ntq_store_def only emits conditional writes for nir registersas long
|
||||
* we can be certain that the offset isn't derived from a load_reg we
|
||||
* should be fine.
|
||||
*
|
||||
* The following CTS test can be used to trigger the problem, which
|
||||
* causes a GMP violations in the sim without this check:
|
||||
* dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcastfirst_int
|
||||
*/
|
||||
if (vir_in_nonuniform_control_flow(c) &&
|
||||
nir_src_derived_from_reg(offset)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* We can only use unifa with SSBOs if they are read-only. Otherwise
|
||||
* ldunifa won't see the shader writes to that address (possibly
|
||||
* because ldunifa doesn't read from the L2T cache).
|
||||
|
Reference in New Issue
Block a user