From 5b269814fcfcc2a947b8abc0dc4144124e7e59b2 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Tue, 30 Jan 2024 08:06:24 +0100 Subject: [PATCH] broadcom/compiler: be more careful with unifa in non-uniform control flow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the lane from which the hardware writes the unifa address is disabled, then we may end up with a bogus address and invalid memory accesses from follow-up ldunifa. Instead of always disabling unifa loads in non-uniform control flow we can try to see if the address is prouced from a nir register (which is the only case where we do conditional writes under non-uniform control flow in ntq_store_def), and only disable it in that case. When enabling subgroups for graphics pipelines, this fixes a GMP violation in the simulator with the following test (which has non-uniform control flow writing unifa with lane 0 disabled, which is the lane from which the unifa takes the address): dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcastfirst_int Reviewed-by: Alejandro PiƱeiro Cc: mesa-stable Part-of: --- src/broadcom/compiler/nir_to_vir.c | 58 ++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 4ebb04d23b3..1038cbff9ae 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -3026,6 +3026,46 @@ emit_ldunifa(struct v3d_compile *c, struct qreg *result) c->current_unifa_offset += 4; } +/* Checks if the value of a nir src is derived from a nir register */ +static bool +nir_src_derived_from_reg(nir_src src) +{ + nir_def *def = src.ssa; + if (nir_load_reg_for_def(def)) + return true; + + nir_instr *parent = def->parent_instr; + switch (parent->type) { + case nir_instr_type_alu: { + nir_alu_instr *alu = nir_instr_as_alu(parent); + int num_srcs = nir_op_infos[alu->op].num_inputs; + for (int i = 0; i < num_srcs; i++) { + if (nir_src_derived_from_reg(alu->src[i].src)) + return true; + } + return false; + } + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent); + int num_srcs = nir_intrinsic_infos[intr->intrinsic].num_srcs; + for (int i = 0; i < num_srcs; i++) { + if (nir_src_derived_from_reg(intr->src[i])) + return true; + } + return false; + } + case nir_instr_type_load_const: + case nir_instr_type_undef: + return false; + default: + /* By default we assume it may come from a register, the above + * cases should be able to handle the majority of situations + * though. + */ + return true; + }; +} + static bool ntq_emit_load_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr) { @@ -3048,6 +3088,24 @@ ntq_emit_load_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr) if (nir_src_is_divergent(offset)) return false; + /* Emitting loads from unifa may not be safe under non-uniform control + * flow. It seems the address that is used to write to the unifa + * register is taken from the first lane and if that lane is disabled + * by control flow then the value we read may be bogus and lead to + * invalid memory accesses on follow-up ldunifa instructions. However, + * ntq_store_def only emits conditional writes for nir registersas long + * we can be certain that the offset isn't derived from a load_reg we + * should be fine. + * + * The following CTS test can be used to trigger the problem, which + * causes a GMP violations in the sim without this check: + * dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcastfirst_int + */ + if (vir_in_nonuniform_control_flow(c) && + nir_src_derived_from_reg(offset)) { + return false; + } + /* We can only use unifa with SSBOs if they are read-only. Otherwise * ldunifa won't see the shader writes to that address (possibly * because ldunifa doesn't read from the L2T cache).