From 7561ea8fa17f28072c75ccf08bb8a56c610756ab Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Wed, 2 Feb 2022 11:23:11 +0100 Subject: [PATCH] broadcom/compiler: allow ldunifa with read-only SSBOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Alejandro PiƱeiro Part-of: --- src/broadcom/compiler/nir_to_vir.c | 32 +++++++++++++++++++++------- src/broadcom/compiler/v3d_compiler.h | 1 + 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 9149b304c7b..ed2333d1365 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -3003,8 +3003,11 @@ emit_ldunifa(struct v3d_compile *c, struct qreg *result) } static bool -ntq_emit_load_ubo_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr) +ntq_emit_load_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr) { + assert(instr->intrinsic == nir_intrinsic_load_ubo || + instr->intrinsic == nir_intrinsic_load_ssbo); + /* Every ldunifa auto-increments the unifa address by 4 bytes, so our * current unifa offset is 4 bytes ahead of the offset of the last load. */ @@ -3015,6 +3018,11 @@ ntq_emit_load_ubo_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr) if (nir_src_is_divergent(instr->src[1])) return false; + /* We can only use unifa with SSBOs if they are read-only */ + bool is_ubo = instr->intrinsic == nir_intrinsic_load_ubo; + if (!is_ubo && !(nir_intrinsic_access(instr) & ACCESS_NON_WRITEABLE)) + return false; + /* ldunifa is a 32-bit load instruction so we can only use it with * 32-bit aligned addresses. We always produce 32-bit aligned addresses * except for types smaller than 32-bit, so in these cases we can only @@ -3046,11 +3054,11 @@ ntq_emit_load_ubo_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr) * shifted up by 1 (0 is gallium's constant buffer 0). */ uint32_t index = nir_src_as_uint(instr->src[0]); - if (c->key->environment == V3D_ENVIRONMENT_OPENGL) + if (is_ubo && c->key->environment == V3D_ENVIRONMENT_OPENGL) index++; /* We can only keep track of the last unifa address we used with - * constant offset loads. If the new load targets the same UBO and + * constant offset loads. If the new load targets the same buffer and * is close enough to the previous load, we can skip the unifa register * write by emitting dummy ldunifa instructions to update the unifa * address. @@ -3060,6 +3068,7 @@ ntq_emit_load_ubo_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr) if (dynamic_src) { c->current_unifa_block = NULL; } else if (c->cur_block == c->current_unifa_block && + c->current_unifa_is_ubo == is_ubo && c->current_unifa_index == index && c->current_unifa_offset <= const_offset && c->current_unifa_offset + max_unifa_skip_dist >= const_offset) { @@ -3067,18 +3076,25 @@ ntq_emit_load_ubo_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr) ldunifa_skips = (const_offset - c->current_unifa_offset) / 4; } else { c->current_unifa_block = c->cur_block; + c->current_unifa_is_ubo = is_ubo; c->current_unifa_index = index; c->current_unifa_offset = const_offset; } if (!skip_unifa) { - struct qreg base_offset = + struct qreg base_offset = is_ubo ? vir_uniform(c, QUNIFORM_UBO_ADDR, - v3d_unit_data_create(index, const_offset)); + v3d_unit_data_create(index, const_offset)) : + vir_uniform(c, QUNIFORM_SSBO_OFFSET, index); struct qreg unifa = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA); if (!dynamic_src) { - vir_MOV_dest(c, unifa, base_offset); + if (is_ubo) { + vir_MOV_dest(c, unifa, base_offset); + } else { + vir_ADD_dest(c, unifa, base_offset, + vir_uniform_ui(c, const_offset)); + } } else { vir_ADD_dest(c, unifa, base_offset, ntq_get_src(c, instr->src[1], 0)); @@ -3185,7 +3201,8 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) break; case nir_intrinsic_load_ubo: - if (!ntq_emit_load_ubo_unifa(c, instr)) + case nir_intrinsic_load_ssbo: + if (!ntq_emit_load_unifa(c, instr)) ntq_emit_tmu_general(c, instr, false); break; @@ -3199,7 +3216,6 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) case nir_intrinsic_ssbo_atomic_xor: case nir_intrinsic_ssbo_atomic_exchange: case nir_intrinsic_ssbo_atomic_comp_swap: - case nir_intrinsic_load_ssbo: case nir_intrinsic_store_ssbo: ntq_emit_tmu_general(c, instr, false); break; diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 3cf6fdc14e8..e42ea184c45 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -720,6 +720,7 @@ struct v3d_compile { struct qblock *current_unifa_block; int32_t current_unifa_index; uint32_t current_unifa_offset; + bool current_unifa_is_ubo; /* State for whether we're executing on each channel currently. 0 if * yes, otherwise a block number + 1 that the channel jumped to.