broadcom/compiler: allow ldunifa with read-only SSBOs

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14830>
This commit is contained in:
Iago Toral Quiroga
2022-02-02 11:23:11 +01:00
committed by Marge Bot
parent 0a8449b07c
commit 7561ea8fa1
2 changed files with 25 additions and 8 deletions

View File

@@ -3003,8 +3003,11 @@ emit_ldunifa(struct v3d_compile *c, struct qreg *result)
} }
static bool static bool
ntq_emit_load_ubo_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr) ntq_emit_load_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr)
{ {
assert(instr->intrinsic == nir_intrinsic_load_ubo ||
instr->intrinsic == nir_intrinsic_load_ssbo);
/* Every ldunifa auto-increments the unifa address by 4 bytes, so our /* Every ldunifa auto-increments the unifa address by 4 bytes, so our
* current unifa offset is 4 bytes ahead of the offset of the last load. * current unifa offset is 4 bytes ahead of the offset of the last load.
*/ */
@@ -3015,6 +3018,11 @@ ntq_emit_load_ubo_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr)
if (nir_src_is_divergent(instr->src[1])) if (nir_src_is_divergent(instr->src[1]))
return false; return false;
/* We can only use unifa with SSBOs if they are read-only */
bool is_ubo = instr->intrinsic == nir_intrinsic_load_ubo;
if (!is_ubo && !(nir_intrinsic_access(instr) & ACCESS_NON_WRITEABLE))
return false;
/* ldunifa is a 32-bit load instruction so we can only use it with /* ldunifa is a 32-bit load instruction so we can only use it with
* 32-bit aligned addresses. We always produce 32-bit aligned addresses * 32-bit aligned addresses. We always produce 32-bit aligned addresses
* except for types smaller than 32-bit, so in these cases we can only * except for types smaller than 32-bit, so in these cases we can only
@@ -3046,11 +3054,11 @@ ntq_emit_load_ubo_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr)
* shifted up by 1 (0 is gallium's constant buffer 0). * shifted up by 1 (0 is gallium's constant buffer 0).
*/ */
uint32_t index = nir_src_as_uint(instr->src[0]); uint32_t index = nir_src_as_uint(instr->src[0]);
if (c->key->environment == V3D_ENVIRONMENT_OPENGL) if (is_ubo && c->key->environment == V3D_ENVIRONMENT_OPENGL)
index++; index++;
/* We can only keep track of the last unifa address we used with /* We can only keep track of the last unifa address we used with
* constant offset loads. If the new load targets the same UBO and * constant offset loads. If the new load targets the same buffer and
* is close enough to the previous load, we can skip the unifa register * is close enough to the previous load, we can skip the unifa register
* write by emitting dummy ldunifa instructions to update the unifa * write by emitting dummy ldunifa instructions to update the unifa
* address. * address.
@@ -3060,6 +3068,7 @@ ntq_emit_load_ubo_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr)
if (dynamic_src) { if (dynamic_src) {
c->current_unifa_block = NULL; c->current_unifa_block = NULL;
} else if (c->cur_block == c->current_unifa_block && } else if (c->cur_block == c->current_unifa_block &&
c->current_unifa_is_ubo == is_ubo &&
c->current_unifa_index == index && c->current_unifa_index == index &&
c->current_unifa_offset <= const_offset && c->current_unifa_offset <= const_offset &&
c->current_unifa_offset + max_unifa_skip_dist >= const_offset) { c->current_unifa_offset + max_unifa_skip_dist >= const_offset) {
@@ -3067,18 +3076,25 @@ ntq_emit_load_ubo_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr)
ldunifa_skips = (const_offset - c->current_unifa_offset) / 4; ldunifa_skips = (const_offset - c->current_unifa_offset) / 4;
} else { } else {
c->current_unifa_block = c->cur_block; c->current_unifa_block = c->cur_block;
c->current_unifa_is_ubo = is_ubo;
c->current_unifa_index = index; c->current_unifa_index = index;
c->current_unifa_offset = const_offset; c->current_unifa_offset = const_offset;
} }
if (!skip_unifa) { if (!skip_unifa) {
struct qreg base_offset = struct qreg base_offset = is_ubo ?
vir_uniform(c, QUNIFORM_UBO_ADDR, vir_uniform(c, QUNIFORM_UBO_ADDR,
v3d_unit_data_create(index, const_offset)); v3d_unit_data_create(index, const_offset)) :
vir_uniform(c, QUNIFORM_SSBO_OFFSET, index);
struct qreg unifa = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA); struct qreg unifa = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA);
if (!dynamic_src) { if (!dynamic_src) {
vir_MOV_dest(c, unifa, base_offset); if (is_ubo) {
vir_MOV_dest(c, unifa, base_offset);
} else {
vir_ADD_dest(c, unifa, base_offset,
vir_uniform_ui(c, const_offset));
}
} else { } else {
vir_ADD_dest(c, unifa, base_offset, vir_ADD_dest(c, unifa, base_offset,
ntq_get_src(c, instr->src[1], 0)); ntq_get_src(c, instr->src[1], 0));
@@ -3185,7 +3201,8 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
break; break;
case nir_intrinsic_load_ubo: case nir_intrinsic_load_ubo:
if (!ntq_emit_load_ubo_unifa(c, instr)) case nir_intrinsic_load_ssbo:
if (!ntq_emit_load_unifa(c, instr))
ntq_emit_tmu_general(c, instr, false); ntq_emit_tmu_general(c, instr, false);
break; break;
@@ -3199,7 +3216,6 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
case nir_intrinsic_ssbo_atomic_xor: case nir_intrinsic_ssbo_atomic_xor:
case nir_intrinsic_ssbo_atomic_exchange: case nir_intrinsic_ssbo_atomic_exchange:
case nir_intrinsic_ssbo_atomic_comp_swap: case nir_intrinsic_ssbo_atomic_comp_swap:
case nir_intrinsic_load_ssbo:
case nir_intrinsic_store_ssbo: case nir_intrinsic_store_ssbo:
ntq_emit_tmu_general(c, instr, false); ntq_emit_tmu_general(c, instr, false);
break; break;

View File

@@ -720,6 +720,7 @@ struct v3d_compile {
struct qblock *current_unifa_block; struct qblock *current_unifa_block;
int32_t current_unifa_index; int32_t current_unifa_index;
uint32_t current_unifa_offset; uint32_t current_unifa_offset;
bool current_unifa_is_ubo;
/* State for whether we're executing on each channel currently. 0 if /* State for whether we're executing on each channel currently. 0 if
* yes, otherwise a block number + 1 that the channel jumped to. * yes, otherwise a block number + 1 that the channel jumped to.