panvk: Lower load_push_constant with dynamic offset to global loads

The csel-ladder we have in bi_lower_load_push_const_with_dyn_offset()
is not great, and is relying on base/range being valid. It turns out
nir_lower_mem_access_bit_sizes(), which we rely on to make push constant
accesses 32-bit aligned, doesn't preserve those when splitting accesses.

Let's simplify the thing by lowering push constant accesses with a
dynamic offset to global loads.

We also reset the base and range values in the lowering pass, to make
sure the backend doesn't rely on them for other things.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Chia-I Wu <olvaffe@gmail.com>
Reviewed-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32415>
This commit is contained in:
Boris Brezillon
2024-12-04 16:53:57 +01:00
committed by Marge Bot
parent 58d8d4ce7b
commit 0361c1ca08
3 changed files with 77 additions and 0 deletions

View File

@@ -69,6 +69,8 @@ struct panvk_graphics_sysvals {
uint32_t noperspective_varyings;
} vs;
uint64_t push_consts;
#if PAN_ARCH <= 7
/* gl_Layer on Bifrost is a bit of a hack. We have to issue one draw per
* layer, and filter primitives at the VS level.
@@ -92,6 +94,8 @@ struct panvk_compute_sysvals {
uint32_t x, y, z;
} local_group_size;
uint64_t push_consts;
#if PAN_ARCH <= 7
struct {
uint64_t sets[PANVK_DESC_TABLE_COMPUTE_COUNT];

View File

@@ -21,6 +21,11 @@ panvk_per_arch(cmd_prepare_push_uniforms)(struct panvk_cmd_buffer *cmdbuf,
cmdbuf, desc, SYSVALS_PUSH_CONST_BASE + sysvals_sz, 16);
if (push_uniforms.gpu) {
if (ptype == VK_PIPELINE_BIND_POINT_GRAPHICS)
cmdbuf->state.gfx.sysvals.push_consts = push_uniforms.gpu;
else
cmdbuf->state.compute.sysvals.push_consts = push_uniforms.gpu;
/* The first half is used for push constants. */
memcpy(push_uniforms.cpu, cmdbuf->state.push_constants.data,
sizeof(cmdbuf->state.push_constants.data));

View File

@@ -462,6 +462,55 @@ valhall_lower_get_ssbo_size(struct nir_builder *b,
return true;
}
/*
 * Per-intrinsic callback that rewrites load_push_constant so that neither its
 * base nor its range index carries any information afterwards.
 *
 *  - Constant offset: a non-zero base is folded into the constant offset and
 *    the base is cleared; the intrinsic itself is kept.
 *  - Dynamic offset: the intrinsic is replaced by a global load at
 *    (push_consts address read from the sysvals) + (offset + base).
 *
 * Returns false for every other intrinsic, true for any load_push_constant.
 * The data argument is unused.
 */
static bool
lower_load_push_consts(nir_builder *b, nir_intrinsic_instr *intr,
                       UNUSED void *data)
{
   if (intr->intrinsic != nir_intrinsic_load_push_constant)
      return false;

   const unsigned base = nir_intrinsic_base(intr);

   /* Range is cleared unconditionally, so that no later pass can depend on
    * it. */
   nir_intrinsic_set_range(intr, 0);

   b->cursor = nir_before_instr(&intr->instr);

   if (nir_src_is_const(intr->src[0])) {
      /* Constant offset: move a non-zero base into the offset, then zero the
       * base. Nothing else to do when the base is already zero. */
      if (base != 0) {
         uint64_t folded_offset = nir_src_as_uint(intr->src[0]) + base;

         nir_src_rewrite(&intr->src[0], nir_imm_int(b, folded_offset));
         nir_intrinsic_set_base(intr, 0);
      }

      return true;
   }

   /* Dynamic offset: fetch the push constant buffer address from the
    * sysvals. load_sysval() is deliberately avoided here, since it would set
    * .base=SYSVALS_PUSH_CONST_BASE while this pass must leave a base of
    * zero. */
   unsigned sysval_field_offset;

   if (b->shader->info.stage == MESA_SHADER_COMPUTE)
      sysval_field_offset = offsetof(struct panvk_compute_sysvals, push_consts);
   else
      sysval_field_offset = offsetof(struct panvk_graphics_sysvals, push_consts);

   nir_def *sysval_offset =
      nir_imm_int(b, SYSVALS_PUSH_CONST_BASE + sysval_field_offset);
   nir_def *buf_addr = nir_load_push_constant(b, 1, 64, sysval_offset);

   /* Byte offset inside the push constant buffer, base included. */
   nir_def *byte_offset = nir_iadd_imm(b, intr->src[0].ssa, base);

   unsigned load_align = nir_combined_align(nir_intrinsic_align_mul(intr),
                                            nir_intrinsic_align_offset(intr));

   /* Widen the offset to 64 bits before adding it to the buffer address. */
   nir_def *byte_offset64 = nir_u2u64(b, byte_offset);
   nir_def *load_addr = nir_iadd(b, buf_addr, byte_offset64);

   nir_def *loaded = nir_load_global(b, load_addr, load_align,
                                     intr->def.num_components,
                                     intr->def.bit_size);

   nir_def_replace(&intr->def, loaded);
   return true;
}
static void
panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
uint32_t set_layout_count,
@@ -585,6 +634,25 @@ panvk_lower_nir(struct panvk_device *dev, nir_shader *nir,
NIR_PASS(_, nir, nir_shader_instructions_pass, panvk_lower_sysvals,
nir_metadata_control_flow, NULL);
/* Before we lower load_push_constant()s with a dynamic offset to global
* loads, we want to run a few optimization passes to get rid of offset
* calculation involving only constant values. */
bool progress = false;
do {
progress = false;
NIR_PASS(progress, nir, nir_copy_prop);
NIR_PASS(progress, nir, nir_opt_remove_phis);
NIR_PASS(progress, nir, nir_opt_dce);
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_cse);
NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true);
NIR_PASS(progress, nir, nir_opt_algebraic);
NIR_PASS(progress, nir, nir_opt_constant_folding);
} while (progress);
NIR_PASS(_, nir, nir_shader_intrinsics_pass, lower_load_push_consts,
nir_metadata_control_flow, NULL);
}
static VkResult