panfrost: allow promoting sysval UBO to push constants

We already had a path for sysvals in panfrost_emit_const_buf, but it was
unused because we only allowed pushing the default UBO 0. Improves
glmark2 score on G610 from 3051 to 3071, but mostly we need it as a
prerequisite for dynamic blend constants.

Signed-off-by: Olivia Lee <benjamin.lee@collabora.com>
Fixes: 59a3e12039 ("panfrost: do not push "true" UBOs")
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34664>
(cherry picked from commit e93261f579263fcf3900ce3767da087f412c1515)
This commit is contained in:
Olivia Lee
2025-04-21 23:29:40 -07:00
committed by Eric Engestrom
parent 518f052e98
commit 3c85516be1
6 changed files with 41 additions and 31 deletions

View File

@@ -624,7 +624,7 @@
"description": "panfrost: allow promoting sysval UBO to push constants",
"nominated": true,
"nomination_type": 2,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "59a3e12039cde5df1451193557512b38cea0039e",
"notes": null

View File

@@ -133,7 +133,6 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
struct panfrost_compile_inputs inputs = {
.gpu_id = panfrost_device_gpu_id(dev),
.push_uniforms = true,
};
if (dev->arch >= 9)
@@ -200,6 +199,18 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
NIR_PASS(_, s, panfrost_nir_lower_sysvals, dev->arch, &out->sysvals);
/* For now, we only allow pushing the default UBO 0, and the sysval UBO (if
* present). Both of these are mapped on the CPU, but other UBOs are not.
* When we switch to pushing UBOs with a compute kernel (or CSF instructions)
* we can relax this. */
assert(s->info.first_ubo_is_default_ubo);
inputs.pushable_ubos = BITFIELD_BIT(0);
if (out->sysvals.sysval_count != 0) {
unsigned sysval_ubo = s->info.num_ubos - 1;
inputs.pushable_ubos |= BITFIELD_BIT(sysval_ubo);
}
/* Lower resource indices */
NIR_PASS(_, s, panfrost_nir_lower_res_indices, &inputs);

View File

@@ -36,16 +36,17 @@ bi_is_ubo(bi_instr *ins)
(ins->seg == BI_SEG_UBO);
}
/* For now, we only allow pushing UBO 0. This matches the Gallium convention
* where UBO 0 is mapped on the CPU but other UBOs are not. When we switch to
* pushing UBOs with a compute kernel (or CSF instructions), we can relax this.
*/
static bool
bi_is_pushable_ubo(bi_instr *ins)
bi_is_pushable_ubo(bi_context *ctx, bi_instr *ins)
{
return bi_is_ubo(ins) && (ins->src[0].type == BI_INDEX_CONSTANT) &&
(ins->src[1].type == BI_INDEX_CONSTANT) &&
((ins->src[0].value & 0x3) == 0) && (ins->src[1].value == 0);
if (!(bi_is_ubo(ins) && (ins->src[0].type == BI_INDEX_CONSTANT) &&
(ins->src[1].type == BI_INDEX_CONSTANT)))
return false;
unsigned ubo = pan_res_handle_get_index(ins->src[1].value);
unsigned offset = ins->src[0].value;
return ctx->inputs->pushable_ubos & BITFIELD_BIT(ubo) && (offset & 0x3) == 0;
}
/* Represents use data for a single UBO */
@@ -73,7 +74,7 @@ bi_analyze_ranges(bi_context *ctx)
res.blocks = calloc(res.nr_blocks, sizeof(struct bi_ubo_block));
bi_foreach_instr_global(ctx, ins) {
if (!bi_is_pushable_ubo(ins))
if (!bi_is_pushable_ubo(ctx, ins))
continue;
unsigned ubo = pan_res_handle_get_index(ins->src[1].value);
@@ -134,9 +135,6 @@ bi_pick_ubo(struct panfrost_ubo_push *push, struct bi_ubo_analysis *analysis)
void
bi_opt_push_ubo(bi_context *ctx)
{
/* We only push from the "default" UBO 0 */
assert(ctx->nir->info.first_ubo_is_default_ubo && "precondition");
struct bi_ubo_analysis analysis = bi_analyze_ranges(ctx);
bi_pick_ubo(ctx->info.push, &analysis);
@@ -149,7 +147,7 @@ bi_opt_push_ubo(bi_context *ctx)
unsigned ubo = pan_res_handle_get_index(ins->src[1].value);
unsigned offset = ins->src[0].value;
if (!bi_is_pushable_ubo(ins)) {
if (!bi_is_pushable_ubo(ctx, ins)) {
/* The load can't be pushed, so this UBO needs to be
* uploaded conventionally */
if (ins->src[1].type == BI_INDEX_CONSTANT)

View File

@@ -1346,7 +1346,7 @@ bi_emit_load_ubo(bi_builder *b, nir_intrinsic_instr *instr)
static void
bi_emit_load_push_constant(bi_builder *b, nir_intrinsic_instr *instr)
{
assert(!b->shader->inputs->push_uniforms && "can't mix push constant forms");
assert(!b->shader->inputs->pushable_ubos && "can't mix push constant forms");
nir_src *offset = &instr->src[0];
assert(!nir_intrinsic_base(instr) && "base must be zero");
@@ -5963,7 +5963,7 @@ bi_compile_variant_nir(nir_shader *nir,
bi_validate(ctx, "Early lowering");
/* Runs before copy prop */
if (optimize && ctx->inputs->push_uniforms) {
if (optimize && ctx->inputs->pushable_ubos) {
bi_opt_push_ubo(ctx);
}
@@ -5988,7 +5988,7 @@ bi_compile_variant_nir(nir_shader *nir,
bi_opt_dce(ctx, false);
bi_opt_cse(ctx);
bi_opt_dce(ctx, false);
if (ctx->inputs->push_uniforms)
if (ctx->inputs->pushable_ubos)
bi_opt_reorder_push(ctx);
bi_validate(ctx, "Optimization passes");
}

View File

@@ -42,15 +42,17 @@ mir_is_ubo(midgard_instruction *ins)
return (ins->type == TAG_LOAD_STORE_4) && (OP_IS_UBO_READ(ins->op));
}
/* We only allow pushing UBO 0. This matches the Gallium convention
* where UBO 0 is mapped on the CPU but other UBOs are not.
*/
static bool
mir_is_pushable_ubo(midgard_instruction *ins)
mir_is_pushable_ubo(compiler_context *ctx, midgard_instruction *ins)
{
return mir_is_ubo(ins) && !(ins->constants.u32[0] & 0xF) &&
if (!mir_is_ubo(ins))
return false;
unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store);
return !(ins->constants.u32[0] & 0xF) &&
(ins->src[1] == ~0) && (ins->src[2] == ~0) &&
midgard_unpack_ubo_index_imm(ins->load_store) == 0;
(ctx->inputs->pushable_ubos & BITFIELD_BIT(ubo));
}
/* Represents use data for a single UBO */
@@ -78,7 +80,7 @@ mir_analyze_ranges(compiler_context *ctx)
res.blocks = calloc(res.nr_blocks, sizeof(struct mir_ubo_block));
mir_foreach_instr_global(ctx, ins) {
if (!mir_is_pushable_ubo(ins))
if (!mir_is_pushable_ubo(ctx, ins))
continue;
unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store);
@@ -269,16 +271,13 @@ mir_special_indices(compiler_context *ctx)
void
midgard_promote_uniforms(compiler_context *ctx)
{
if (!ctx->inputs->push_uniforms) {
if (!ctx->inputs->pushable_ubos) {
/* If nothing is pushed, all UBOs need to be uploaded
* conventionally */
ctx->ubo_mask = ~0;
return;
}
/* We only push from the "default" UBO 0 */
assert(ctx->nir->info.first_ubo_is_default_ubo && "precondition");
struct mir_ubo_analysis analysis = mir_analyze_ranges(ctx);
unsigned work_count = mir_work_heuristic(ctx, &analysis);
@@ -300,7 +299,7 @@ midgard_promote_uniforms(compiler_context *ctx)
unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store);
unsigned qword = ins->constants.u32[0] / 16;
if (!mir_is_pushable_ubo(ins)) {
if (!mir_is_pushable_ubo(ctx, ins)) {
if (ins->src[1] == ~0)
ctx->ubo_mask |= BITSET_BIT(ubo);
else

View File

@@ -103,9 +103,11 @@ struct panfrost_compile_inputs {
uint64_t bifrost_blend_desc;
} blend;
bool no_idvs;
bool push_uniforms;
uint32_t view_mask;
/* Mask of UBOs that may be moved to push constants */
uint32_t pushable_ubos;
/* Used on Valhall.
*
* Bit mask of special desktop-only varyings (e.g VARYING_SLOT_TEX0)