panfrost: allow promoting sysval UBO to push constants
We already had a path for sysvals in panfrost_emit_const_buf, but it was
unused because we only allowed pushing the default UBO 0. Improves
glmark2 score on G610 from 3051 to 3071, but mostly we need it as a
prerequisite for dynamic blend constants.
Signed-off-by: Olivia Lee <benjamin.lee@collabora.com>
Fixes: 59a3e12039 ("panfrost: do not push "true" UBOs")
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34664>
(cherry picked from commit e93261f579263fcf3900ce3767da087f412c1515)
This commit is contained in:

committed by
Eric Engestrom

parent
518f052e98
commit
3c85516be1
@@ -624,7 +624,7 @@
|
||||
"description": "panfrost: allow promoting sysval UBO to push constants",
|
||||
"nominated": true,
|
||||
"nomination_type": 2,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": "59a3e12039cde5df1451193557512b38cea0039e",
|
||||
"notes": null
|
||||
|
@@ -133,7 +133,6 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
|
||||
|
||||
struct panfrost_compile_inputs inputs = {
|
||||
.gpu_id = panfrost_device_gpu_id(dev),
|
||||
.push_uniforms = true,
|
||||
};
|
||||
|
||||
if (dev->arch >= 9)
|
||||
@@ -200,6 +199,18 @@ panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
|
||||
|
||||
NIR_PASS(_, s, panfrost_nir_lower_sysvals, dev->arch, &out->sysvals);
|
||||
|
||||
/* For now, we only allow pushing the default UBO 0, and the sysval UBO (if
|
||||
* present). Both of these are mapped on the CPU, but other UBOs are not.
|
||||
* When we switch to pushing UBOs with a compute kernel (or CSF instructions)
|
||||
* we can relax this. */
|
||||
assert(s->info.first_ubo_is_default_ubo);
|
||||
inputs.pushable_ubos = BITFIELD_BIT(0);
|
||||
|
||||
if (out->sysvals.sysval_count != 0) {
|
||||
unsigned sysval_ubo = s->info.num_ubos - 1;
|
||||
inputs.pushable_ubos |= BITFIELD_BIT(sysval_ubo);
|
||||
}
|
||||
|
||||
/* Lower resource indices */
|
||||
NIR_PASS(_, s, panfrost_nir_lower_res_indices, &inputs);
|
||||
|
||||
|
@@ -36,16 +36,17 @@ bi_is_ubo(bi_instr *ins)
|
||||
(ins->seg == BI_SEG_UBO);
|
||||
}
|
||||
|
||||
/* For now, we only allow pushing UBO 0. This matches the Gallium convention
|
||||
* where UBO 0 is mapped on the CPU but other UBOs are not. When we switch to
|
||||
* pushing UBOs with a compute kernel (or CSF instructions), we can relax this.
|
||||
*/
|
||||
static bool
|
||||
bi_is_pushable_ubo(bi_instr *ins)
|
||||
bi_is_pushable_ubo(bi_context *ctx, bi_instr *ins)
|
||||
{
|
||||
return bi_is_ubo(ins) && (ins->src[0].type == BI_INDEX_CONSTANT) &&
|
||||
(ins->src[1].type == BI_INDEX_CONSTANT) &&
|
||||
((ins->src[0].value & 0x3) == 0) && (ins->src[1].value == 0);
|
||||
if (!(bi_is_ubo(ins) && (ins->src[0].type == BI_INDEX_CONSTANT) &&
|
||||
(ins->src[1].type == BI_INDEX_CONSTANT)))
|
||||
return false;
|
||||
|
||||
unsigned ubo = pan_res_handle_get_index(ins->src[1].value);
|
||||
unsigned offset = ins->src[0].value;
|
||||
|
||||
return ctx->inputs->pushable_ubos & BITFIELD_BIT(ubo) && (offset & 0x3) == 0;
|
||||
}
|
||||
|
||||
/* Represents use data for a single UBO */
|
||||
@@ -73,7 +74,7 @@ bi_analyze_ranges(bi_context *ctx)
|
||||
res.blocks = calloc(res.nr_blocks, sizeof(struct bi_ubo_block));
|
||||
|
||||
bi_foreach_instr_global(ctx, ins) {
|
||||
if (!bi_is_pushable_ubo(ins))
|
||||
if (!bi_is_pushable_ubo(ctx, ins))
|
||||
continue;
|
||||
|
||||
unsigned ubo = pan_res_handle_get_index(ins->src[1].value);
|
||||
@@ -134,9 +135,6 @@ bi_pick_ubo(struct panfrost_ubo_push *push, struct bi_ubo_analysis *analysis)
|
||||
void
|
||||
bi_opt_push_ubo(bi_context *ctx)
|
||||
{
|
||||
/* We only push from the "default" UBO 0 */
|
||||
assert(ctx->nir->info.first_ubo_is_default_ubo && "precondition");
|
||||
|
||||
struct bi_ubo_analysis analysis = bi_analyze_ranges(ctx);
|
||||
bi_pick_ubo(ctx->info.push, &analysis);
|
||||
|
||||
@@ -149,7 +147,7 @@ bi_opt_push_ubo(bi_context *ctx)
|
||||
unsigned ubo = pan_res_handle_get_index(ins->src[1].value);
|
||||
unsigned offset = ins->src[0].value;
|
||||
|
||||
if (!bi_is_pushable_ubo(ins)) {
|
||||
if (!bi_is_pushable_ubo(ctx, ins)) {
|
||||
/* The load can't be pushed, so this UBO needs to be
|
||||
* uploaded conventionally */
|
||||
if (ins->src[1].type == BI_INDEX_CONSTANT)
|
||||
|
@@ -1346,7 +1346,7 @@ bi_emit_load_ubo(bi_builder *b, nir_intrinsic_instr *instr)
|
||||
static void
|
||||
bi_emit_load_push_constant(bi_builder *b, nir_intrinsic_instr *instr)
|
||||
{
|
||||
assert(!b->shader->inputs->push_uniforms && "can't mix push constant forms");
|
||||
assert(!b->shader->inputs->pushable_ubos && "can't mix push constant forms");
|
||||
|
||||
nir_src *offset = &instr->src[0];
|
||||
assert(!nir_intrinsic_base(instr) && "base must be zero");
|
||||
@@ -5963,7 +5963,7 @@ bi_compile_variant_nir(nir_shader *nir,
|
||||
bi_validate(ctx, "Early lowering");
|
||||
|
||||
/* Runs before copy prop */
|
||||
if (optimize && ctx->inputs->push_uniforms) {
|
||||
if (optimize && ctx->inputs->pushable_ubos) {
|
||||
bi_opt_push_ubo(ctx);
|
||||
}
|
||||
|
||||
@@ -5988,7 +5988,7 @@ bi_compile_variant_nir(nir_shader *nir,
|
||||
bi_opt_dce(ctx, false);
|
||||
bi_opt_cse(ctx);
|
||||
bi_opt_dce(ctx, false);
|
||||
if (ctx->inputs->push_uniforms)
|
||||
if (ctx->inputs->pushable_ubos)
|
||||
bi_opt_reorder_push(ctx);
|
||||
bi_validate(ctx, "Optimization passes");
|
||||
}
|
||||
|
@@ -42,15 +42,17 @@ mir_is_ubo(midgard_instruction *ins)
|
||||
return (ins->type == TAG_LOAD_STORE_4) && (OP_IS_UBO_READ(ins->op));
|
||||
}
|
||||
|
||||
/* We only allow pushing UBO 0. This matches the Gallium convention
|
||||
* where UBO 0 is mapped on the CPU but other UBOs are not.
|
||||
*/
|
||||
static bool
|
||||
mir_is_pushable_ubo(midgard_instruction *ins)
|
||||
mir_is_pushable_ubo(compiler_context *ctx, midgard_instruction *ins)
|
||||
{
|
||||
return mir_is_ubo(ins) && !(ins->constants.u32[0] & 0xF) &&
|
||||
if (!mir_is_ubo(ins))
|
||||
return false;
|
||||
|
||||
unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store);
|
||||
|
||||
return !(ins->constants.u32[0] & 0xF) &&
|
||||
(ins->src[1] == ~0) && (ins->src[2] == ~0) &&
|
||||
midgard_unpack_ubo_index_imm(ins->load_store) == 0;
|
||||
(ctx->inputs->pushable_ubos & BITFIELD_BIT(ubo));
|
||||
}
|
||||
|
||||
/* Represents use data for a single UBO */
|
||||
@@ -78,7 +80,7 @@ mir_analyze_ranges(compiler_context *ctx)
|
||||
res.blocks = calloc(res.nr_blocks, sizeof(struct mir_ubo_block));
|
||||
|
||||
mir_foreach_instr_global(ctx, ins) {
|
||||
if (!mir_is_pushable_ubo(ins))
|
||||
if (!mir_is_pushable_ubo(ctx, ins))
|
||||
continue;
|
||||
|
||||
unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store);
|
||||
@@ -269,16 +271,13 @@ mir_special_indices(compiler_context *ctx)
|
||||
void
|
||||
midgard_promote_uniforms(compiler_context *ctx)
|
||||
{
|
||||
if (!ctx->inputs->push_uniforms) {
|
||||
if (!ctx->inputs->pushable_ubos) {
|
||||
/* If nothing is pushed, all UBOs need to be uploaded
|
||||
* conventionally */
|
||||
ctx->ubo_mask = ~0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* We only push from the "default" UBO 0 */
|
||||
assert(ctx->nir->info.first_ubo_is_default_ubo && "precondition");
|
||||
|
||||
struct mir_ubo_analysis analysis = mir_analyze_ranges(ctx);
|
||||
|
||||
unsigned work_count = mir_work_heuristic(ctx, &analysis);
|
||||
@@ -300,7 +299,7 @@ midgard_promote_uniforms(compiler_context *ctx)
|
||||
unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store);
|
||||
unsigned qword = ins->constants.u32[0] / 16;
|
||||
|
||||
if (!mir_is_pushable_ubo(ins)) {
|
||||
if (!mir_is_pushable_ubo(ctx, ins)) {
|
||||
if (ins->src[1] == ~0)
|
||||
ctx->ubo_mask |= BITSET_BIT(ubo);
|
||||
else
|
||||
|
@@ -103,9 +103,11 @@ struct panfrost_compile_inputs {
|
||||
uint64_t bifrost_blend_desc;
|
||||
} blend;
|
||||
bool no_idvs;
|
||||
bool push_uniforms;
|
||||
uint32_t view_mask;
|
||||
|
||||
/* Mask of UBOs that may be moved to push constants */
|
||||
uint32_t pushable_ubos;
|
||||
|
||||
/* Used on Valhall.
|
||||
*
|
||||
* Bit mask of special desktop-only varyings (e.g VARYING_SLOT_TEX0)
|
||||
|
Reference in New Issue
Block a user