From ca690579b8fb66c408773bdd9bdd5e14da4d3338 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 14 May 2021 18:00:45 -0400 Subject: [PATCH] panfrost: Prepack partial RSD at compile time Even for fragment shaders! Just need to merge the partial descriptors. Fixes: c21c6d134bc ("panfrost: Use the pan_shader_prepare_rsd() helper") Signed-off-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/panfrost/pan_assemble.c | 22 +++++++++++++------- src/gallium/drivers/panfrost/pan_cmdstream.c | 22 +++++++++++++------- src/gallium/drivers/panfrost/pan_context.h | 3 +++ 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c index d34cb79dc71..79fbc9614dc 100644 --- a/src/gallium/drivers/panfrost/pan_assemble.c +++ b/src/gallium/drivers/panfrost/pan_assemble.c @@ -40,16 +40,22 @@ #include "tgsi/tgsi_dump.h" static void -pan_upload_shader_descriptor(struct panfrost_context *ctx, - struct panfrost_shader_state *state) +pan_prepare_shader_descriptor(struct panfrost_context *ctx, + struct panfrost_shader_state *state, + bool upload) { const struct panfrost_device *dev = pan_device(ctx->base.screen); - struct panfrost_ptr ptr = - panfrost_pool_alloc_desc(&ctx->descs, RENDERER_STATE); + struct mali_renderer_state_packed *out = &state->partial_rsd; - state->state = pan_take_ref(&ctx->descs, ptr.gpu); + if (upload) { + struct panfrost_ptr ptr = + panfrost_pool_alloc_desc(&ctx->descs, RENDERER_STATE); - pan_pack(ptr.cpu, RENDERER_STATE, cfg) { + state->state = pan_take_ref(&ctx->descs, ptr.gpu); + out = ptr.cpu; + } + + pan_pack(out, RENDERER_STATE, cfg) { pan_shader_prepare_rsd(dev, &state->info, state->bin.gpu, &cfg); } @@ -98,8 +104,8 @@ panfrost_shader_compile(struct panfrost_context *ctx, binary.data, binary.size, 128)); } - if (stage != MESA_SHADER_FRAGMENT) - pan_upload_shader_descriptor(ctx, state); + pan_prepare_shader_descriptor(ctx, state, + stage != 
MESA_SHADER_FRAGMENT); util_dynarray_fini(&binary); diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index c476358b660..049b83f0466 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -460,7 +460,6 @@ static void panfrost_prepare_bifrost_fs_state(struct panfrost_context *ctx, struct MALI_RENDERER_STATE *state) { - const struct panfrost_device *dev = pan_device(ctx->base.screen); struct panfrost_shader_state *fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); struct panfrost_blend_state *so = ctx->blend; bool alpha_to_coverage = so->base.alpha_to_coverage; @@ -471,8 +470,6 @@ panfrost_prepare_bifrost_fs_state(struct panfrost_context *ctx, state->properties.bifrost.allow_forward_pixel_to_be_killed = true; state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY; } else { - pan_shader_prepare_rsd(dev, &fs->info, fs->bin.gpu, state); - /* Track if any colour buffer is reused across draws, either * from reading it directly, or from failing to write it */ bool blend_reads_dest = false; @@ -517,8 +514,6 @@ panfrost_prepare_midgard_fs_state(struct panfrost_context *ctx, state->properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION; state->properties.midgard.force_early_z = true; } else { - pan_shader_prepare_rsd(dev, &fs->info, fs->bin.gpu, state); - /* Reasons to disable early-Z from a shader perspective */ bool late_z = fs->info.fs.can_discard || fs->info.writes_global || fs->info.fs.writes_depth || fs->info.fs.writes_stencil || @@ -653,8 +648,15 @@ panfrost_emit_frag_shader(struct panfrost_context *ctx, mali_ptr *blend_shaders) { struct panfrost_device *dev = pan_device(ctx->base.screen); + struct panfrost_shader_state *fs = + panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT); - pan_pack(fragmeta, RENDERER_STATE, cfg) { + /* We need to merge several partial renderer state descriptors, + * so stage to temporary 
storage rather than reading back write-combine + * memory, which will trash performance. */ + struct mali_renderer_state_packed rsd; + + pan_pack(&rsd, RENDERER_STATE, cfg) { panfrost_prepare_fs_state(ctx, blend_shaders, &cfg); } @@ -664,8 +666,14 @@ panfrost_emit_frag_shader(struct panfrost_context *ctx, /* Word 14: SFBD Blend Equation */ STATIC_ASSERT(MALI_BLEND_EQUATION_LENGTH == 4); - fragmeta->opaque[14] = ctx->blend->equation[0].opaque[0]; + rsd.opaque[14] = ctx->blend->equation[0].opaque[0]; } + + /* Merge with CSO state and upload */ + if (panfrost_fs_required(fs, ctx->blend, &ctx->pipe_framebuffer)) + pan_merge(rsd, fs->partial_rsd, RENDERER_STATE); + + memcpy(fragmeta, &rsd, sizeof(rsd)); } mali_ptr diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h index 70075f737b8..5208c8ddadc 100644 --- a/src/gallium/drivers/panfrost/pan_context.h +++ b/src/gallium/drivers/panfrost/pan_context.h @@ -214,6 +214,9 @@ struct panfrost_shader_state { /* Respectively, shader binary and Renderer State Descriptor */ struct pan_pool_ref bin, state; + /* For fragment shaders, a prepared (but not uploaded) RSD */ + struct mali_renderer_state_packed partial_rsd; + struct pan_shader_info info; struct pipe_stream_output_info stream_output;