panfrost: Clear with a quad to avoid flushing

Flushing the batch midframe (splitting a renderpass) is expensive on a tiler, as it requires the GPU to flush the framebuffer contents to main memory and read them back. Clearing the framebuffer should not trigger a flush. Apps expect clears to be (almost) free; flushing for a clear is, at the very least, unexpected behaviour. The only reason we previously flushed was to ensure we could always use a "fast" clear. But a slow clear is a heck of a lot faster than a flush ;-) Instead of flushing, we should clear with a draw (via u_blitter) when a fast clear isn't possible. This fixes pathological performance for applications that rely on partial clears within a frame. This issue was identified with Inochi2D, which repeatedly clears the stencil buffer midframe, in order to implement masking efficiently with the stencil buffer. In total, the all-important workload of rendering Asahi Lina is improved from 17fps to 29fps on a panfrost device. Fixes: c138ca80d2 ("panfrost: Make sure a clear does not re-use a pre-existing batch") Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17112>
2022-06-17 16:40:32 -04:00
parent 35a80418a1
commit 638b22354e
3 changed files with 23 additions and 14 deletions
--- a/src/gallium/drivers/panfrost/pan_blit.c
+++ b/src/gallium/drivers/panfrost/pan_blit.c
@@ -31,12 +31,10 @@
 #include "pan_util.h"
 #include "util/format/u_format.h"

-static void
-panfrost_blitter_save(
-        struct panfrost_context *ctx,
-        struct blitter_context *blitter,
-        bool render_cond)
+void
+panfrost_blitter_save(struct panfrost_context *ctx, bool render_cond)
 {
+        struct blitter_context *blitter = ctx->blitter;

        util_blitter_save_vertex_buffer_slot(blitter, ctx->vertex_buffers);
        util_blitter_save_vertex_elements(blitter, ctx->vertex);
@@ -82,6 +80,6 @@ panfrost_blit(struct pipe_context *pipe,
        if (!util_blitter_is_blit_supported(ctx->blitter, info))
                unreachable("Unsupported blit\n");

-        panfrost_blitter_save(ctx, ctx->blitter, info->render_condition_enable);
+        panfrost_blitter_save(ctx, info->render_condition_enable);
        util_blitter_blit(ctx->blitter, info);
 }
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -62,18 +62,26 @@ panfrost_clear(
        double depth, unsigned stencil)
 {
        struct panfrost_context *ctx = pan_context(pipe);
+        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);

        if (!panfrost_render_condition_check(ctx))
                return;

-        /* TODO: panfrost_get_fresh_batch_for_fbo() instantiates a new batch if
-         * the existing batch targeting this FBO has draws. We could probably
-         * avoid that by replacing plain clears by quad-draws with a specific
-         * color/depth/stencil value, thus avoiding the generation of extra
-         * fragment jobs.
-         */
-        struct panfrost_batch *batch = panfrost_get_fresh_batch_for_fbo(ctx, "Slow clear");
-        panfrost_batch_clear(batch, buffers, color, depth, stencil);
+        /* At the start of the batch, we can clear for free */
+        if (!batch->scoreboard.first_job) {
+                panfrost_batch_clear(batch, buffers, color, depth, stencil);
+                return;
+        }
+
+        /* Once there is content, clear with a fullscreen quad */
+        panfrost_blitter_save(ctx, false /* render condition */);
+
+        util_blitter_clear(ctx->blitter,
+                           ctx->pipe_framebuffer.width,
+                           ctx->pipe_framebuffer.height,
+                           util_framebuffer_get_num_layers(&ctx->pipe_framebuffer),
+                           buffers, color, depth, stencil,
+                           util_framebuffer_get_num_samples(&ctx->pipe_framebuffer) > 1);
 }

 bool
--- a/src/gallium/drivers/panfrost/pan_resource.h
+++ b/src/gallium/drivers/panfrost/pan_resource.h
@@ -124,6 +124,9 @@ void panfrost_resource_context_init(struct pipe_context *pctx);

 /* Blitting */

+void
+panfrost_blitter_save(struct panfrost_context *ctx, bool render_cond);
+
 void
 panfrost_blit(struct pipe_context *pipe,
              const struct pipe_blit_info *info);