From 212f71734e194eeb2007a94ead2ecbe476fe772b Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Date: Sun, 5 Nov 2023 12:14:48 -0400
Subject: [PATCH] asahi: Refactor encoder data structure

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26056>
---
 src/gallium/drivers/asahi/agx_batch.c | 25 +++++++++++++++++----
 src/gallium/drivers/asahi/agx_pipe.c  |  4 ++--
 src/gallium/drivers/asahi/agx_state.c | 32 +++++++++++++--------------
 src/gallium/drivers/asahi/agx_state.h | 15 ++++++++++---
 4 files changed, 50 insertions(+), 26 deletions(-)

diff --git a/src/gallium/drivers/asahi/agx_batch.c b/src/gallium/drivers/asahi/agx_batch.c
index 9a1f0fa937f..c4ddffca87d 100644
--- a/src/gallium/drivers/asahi/agx_batch.c
+++ b/src/gallium/drivers/asahi/agx_batch.c
@@ -76,6 +76,18 @@ agx_batch_mark_complete(struct agx_batch *batch)
    BITSET_CLEAR(batch->ctx->batches.submitted, batch_idx);
 }
 
+static struct agx_encoder
+agx_encoder_allocate(struct agx_batch *batch, struct agx_device *dev)
+{
+   struct agx_bo *bo = agx_bo_create(dev, 0x80000, 0, "Encoder");
+   /* Fresh BO backs the encoder: write from ptr.cpu up to ptr.cpu + size */
+   return (struct agx_encoder){
+      .bo = bo,
+      .current = bo->ptr.cpu,                   /* write cursor */
+      .end = (uint8_t *)bo->ptr.cpu + bo->size, /* one past the last byte */
+   };
+}
+
 static void
 agx_batch_init(struct agx_context *ctx,
                const struct pipe_framebuffer_state *key,
@@ -101,9 +113,13 @@ agx_batch_init(struct agx_context *ctx,
              batch->bo_list.word_count * sizeof(BITSET_WORD));
    }
 
-   batch->encoder = agx_bo_create(dev, 0x80000, 0, "Encoder");
-   batch->encoder_current = batch->encoder->ptr.cpu;
-   batch->encoder_end = batch->encoder_current + batch->encoder->size;
+   if (batch->key.width == AGX_COMPUTE_BATCH_WIDTH) {
+      batch->cdm = agx_encoder_allocate(batch, dev);
+      memset(&batch->vdm, 0, sizeof(batch->vdm));
+   } else {
+      batch->vdm = agx_encoder_allocate(batch, dev);
+      memset(&batch->cdm, 0, sizeof(batch->cdm));
+   }
 
    util_dynarray_init(&batch->scissor, ctx);
    util_dynarray_init(&batch->depth_bias, ctx);
@@ -179,7 +195,8 @@ agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch, bool reset)
       }
    }
 
-   agx_bo_unreference(batch->encoder);
+   agx_bo_unreference(batch->vdm.bo);
+   agx_bo_unreference(batch->cdm.bo);
    agx_pool_cleanup(&batch->pool);
    agx_pool_cleanup(&batch->pipeline_pool);
 
diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c
index 50696b66a74..eb9e286caa8 100644
--- a/src/gallium/drivers/asahi/agx_pipe.c
+++ b/src/gallium/drivers/asahi/agx_pipe.c
@@ -1293,7 +1293,7 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
 
    /* Finalize the encoder */
    uint8_t stop[5 + 64] = {0x00, 0x00, 0x00, 0xc0, 0x00};
-   memcpy(batch->encoder_current, stop, sizeof(stop));
+   memcpy(batch->vdm.current, stop, sizeof(stop));
 
    uint64_t pipeline_background = agx_build_meta(batch, false, false);
    uint64_t pipeline_background_partial = agx_build_meta(batch, false, true);
@@ -1340,7 +1340,7 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
     *  - BO for internal shaders
     *  - BOs added to the batch explicitly
     */
-   agx_batch_add_bo(batch, batch->encoder);
+   agx_batch_add_bo(batch, batch->vdm.bo);
 
    /* Occlusion queries are allocated as a contiguous pool */
    unsigned oq_count =
diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c
index 9cf58f5f3c2..05b097d7cba 100644
--- a/src/gallium/drivers/asahi/agx_state.c
+++ b/src/gallium/drivers/asahi/agx_state.c
@@ -2633,7 +2633,7 @@ agx_batch_init_state(struct agx_batch *batch)
    }
 
    /* Emit state on the batch that we don't change and so don't dirty track */
-   uint8_t *out = batch->encoder_current;
+   uint8_t *out = batch->vdm.current;
    struct agx_ppp_update ppp =
       agx_new_ppp_update(&batch->pool, (struct AGX_PPP_HEADER){
                                           .w_clamp = true,
@@ -2652,7 +2652,7 @@ agx_batch_init_state(struct agx_batch *batch)
    /* clang-format on */
 
    agx_ppp_fini(&out, &ppp);
-   batch->encoder_current = out;
+   batch->vdm.current = out;
 
    /* Mark it as initialized now, since agx_batch_writes() will check this. */
    batch->initialized = true;
@@ -3074,11 +3074,10 @@ agx_scissor_culls_everything(struct agx_context *ctx)
 }
 
 static void
-agx_ensure_cmdbuf_has_space(struct agx_batch *batch, size_t space)
+agx_ensure_vdm_cmdbuf_has_space(struct agx_batch *batch, size_t space)
 {
    /* Assert that we have space for a link tag */
-   assert((batch->encoder_current + AGX_VDM_STREAM_LINK_LENGTH) <=
-             batch->encoder_end &&
+   assert((batch->vdm.current + AGX_VDM_STREAM_LINK_LENGTH) <= batch->vdm.end &&
           "Encoder overflowed");
 
    /* Always leave room for a link tag, in case we run out of space later,
@@ -3089,7 +3088,7 @@ agx_ensure_cmdbuf_has_space(struct agx_batch *batch, size_t space)
    space += AGX_VDM_STREAM_LINK_LENGTH + 0x800;
 
    /* If there is room in the command buffer, we're done */
-   if (likely((batch->encoder_end - batch->encoder_current) >= space))
+   if (likely((batch->vdm.end - batch->vdm.current) >= space))
       return;
 
    /* Otherwise, we need to allocate a new command buffer. We use memory owned
@@ -3099,14 +3098,14 @@ agx_ensure_cmdbuf_has_space(struct agx_batch *batch, size_t space)
    struct agx_ptr T = agx_pool_alloc_aligned(&batch->pool, size, 256);
 
    /* Jump from the old command buffer to the new command buffer */
-   agx_pack(batch->encoder_current, VDM_STREAM_LINK, cfg) {
+   agx_pack(batch->vdm.current, VDM_STREAM_LINK, cfg) {
       cfg.target_lo = T.gpu & BITFIELD_MASK(32);
       cfg.target_hi = T.gpu >> 32;
    }
 
    /* Swap out the command buffer */
-   batch->encoder_current = T.cpu;
-   batch->encoder_end = batch->encoder_current + size;
+   batch->vdm.current = T.cpu;
+   batch->vdm.end = batch->vdm.current + size;
 }
 
 static void
@@ -3227,7 +3226,7 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
     * We only need to do this once per draw as long as we conservatively
     * estimate the maximum bytes of VDM commands that this draw will emit.
     */
-   agx_ensure_cmdbuf_has_space(
+   agx_ensure_vdm_cmdbuf_has_space(
       batch,
       (AGX_VDM_STATE_LENGTH * 2) + (AGX_PPP_STATE_LENGTH * MAX_PPP_UPDATES) +
          AGX_VDM_STATE_RESTART_INDEX_LENGTH +
@@ -3240,7 +3239,7 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
          AGX_INDEX_LIST_COUNT_LENGTH + AGX_INDEX_LIST_INSTANCES_LENGTH +
          AGX_INDEX_LIST_START_LENGTH + AGX_INDEX_LIST_BUFFER_SIZE_LENGTH);
 
-   uint8_t *out = agx_encode_state(batch, batch->encoder_current,
+   uint8_t *out = agx_encode_state(batch, batch->vdm.current,
                                    reduced_prim == MESA_PRIM_LINES,
                                    reduced_prim == MESA_PRIM_POINTS);
 
@@ -3349,9 +3348,8 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
       out += AGX_VDM_BARRIER_LENGTH;
    }
 
-   batch->encoder_current = out;
-   assert((batch->encoder_current + AGX_VDM_STREAM_LINK_LENGTH) <=
-             batch->encoder_end &&
+   batch->vdm.current = out;
+   assert((batch->vdm.current + AGX_VDM_STREAM_LINK_LENGTH) <= batch->vdm.end &&
           "Failed to reserve sufficient space in encoder");
    agx_dirty_reset_graphics(ctx);
 
@@ -3439,7 +3437,7 @@ agx_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
    agx_upload_uniforms(batch);
 
    /* TODO: Ensure space if we allow multiple kernels in a batch */
-   uint8_t *out = batch->encoder_current;
+   uint8_t *out = batch->cdm.current;
 
    agx_pack(out, CDM_HEADER, cfg) {
       if (info->indirect)
@@ -3492,8 +3490,8 @@ agx_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
       ;
    out += AGX_CDM_LAUNCH_LENGTH;
 
-   batch->encoder_current = out;
-   assert(batch->encoder_current <= batch->encoder_end &&
+   batch->cdm.current = out;
+   assert(batch->cdm.current <= batch->cdm.end &&
           "Failed to reserve sufficient space in encoder");
    /* TODO: Dirty tracking? */
 
diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h
index 554dcdf1586..8ea838458b0 100644
--- a/src/gallium/drivers/asahi/agx_state.h
+++ b/src/gallium/drivers/asahi/agx_state.h
@@ -261,6 +261,12 @@ uint16_t agx_sampler_heap_add(struct agx_device *dev,
                               struct agx_sampler_heap *heap,
                               struct agx_sampler_packed *sampler);
 
+struct agx_encoder {
+   struct agx_bo *bo; /* BO allocated for this encoder at batch init */
+   uint8_t *current;  /* next byte to be written in the command stream */
+   uint8_t *end;      /* one past the last byte of the current buffer */
+};
+
 struct agx_batch {
    struct agx_context *ctx;
    struct pipe_framebuffer_state key;
@@ -305,9 +311,12 @@ struct agx_batch {
    } bo_list;
 
    struct agx_pool pool, pipeline_pool;
-   struct agx_bo *encoder;
-   uint8_t *encoder_current;
-   uint8_t *encoder_end;
+
+   /* We may enqueue both CDM and VDM work, possibly to the same batch for
+    * geometry/tessellation.
+    */
+   struct agx_encoder vdm;
+   struct agx_encoder cdm;
 
    /* Scissor and depth-bias descriptors, uploaded at GPU time */
    struct util_dynarray scissor, depth_bias;