From 212f71734e194eeb2007a94ead2ecbe476fe772b Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sun, 5 Nov 2023 12:14:48 -0400 Subject: [PATCH] asahi: Refactor encoder data structure Signed-off-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/asahi/agx_batch.c | 25 +++++++++++++++++---- src/gallium/drivers/asahi/agx_pipe.c | 4 ++-- src/gallium/drivers/asahi/agx_state.c | 32 +++++++++++++-------------- src/gallium/drivers/asahi/agx_state.h | 15 ++++++++++--- 4 files changed, 50 insertions(+), 26 deletions(-) diff --git a/src/gallium/drivers/asahi/agx_batch.c b/src/gallium/drivers/asahi/agx_batch.c index 9a1f0fa937f..c4ddffca87d 100644 --- a/src/gallium/drivers/asahi/agx_batch.c +++ b/src/gallium/drivers/asahi/agx_batch.c @@ -76,6 +76,18 @@ agx_batch_mark_complete(struct agx_batch *batch) BITSET_CLEAR(batch->ctx->batches.submitted, batch_idx); } +static struct agx_encoder +agx_encoder_allocate(struct agx_batch *batch, struct agx_device *dev) +{ + struct agx_bo *bo = agx_bo_create(dev, 0x80000, 0, "Encoder"); + + return (struct agx_encoder){ + .bo = bo, + .current = bo->ptr.cpu, + .end = (uint8_t *)bo->ptr.cpu + bo->size, + }; +} + static void agx_batch_init(struct agx_context *ctx, const struct pipe_framebuffer_state *key, @@ -101,9 +113,13 @@ agx_batch_init(struct agx_context *ctx, batch->bo_list.word_count * sizeof(BITSET_WORD)); } - batch->encoder = agx_bo_create(dev, 0x80000, 0, "Encoder"); - batch->encoder_current = batch->encoder->ptr.cpu; - batch->encoder_end = batch->encoder_current + batch->encoder->size; + if (batch->key.width == AGX_COMPUTE_BATCH_WIDTH) { + batch->cdm = agx_encoder_allocate(batch, dev); + memset(&batch->vdm, 0, sizeof(batch->vdm)); + } else { + batch->vdm = agx_encoder_allocate(batch, dev); + memset(&batch->cdm, 0, sizeof(batch->cdm)); + } util_dynarray_init(&batch->scissor, ctx); util_dynarray_init(&batch->depth_bias, ctx); @@ -179,7 +195,8 @@ agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch, bool reset) } } - agx_bo_unreference(batch->encoder); + agx_bo_unreference(batch->vdm.bo); + agx_bo_unreference(batch->cdm.bo); agx_pool_cleanup(&batch->pool); agx_pool_cleanup(&batch->pipeline_pool); diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c index 50696b66a74..eb9e286caa8 100644 --- a/src/gallium/drivers/asahi/agx_pipe.c +++ b/src/gallium/drivers/asahi/agx_pipe.c @@ -1293,7 +1293,7 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch) /* Finalize the encoder */ uint8_t stop[5 + 64] = {0x00, 0x00, 0x00, 0xc0, 0x00}; - memcpy(batch->encoder_current, stop, sizeof(stop)); + memcpy(batch->vdm.current, stop, sizeof(stop)); uint64_t pipeline_background = agx_build_meta(batch, false, false); uint64_t pipeline_background_partial = agx_build_meta(batch, false, true); @@ -1340,7 +1340,7 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch) * - BO for internal shaders * - BOs added to the batch explicitly */ - agx_batch_add_bo(batch, batch->encoder); + agx_batch_add_bo(batch, batch->vdm.bo); /* Occlusion queries are allocated as a contiguous pool */ unsigned oq_count = diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 9cf58f5f3c2..05b097d7cba 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -2633,7 +2633,7 @@ agx_batch_init_state(struct agx_batch *batch) } /* Emit state on the batch that we don't change and so don't dirty track */ - uint8_t *out = batch->encoder_current; + uint8_t *out = batch->vdm.current; struct agx_ppp_update ppp = agx_new_ppp_update(&batch->pool, (struct AGX_PPP_HEADER){ .w_clamp = true, @@ -2652,7 +2652,7 @@ agx_batch_init_state(struct agx_batch *batch) /* clang-format on */ agx_ppp_fini(&out, &ppp); - batch->encoder_current = out; + batch->vdm.current = out; /* Mark it as initialized now, since agx_batch_writes() will check this. */ batch->initialized = true; @@ -3074,11 +3074,10 @@ agx_scissor_culls_everything(struct agx_context *ctx) } static void -agx_ensure_cmdbuf_has_space(struct agx_batch *batch, size_t space) +agx_ensure_vdm_cmdbuf_has_space(struct agx_batch *batch, size_t space) { /* Assert that we have space for a link tag */ - assert((batch->encoder_current + AGX_VDM_STREAM_LINK_LENGTH) <= - batch->encoder_end && + assert((batch->vdm.current + AGX_VDM_STREAM_LINK_LENGTH) <= batch->vdm.end && "Encoder overflowed"); /* Always leave room for a link tag, in case we run out of space later, @@ -3089,7 +3088,7 @@ agx_ensure_cmdbuf_has_space(struct agx_batch *batch, size_t space) space += AGX_VDM_STREAM_LINK_LENGTH + 0x800; /* If there is room in the command buffer, we're done */ - if (likely((batch->encoder_end - batch->encoder_current) >= space)) + if (likely((batch->vdm.end - batch->vdm.current) >= space)) return; /* Otherwise, we need to allocate a new command buffer. We use memory owned @@ -3099,14 +3098,14 @@ agx_ensure_cmdbuf_has_space(struct agx_batch *batch, size_t space) struct agx_ptr T = agx_pool_alloc_aligned(&batch->pool, size, 256); /* Jump from the old command buffer to the new command buffer */ - agx_pack(batch->encoder_current, VDM_STREAM_LINK, cfg) { + agx_pack(batch->vdm.current, VDM_STREAM_LINK, cfg) { cfg.target_lo = T.gpu & BITFIELD_MASK(32); cfg.target_hi = T.gpu >> 32; } /* Swap out the command buffer */ - batch->encoder_current = T.cpu; - batch->encoder_end = batch->encoder_current + size; + batch->vdm.current = T.cpu; + batch->vdm.end = batch->vdm.current + size; } static void @@ -3227,7 +3226,7 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, * We only need to do this once per draw as long as we conservatively * estimate the maximum bytes of VDM commands that this draw will emit. */ - agx_ensure_cmdbuf_has_space( + agx_ensure_vdm_cmdbuf_has_space( batch, (AGX_VDM_STATE_LENGTH * 2) + (AGX_PPP_STATE_LENGTH * MAX_PPP_UPDATES) + AGX_VDM_STATE_RESTART_INDEX_LENGTH + @@ -3240,7 +3239,7 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, AGX_INDEX_LIST_COUNT_LENGTH + AGX_INDEX_LIST_INSTANCES_LENGTH + AGX_INDEX_LIST_START_LENGTH + AGX_INDEX_LIST_BUFFER_SIZE_LENGTH); - uint8_t *out = agx_encode_state(batch, batch->encoder_current, + uint8_t *out = agx_encode_state(batch, batch->vdm.current, reduced_prim == MESA_PRIM_LINES, reduced_prim == MESA_PRIM_POINTS); @@ -3349,9 +3348,8 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, out += AGX_VDM_BARRIER_LENGTH; } - batch->encoder_current = out; - assert((batch->encoder_current + AGX_VDM_STREAM_LINK_LENGTH) <= - batch->encoder_end && + batch->vdm.current = out; + assert((batch->vdm.current + AGX_VDM_STREAM_LINK_LENGTH) <= batch->vdm.end && "Failed to reserve sufficient space in encoder"); agx_dirty_reset_graphics(ctx); @@ -3439,7 +3437,7 @@ agx_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) agx_upload_uniforms(batch); /* TODO: Ensure space if we allow multiple kernels in a batch */ - uint8_t *out = batch->encoder_current; + uint8_t *out = batch->cdm.current; agx_pack(out, CDM_HEADER, cfg) { if (info->indirect) @@ -3492,8 +3490,8 @@ agx_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) ; out += AGX_CDM_LAUNCH_LENGTH; - batch->encoder_current = out; - assert(batch->encoder_current <= batch->encoder_end && + batch->cdm.current = out; + assert(batch->cdm.current <= batch->cdm.end && "Failed to reserve sufficient space in encoder"); /* TODO: Dirty tracking? */ diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h index 554dcdf1586..8ea838458b0 100644 --- a/src/gallium/drivers/asahi/agx_state.h +++ b/src/gallium/drivers/asahi/agx_state.h @@ -261,6 +261,12 @@ uint16_t agx_sampler_heap_add(struct agx_device *dev, struct agx_sampler_heap *heap, struct agx_sampler_packed *sampler); +struct agx_encoder { + struct agx_bo *bo; + uint8_t *current; + uint8_t *end; +}; + struct agx_batch { struct agx_context *ctx; struct pipe_framebuffer_state key; @@ -305,9 +311,12 @@ struct agx_batch { } bo_list; struct agx_pool pool, pipeline_pool; - struct agx_bo *encoder; - uint8_t *encoder_current; - uint8_t *encoder_end; + + /* We may enqueue both CDM and VDM work, possibly to the same batch for + * geometry/tessellation. + */ + struct agx_encoder vdm; + struct agx_encoder cdm; /* Scissor and depth-bias descriptors, uploaded at GPU time */ struct util_dynarray scissor, depth_bias;