asahi: make GS inputs explicit

We don't want to assume VS->GS, since we want to reuse the root uniforms across the whole draw with honeykrisp tess+gs.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30382>
2024-07-18 13:27:31 -04:00
parent f6e9e7d3ce
commit 9595d79b89
4 changed files with 38 additions and 5 deletions
--- a/src/asahi/lib/agx_nir_lower_gs.c
+++ b/src/asahi/lib/agx_nir_lower_gs.c
@@ -209,9 +209,21 @@ agx_load_per_vertex_input(nir_builder *b, nir_intrinsic_instr *intr,
   assert(intr->intrinsic == nir_intrinsic_load_per_vertex_input);
   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);

-   nir_def *addr = libagx_vertex_output_address(
-      b, nir_load_vs_output_buffer_agx(b), nir_load_vs_outputs_agx(b), vertex,
-      nir_iadd_imm(b, intr->src[1].ssa, sem.location));
+   nir_def *location = nir_iadd_imm(b, intr->src[1].ssa, sem.location);
+   nir_def *addr;
+
+   if (b->shader->info.stage == MESA_SHADER_GEOMETRY) {
+      /* GS may be preceded by VS or TES so specified as param */
+      addr = libagx_geometry_input_address(
+         b, nir_load_geometry_param_buffer_agx(b), vertex, location);
+   } else {
+      assert(b->shader->info.stage == MESA_SHADER_TESS_CTRL);
+
+      /* TCS always preceded by VS so we use the VS state directly */
+      addr = libagx_vertex_output_address(b, nir_load_vs_output_buffer_agx(b),
+                                          nir_load_vs_outputs_agx(b), vertex,
+                                          location);
+   }

   addr = nir_iadd_imm(b, addr, 4 * nir_intrinsic_component(intr));
   return nir_load_global_constant(b, addr, 4, intr->def.num_components,
--- a/src/asahi/lib/shaders/geometry.cl
+++ b/src/asahi/lib/shaders/geometry.cl
@@ -544,9 +544,12 @@ libagx_gs_setup_indirect(global struct agx_gs_setup_indirect_params *gsi,
   state->heap_bottom +=
      align(p->input_primitives * p->count_buffer_stride, 16);

-   *(gsi->vertex_buffer) = (uintptr_t)(state->heap + state->heap_bottom);
+   p->input_buffer = (uintptr_t)(state->heap + state->heap_bottom);
+   *(gsi->vertex_buffer) = p->input_buffer;
   state->heap_bottom += align(vertex_buffer_size, 4);

+   p->input_mask = gsi->vs_outputs;
+
   if (state->heap_bottom > state->heap_size) {
      global uint *foo = (global uint *)(uintptr_t)0x1deadbeef;
      *foo = 0x1234;
@@ -665,6 +668,14 @@ libagx_vertex_output_address(uintptr_t buffer, uint64_t mask, uint vtx,
   return buffer + libagx_tcs_in_offs(vtx, location, mask);
 }

+uintptr_t
+libagx_geometry_input_address(constant struct agx_geometry_params *p, uint vtx,
+                              gl_varying_slot location)
+{ /* GS input may come from VS or TES: use the buffer + mask stored in p */
+   return libagx_vertex_output_address(p->input_buffer, p->input_mask, vtx,
+                                       location);
+}
+
 unsigned
 libagx_input_vertices(constant struct agx_ia_state *ia)
 {
--- a/src/asahi/lib/shaders/geometry.h
+++ b/src/asahi/lib/shaders/geometry.h
@@ -163,6 +163,13 @@ struct agx_geometry_params {
    */
   GLOBAL(uchar) xfb_base[MAX_SO_BUFFERS];

+   /* Address and present mask for the input to the geometry shader. These will
+    * reflect the vertex shader for VS->GS or instead the tessellation
+    * evaluation shader for TES->GS.
+    */
+   uint64_t input_buffer;
+   uint64_t input_mask;
+
   /* Location-indexed mask of flat outputs, used for lowering GL edge flags. */
   uint64_t flat_outputs;

@@ -201,7 +208,7 @@ struct agx_geometry_params {
    */
   uint32_t input_topology;
 } PACKED;
-AGX_STATIC_ASSERT(sizeof(struct agx_geometry_params) == 78 * 4);
+AGX_STATIC_ASSERT(sizeof(struct agx_geometry_params) == 82 * 4);

 /* TCS shared memory layout:
 *
--- a/src/gallium/drivers/asahi/agx_state.c
+++ b/src/gallium/drivers/asahi/agx_state.c
@@ -4003,6 +4003,7 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
    */
   unsigned count_buffer_stride = batch->ctx->gs->gs_count_words * 4;
   batch->uniforms.vertex_outputs = batch->ctx->vs->b.info.outputs;
+   params.input_mask = batch->uniforms.vertex_outputs;

   if (indirect) {
      params.count_buffer_stride = count_buffer_stride;
@@ -4032,6 +4033,8 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
         uint64_t addr = agx_pool_alloc_aligned(&batch->pool, vb_size, 4).gpu;
         batch->uniforms.vertex_output_buffer_ptr =
            agx_pool_upload(&batch->pool, &addr, 8);
+
+         params.input_buffer = addr;
      }
   }