radv: move more MS info to gather_shader_info_ms()

Only the workgroup size computation remains at the same place, but I think it should be computed in a separate helper later. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18210>
2022-08-23 11:42:49 +02:00
parent cae4eb2904
commit 45f04dae75
2 changed files with 48 additions and 54 deletions
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2055,57 +2055,6 @@ gfx10_emit_ge_pc_alloc(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
      S_030980_OVERSUB_EN(oversub_pc_lines > 0) | S_030980_NUM_PC_LINES(oversub_pc_lines - 1));
 }
 static void
 gfx10_get_ngg_ms_info(struct radv_pipeline_stage *stage, struct gfx10_ngg_info *ngg)
 {
   /* Special case for mesh shader workgroups.
    *
    * Mesh shaders don't have any real vertex input, but they can produce
    * an arbitrary number of vertices and primitives (up to 256).
    * We need to precisely control the number of mesh shader workgroups
    * that are launched from draw calls.
    *
    * To achieve that, we set:
    * - input primitive topology to point list
    * - input vertex and primitive count to 1
    * - max output vertex count and primitive amplification factor
    *   to the boundaries of the shader
    *
    * With that, in the draw call:
    * - drawing 1 input vertex ~ launching 1 mesh shader workgroup
    *
    * In the shader:
    * - base vertex ~ first workgroup index (firstTask in NV_mesh_shader)
    * - input vertex id ~ workgroup id (in 1D - shader needs to calculate in 3D)
    *
    * Notes:
    * - without GS_EN=1 PRIM_AMP_FACTOR and MAX_VERTS_PER_SUBGROUP don't seem to work
    * - with GS_EN=1 we must also set VGT_GS_MAX_VERT_OUT (otherwise the GPU hangs)
    * - with GS_FAST_LAUNCH=1 every lane's VGPRs are initialized to the same input vertex index
    *
    */
   nir_shader *ms = stage->nir;
   ngg->enable_vertex_grouping = true;
   ngg->esgs_ring_size = 1;
   ngg->hw_max_esverts = 1;
   ngg->max_gsprims = 1;
   ngg->max_out_verts = ms->info.mesh.max_vertices_out;
   ngg->max_vert_out_per_gs_instance = false;
   ngg->ngg_emit_size = 0;
   ngg->prim_amp_factor = ms->info.mesh.max_primitives_out;
   ngg->vgt_esgs_ring_itemsize = 1;
   unsigned min_ngg_workgroup_size =
      ac_compute_ngg_workgroup_size(ngg->hw_max_esverts, ngg->max_gsprims,
                                    ngg->max_out_verts, ngg->prim_amp_factor);
   unsigned api_workgroup_size =
      ac_compute_cs_workgroup_size(ms->info.workgroup_size, false, UINT32_MAX);
   stage->info.workgroup_size = MAX2(min_ngg_workgroup_size, api_workgroup_size);
 }
 static void
 gfx10_get_ngg_info(const struct radv_pipeline_key *key, struct radv_pipeline *pipeline,
                   struct radv_pipeline_stage *stages, struct gfx10_ngg_info *ngg)
@@ -4624,9 +4573,7 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
      else
         unreachable("Missing NGG shader stage.");
-      if (*last_vgt_api_stage == MESA_SHADER_MESH)
+      if (*last_vgt_api_stage != MESA_SHADER_MESH)
         gfx10_get_ngg_ms_info(&stages[MESA_SHADER_MESH], ngg_info);
      else
         gfx10_get_ngg_info(pipeline_key, pipeline, stages, ngg_info);
   } else if (stages[MESA_SHADER_GEOMETRY].nir) {
      struct gfx9_gs_info *gs_info = &stages[MESA_SHADER_GEOMETRY].info.gs_ring_info;
--- a/src/amd/vulkan/radv_shader_info.c
+++ b/src/amd/vulkan/radv_shader_info.c
@@ -414,7 +414,54 @@ gather_shader_info_gs(const nir_shader *nir, struct radv_shader_info *info)
 static void
 gather_shader_info_mesh(const nir_shader *nir, struct radv_shader_info *info)
 {
   struct gfx10_ngg_info *ngg_info = &info->ngg_info;
   info->ms.output_prim = nir->info.mesh.primitive_type;
   /* Special case for mesh shader workgroups.
    *
    * Mesh shaders don't have any real vertex input, but they can produce
    * an arbitrary number of vertices and primitives (up to 256).
    * We need to precisely control the number of mesh shader workgroups
    * that are launched from draw calls.
    *
    * To achieve that, we set:
    * - input primitive topology to point list
    * - input vertex and primitive count to 1
    * - max output vertex count and primitive amplification factor
    *   to the boundaries of the shader
    *
    * With that, in the draw call:
    * - drawing 1 input vertex ~ launching 1 mesh shader workgroup
    *
    * In the shader:
    * - base vertex ~ first workgroup index (firstTask in NV_mesh_shader)
    * - input vertex id ~ workgroup id (in 1D - shader needs to calculate in 3D)
    *
    * Notes:
    * - without GS_EN=1 PRIM_AMP_FACTOR and MAX_VERTS_PER_SUBGROUP don't seem to work
    * - with GS_EN=1 we must also set VGT_GS_MAX_VERT_OUT (otherwise the GPU hangs)
    * - with GS_FAST_LAUNCH=1 every lane's VGPRs are initialized to the same input vertex index
    *
    */
   ngg_info->enable_vertex_grouping = true;
   ngg_info->esgs_ring_size = 1;
   ngg_info->hw_max_esverts = 1;
   ngg_info->max_gsprims = 1;
   ngg_info->max_out_verts = nir->info.mesh.max_vertices_out;
   ngg_info->max_vert_out_per_gs_instance = false;
   ngg_info->ngg_emit_size = 0;
   ngg_info->prim_amp_factor = nir->info.mesh.max_primitives_out;
   ngg_info->vgt_esgs_ring_itemsize = 1;
   unsigned min_ngg_workgroup_size =
      ac_compute_ngg_workgroup_size(ngg_info->hw_max_esverts, ngg_info->max_gsprims,
                                    ngg_info->max_out_verts, ngg_info->prim_amp_factor);
   unsigned api_workgroup_size =
      ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX);
   info->workgroup_size = MAX2(min_ngg_workgroup_size, api_workgroup_size);
 }
 static void