radv: move more MS info to gather_shader_info_ms()

Only the workgroup size computation remains at the same place, but I
think it should be computed in a separate helper later.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18210>
This commit is contained in:
Samuel Pitoiset
2022-08-23 11:42:49 +02:00
committed by Marge Bot
parent cae4eb2904
commit 45f04dae75
2 changed files with 48 additions and 54 deletions

View File

@@ -2055,57 +2055,6 @@ gfx10_emit_ge_pc_alloc(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
S_030980_OVERSUB_EN(oversub_pc_lines > 0) | S_030980_NUM_PC_LINES(oversub_pc_lines - 1)); S_030980_OVERSUB_EN(oversub_pc_lines > 0) | S_030980_NUM_PC_LINES(oversub_pc_lines - 1));
} }
static void
gfx10_get_ngg_ms_info(struct radv_pipeline_stage *stage, struct gfx10_ngg_info *ngg)
{
/* Special case for mesh shader workgroups.
*
* Mesh shaders don't have any real vertex input, but they can produce
* an arbitrary number of vertices and primitives (up to 256).
* We need to precisely control the number of mesh shader workgroups
* that are launched from draw calls.
*
* To achieve that, we set:
* - input primitive topology to point list
* - input vertex and primitive count to 1
* - max output vertex count and primitive amplification factor
* to the boundaries of the shader
*
* With that, in the draw call:
* - drawing 1 input vertex ~ launching 1 mesh shader workgroup
*
* In the shader:
* - base vertex ~ first workgroup index (firstTask in NV_mesh_shader)
* - input vertex id ~ workgroup id (in 1D - shader needs to calculate in 3D)
*
* Notes:
* - without GS_EN=1 PRIM_AMP_FACTOR and MAX_VERTS_PER_SUBGROUP don't seem to work
* - with GS_EN=1 we must also set VGT_GS_MAX_VERT_OUT (otherwise the GPU hangs)
* - with GS_FAST_LAUNCH=1 every lane's VGPRs are initialized to the same input vertex index
*
*/
nir_shader *ms = stage->nir;
ngg->enable_vertex_grouping = true;
ngg->esgs_ring_size = 1;
ngg->hw_max_esverts = 1;
ngg->max_gsprims = 1;
ngg->max_out_verts = ms->info.mesh.max_vertices_out;
ngg->max_vert_out_per_gs_instance = false;
ngg->ngg_emit_size = 0;
ngg->prim_amp_factor = ms->info.mesh.max_primitives_out;
ngg->vgt_esgs_ring_itemsize = 1;
unsigned min_ngg_workgroup_size =
ac_compute_ngg_workgroup_size(ngg->hw_max_esverts, ngg->max_gsprims,
ngg->max_out_verts, ngg->prim_amp_factor);
unsigned api_workgroup_size =
ac_compute_cs_workgroup_size(ms->info.workgroup_size, false, UINT32_MAX);
stage->info.workgroup_size = MAX2(min_ngg_workgroup_size, api_workgroup_size);
}
static void static void
gfx10_get_ngg_info(const struct radv_pipeline_key *key, struct radv_pipeline *pipeline, gfx10_get_ngg_info(const struct radv_pipeline_key *key, struct radv_pipeline *pipeline,
struct radv_pipeline_stage *stages, struct gfx10_ngg_info *ngg) struct radv_pipeline_stage *stages, struct gfx10_ngg_info *ngg)
@@ -4624,9 +4573,7 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
else else
unreachable("Missing NGG shader stage."); unreachable("Missing NGG shader stage.");
if (*last_vgt_api_stage == MESA_SHADER_MESH) if (*last_vgt_api_stage != MESA_SHADER_MESH)
gfx10_get_ngg_ms_info(&stages[MESA_SHADER_MESH], ngg_info);
else
gfx10_get_ngg_info(pipeline_key, pipeline, stages, ngg_info); gfx10_get_ngg_info(pipeline_key, pipeline, stages, ngg_info);
} else if (stages[MESA_SHADER_GEOMETRY].nir) { } else if (stages[MESA_SHADER_GEOMETRY].nir) {
struct gfx9_gs_info *gs_info = &stages[MESA_SHADER_GEOMETRY].info.gs_ring_info; struct gfx9_gs_info *gs_info = &stages[MESA_SHADER_GEOMETRY].info.gs_ring_info;

View File

@@ -414,7 +414,54 @@ gather_shader_info_gs(const nir_shader *nir, struct radv_shader_info *info)
static void static void
gather_shader_info_mesh(const nir_shader *nir, struct radv_shader_info *info) gather_shader_info_mesh(const nir_shader *nir, struct radv_shader_info *info)
{ {
struct gfx10_ngg_info *ngg_info = &info->ngg_info;
info->ms.output_prim = nir->info.mesh.primitive_type; info->ms.output_prim = nir->info.mesh.primitive_type;
/* Special case for mesh shader workgroups.
*
* Mesh shaders don't have any real vertex input, but they can produce
* an arbitrary number of vertices and primitives (up to 256).
* We need to precisely control the number of mesh shader workgroups
* that are launched from draw calls.
*
* To achieve that, we set:
* - input primitive topology to point list
* - input vertex and primitive count to 1
* - max output vertex count and primitive amplification factor
* to the boundaries of the shader
*
* With that, in the draw call:
* - drawing 1 input vertex ~ launching 1 mesh shader workgroup
*
* In the shader:
* - base vertex ~ first workgroup index (firstTask in NV_mesh_shader)
* - input vertex id ~ workgroup id (in 1D - shader needs to calculate in 3D)
*
* Notes:
* - without GS_EN=1 PRIM_AMP_FACTOR and MAX_VERTS_PER_SUBGROUP don't seem to work
* - with GS_EN=1 we must also set VGT_GS_MAX_VERT_OUT (otherwise the GPU hangs)
* - with GS_FAST_LAUNCH=1 every lane's VGPRs are initialized to the same input vertex index
*
*/
ngg_info->enable_vertex_grouping = true;
ngg_info->esgs_ring_size = 1;
ngg_info->hw_max_esverts = 1;
ngg_info->max_gsprims = 1;
ngg_info->max_out_verts = nir->info.mesh.max_vertices_out;
ngg_info->max_vert_out_per_gs_instance = false;
ngg_info->ngg_emit_size = 0;
ngg_info->prim_amp_factor = nir->info.mesh.max_primitives_out;
ngg_info->vgt_esgs_ring_itemsize = 1;
unsigned min_ngg_workgroup_size =
ac_compute_ngg_workgroup_size(ngg_info->hw_max_esverts, ngg_info->max_gsprims,
ngg_info->max_out_verts, ngg_info->prim_amp_factor);
unsigned api_workgroup_size =
ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX);
info->workgroup_size = MAX2(min_ngg_workgroup_size, api_workgroup_size);
} }
static void static void