radv: move more MS info to gather_shader_info_ms()
Only the workgroup size computation remains at the same place, but I think it should be computed in a separate helper later. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18210>
This commit is contained in:

committed by
Marge Bot

parent
cae4eb2904
commit
45f04dae75
@@ -2055,57 +2055,6 @@ gfx10_emit_ge_pc_alloc(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
|
|||||||
S_030980_OVERSUB_EN(oversub_pc_lines > 0) | S_030980_NUM_PC_LINES(oversub_pc_lines - 1));
|
S_030980_OVERSUB_EN(oversub_pc_lines > 0) | S_030980_NUM_PC_LINES(oversub_pc_lines - 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
gfx10_get_ngg_ms_info(struct radv_pipeline_stage *stage, struct gfx10_ngg_info *ngg)
|
|
||||||
{
|
|
||||||
/* Special case for mesh shader workgroups.
|
|
||||||
*
|
|
||||||
* Mesh shaders don't have any real vertex input, but they can produce
|
|
||||||
* an arbitrary number of vertices and primitives (up to 256).
|
|
||||||
* We need to precisely control the number of mesh shader workgroups
|
|
||||||
* that are launched from draw calls.
|
|
||||||
*
|
|
||||||
* To achieve that, we set:
|
|
||||||
* - input primitive topology to point list
|
|
||||||
* - input vertex and primitive count to 1
|
|
||||||
* - max output vertex count and primitive amplification factor
|
|
||||||
* to the boundaries of the shader
|
|
||||||
*
|
|
||||||
* With that, in the draw call:
|
|
||||||
* - drawing 1 input vertex ~ launching 1 mesh shader workgroup
|
|
||||||
*
|
|
||||||
* In the shader:
|
|
||||||
* - base vertex ~ first workgroup index (firstTask in NV_mesh_shader)
|
|
||||||
* - input vertex id ~ workgroup id (in 1D - shader needs to calculate in 3D)
|
|
||||||
*
|
|
||||||
* Notes:
|
|
||||||
* - without GS_EN=1 PRIM_AMP_FACTOR and MAX_VERTS_PER_SUBGROUP don't seem to work
|
|
||||||
* - with GS_EN=1 we must also set VGT_GS_MAX_VERT_OUT (otherwise the GPU hangs)
|
|
||||||
* - with GS_FAST_LAUNCH=1 every lane's VGPRs are initialized to the same input vertex index
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
nir_shader *ms = stage->nir;
|
|
||||||
|
|
||||||
ngg->enable_vertex_grouping = true;
|
|
||||||
ngg->esgs_ring_size = 1;
|
|
||||||
ngg->hw_max_esverts = 1;
|
|
||||||
ngg->max_gsprims = 1;
|
|
||||||
ngg->max_out_verts = ms->info.mesh.max_vertices_out;
|
|
||||||
ngg->max_vert_out_per_gs_instance = false;
|
|
||||||
ngg->ngg_emit_size = 0;
|
|
||||||
ngg->prim_amp_factor = ms->info.mesh.max_primitives_out;
|
|
||||||
ngg->vgt_esgs_ring_itemsize = 1;
|
|
||||||
|
|
||||||
unsigned min_ngg_workgroup_size =
|
|
||||||
ac_compute_ngg_workgroup_size(ngg->hw_max_esverts, ngg->max_gsprims,
|
|
||||||
ngg->max_out_verts, ngg->prim_amp_factor);
|
|
||||||
|
|
||||||
unsigned api_workgroup_size =
|
|
||||||
ac_compute_cs_workgroup_size(ms->info.workgroup_size, false, UINT32_MAX);
|
|
||||||
|
|
||||||
stage->info.workgroup_size = MAX2(min_ngg_workgroup_size, api_workgroup_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
gfx10_get_ngg_info(const struct radv_pipeline_key *key, struct radv_pipeline *pipeline,
|
gfx10_get_ngg_info(const struct radv_pipeline_key *key, struct radv_pipeline *pipeline,
|
||||||
struct radv_pipeline_stage *stages, struct gfx10_ngg_info *ngg)
|
struct radv_pipeline_stage *stages, struct gfx10_ngg_info *ngg)
|
||||||
@@ -4624,9 +4573,7 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
|
|||||||
else
|
else
|
||||||
unreachable("Missing NGG shader stage.");
|
unreachable("Missing NGG shader stage.");
|
||||||
|
|
||||||
if (*last_vgt_api_stage == MESA_SHADER_MESH)
|
if (*last_vgt_api_stage != MESA_SHADER_MESH)
|
||||||
gfx10_get_ngg_ms_info(&stages[MESA_SHADER_MESH], ngg_info);
|
|
||||||
else
|
|
||||||
gfx10_get_ngg_info(pipeline_key, pipeline, stages, ngg_info);
|
gfx10_get_ngg_info(pipeline_key, pipeline, stages, ngg_info);
|
||||||
} else if (stages[MESA_SHADER_GEOMETRY].nir) {
|
} else if (stages[MESA_SHADER_GEOMETRY].nir) {
|
||||||
struct gfx9_gs_info *gs_info = &stages[MESA_SHADER_GEOMETRY].info.gs_ring_info;
|
struct gfx9_gs_info *gs_info = &stages[MESA_SHADER_GEOMETRY].info.gs_ring_info;
|
||||||
|
@@ -414,7 +414,54 @@ gather_shader_info_gs(const nir_shader *nir, struct radv_shader_info *info)
|
|||||||
static void
|
static void
|
||||||
gather_shader_info_mesh(const nir_shader *nir, struct radv_shader_info *info)
|
gather_shader_info_mesh(const nir_shader *nir, struct radv_shader_info *info)
|
||||||
{
|
{
|
||||||
|
struct gfx10_ngg_info *ngg_info = &info->ngg_info;
|
||||||
|
|
||||||
info->ms.output_prim = nir->info.mesh.primitive_type;
|
info->ms.output_prim = nir->info.mesh.primitive_type;
|
||||||
|
|
||||||
|
/* Special case for mesh shader workgroups.
|
||||||
|
*
|
||||||
|
* Mesh shaders don't have any real vertex input, but they can produce
|
||||||
|
* an arbitrary number of vertices and primitives (up to 256).
|
||||||
|
* We need to precisely control the number of mesh shader workgroups
|
||||||
|
* that are launched from draw calls.
|
||||||
|
*
|
||||||
|
* To achieve that, we set:
|
||||||
|
* - input primitive topology to point list
|
||||||
|
* - input vertex and primitive count to 1
|
||||||
|
* - max output vertex count and primitive amplification factor
|
||||||
|
* to the boundaries of the shader
|
||||||
|
*
|
||||||
|
* With that, in the draw call:
|
||||||
|
* - drawing 1 input vertex ~ launching 1 mesh shader workgroup
|
||||||
|
*
|
||||||
|
* In the shader:
|
||||||
|
* - base vertex ~ first workgroup index (firstTask in NV_mesh_shader)
|
||||||
|
* - input vertex id ~ workgroup id (in 1D - shader needs to calculate in 3D)
|
||||||
|
*
|
||||||
|
* Notes:
|
||||||
|
* - without GS_EN=1 PRIM_AMP_FACTOR and MAX_VERTS_PER_SUBGROUP don't seem to work
|
||||||
|
* - with GS_EN=1 we must also set VGT_GS_MAX_VERT_OUT (otherwise the GPU hangs)
|
||||||
|
* - with GS_FAST_LAUNCH=1 every lane's VGPRs are initialized to the same input vertex index
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
ngg_info->enable_vertex_grouping = true;
|
||||||
|
ngg_info->esgs_ring_size = 1;
|
||||||
|
ngg_info->hw_max_esverts = 1;
|
||||||
|
ngg_info->max_gsprims = 1;
|
||||||
|
ngg_info->max_out_verts = nir->info.mesh.max_vertices_out;
|
||||||
|
ngg_info->max_vert_out_per_gs_instance = false;
|
||||||
|
ngg_info->ngg_emit_size = 0;
|
||||||
|
ngg_info->prim_amp_factor = nir->info.mesh.max_primitives_out;
|
||||||
|
ngg_info->vgt_esgs_ring_itemsize = 1;
|
||||||
|
|
||||||
|
unsigned min_ngg_workgroup_size =
|
||||||
|
ac_compute_ngg_workgroup_size(ngg_info->hw_max_esverts, ngg_info->max_gsprims,
|
||||||
|
ngg_info->max_out_verts, ngg_info->prim_amp_factor);
|
||||||
|
|
||||||
|
unsigned api_workgroup_size =
|
||||||
|
ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX);
|
||||||
|
|
||||||
|
info->workgroup_size = MAX2(min_ngg_workgroup_size, api_workgroup_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
Reference in New Issue
Block a user