radv: no need to do gs_alloc_req for newer chips in ngg vs/tes
Copy from radeonsi. Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17130>
This commit is contained in:
@@ -115,6 +115,7 @@ ac_nir_lower_indirect_derefs(nir_shader *shader,
|
|||||||
|
|
||||||
void
|
void
|
||||||
ac_nir_lower_ngg_nogs(nir_shader *shader,
|
ac_nir_lower_ngg_nogs(nir_shader *shader,
|
||||||
|
enum radeon_family family,
|
||||||
unsigned max_num_es_vertices,
|
unsigned max_num_es_vertices,
|
||||||
unsigned num_vertices_per_primitive,
|
unsigned num_vertices_per_primitive,
|
||||||
unsigned max_workgroup_size,
|
unsigned max_workgroup_size,
|
||||||
|
@@ -1359,6 +1359,7 @@ add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_c
|
|||||||
|
|
||||||
void
|
void
|
||||||
ac_nir_lower_ngg_nogs(nir_shader *shader,
|
ac_nir_lower_ngg_nogs(nir_shader *shader,
|
||||||
|
enum radeon_family family,
|
||||||
unsigned max_num_es_vertices,
|
unsigned max_num_es_vertices,
|
||||||
unsigned num_vertices_per_primitives,
|
unsigned num_vertices_per_primitives,
|
||||||
unsigned max_workgroup_size,
|
unsigned max_workgroup_size,
|
||||||
@@ -1422,14 +1423,17 @@ ac_nir_lower_ngg_nogs(nir_shader *shader,
|
|||||||
ngg_nogs_init_vertex_indices_vars(b, impl, &state);
|
ngg_nogs_init_vertex_indices_vars(b, impl, &state);
|
||||||
|
|
||||||
if (!can_cull) {
|
if (!can_cull) {
|
||||||
/* Allocate export space on wave 0 - confirm to the HW that we want to use all possible space */
|
/* Newer chips can use PRIMGEN_PASSTHRU_NO_MSG to skip gs_alloc_req for NGG passthrough. */
|
||||||
nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_load_subgroup_id(b), nir_imm_int(b, 0)));
|
if (!(passthrough && family >= CHIP_NAVI23)) {
|
||||||
{
|
/* Allocate export space on wave 0 - confirm to the HW that we want to use all possible space */
|
||||||
nir_ssa_def *vtx_cnt = nir_load_workgroup_num_input_vertices_amd(b);
|
nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_load_subgroup_id(b), nir_imm_int(b, 0)));
|
||||||
nir_ssa_def *prim_cnt = nir_load_workgroup_num_input_primitives_amd(b);
|
{
|
||||||
nir_alloc_vertices_and_primitives_amd(b, vtx_cnt, prim_cnt);
|
nir_ssa_def *vtx_cnt = nir_load_workgroup_num_input_vertices_amd(b);
|
||||||
|
nir_ssa_def *prim_cnt = nir_load_workgroup_num_input_primitives_amd(b);
|
||||||
|
nir_alloc_vertices_and_primitives_amd(b, vtx_cnt, prim_cnt);
|
||||||
|
}
|
||||||
|
nir_pop_if(b, if_wave_0);
|
||||||
}
|
}
|
||||||
nir_pop_if(b, if_wave_0);
|
|
||||||
|
|
||||||
/* Take care of early primitive export, otherwise just pack the primitive export argument */
|
/* Take care of early primitive export, otherwise just pack the primitive export argument */
|
||||||
if (state.early_prim_export)
|
if (state.early_prim_export)
|
||||||
|
@@ -1278,8 +1278,10 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx)
|
|||||||
|
|
||||||
/* TODO: primitive culling */
|
/* TODO: primitive culling */
|
||||||
|
|
||||||
ac_build_sendmsg_gs_alloc_req(&ctx->ac, get_wave_id_in_tg(ctx), ngg_get_vtx_cnt(ctx),
|
/* Newer chips can use PRIMGEN_PASSTHRU_NO_MSG to skip gs_alloc_req for NGG passthrough. */
|
||||||
ngg_get_prim_cnt(ctx));
|
if (!(ctx->shader_info->is_ngg_passthrough && ctx->ac.family >= CHIP_NAVI23))
|
||||||
|
ac_build_sendmsg_gs_alloc_req(&ctx->ac, get_wave_id_in_tg(ctx), ngg_get_vtx_cnt(ctx),
|
||||||
|
ngg_get_prim_cnt(ctx));
|
||||||
|
|
||||||
/* TODO: streamout queries */
|
/* TODO: streamout queries */
|
||||||
/* Export primitive data to the index buffer.
|
/* Export primitive data to the index buffer.
|
||||||
|
@@ -6524,8 +6524,11 @@ radv_pipeline_emit_vgt_shader_config(struct radeon_cmdbuf *ctx_cs,
|
|||||||
stages |= S_028B54_PRIMGEN_EN(1);
|
stages |= S_028B54_PRIMGEN_EN(1);
|
||||||
if (pipeline->streamout_shader)
|
if (pipeline->streamout_shader)
|
||||||
stages |= S_028B54_NGG_WAVE_ID_EN(1);
|
stages |= S_028B54_NGG_WAVE_ID_EN(1);
|
||||||
if (radv_pipeline_has_ngg_passthrough(pipeline))
|
if (radv_pipeline_has_ngg_passthrough(pipeline)) {
|
||||||
stages |= S_028B54_PRIMGEN_PASSTHRU_EN(1);
|
stages |= S_028B54_PRIMGEN_PASSTHRU_EN(1);
|
||||||
|
if (pdevice->rad_info.family >= CHIP_NAVI23)
|
||||||
|
stages |= S_028B54_PRIMGEN_PASSTHRU_NO_MSG(1);
|
||||||
|
}
|
||||||
} else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
|
} else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
|
||||||
stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
|
stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
|
||||||
}
|
}
|
||||||
|
@@ -1280,7 +1280,9 @@ void radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_
|
|||||||
export_prim_id = info->tes.outinfo.export_prim_id;
|
export_prim_id = info->tes.outinfo.export_prim_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
NIR_PASS_V(nir, ac_nir_lower_ngg_nogs, max_vtx_in, num_vertices_per_prim,
|
NIR_PASS_V(nir, ac_nir_lower_ngg_nogs,
|
||||||
|
device->physical_device->rad_info.family,
|
||||||
|
max_vtx_in, num_vertices_per_prim,
|
||||||
info->workgroup_size, info->wave_size, info->has_ngg_culling,
|
info->workgroup_size, info->wave_size, info->has_ngg_culling,
|
||||||
info->has_ngg_early_prim_export, info->is_ngg_passthrough, export_prim_id,
|
info->has_ngg_early_prim_export, info->is_ngg_passthrough, export_prim_id,
|
||||||
pl_key->vs.provoking_vtx_last, false, pl_key->primitives_generated_query,
|
pl_key->vs.provoking_vtx_last, false, pl_key->primitives_generated_query,
|
||||||
|
Reference in New Issue
Block a user