ac,radeonsi: emulate GS primitive pipeline stat on gfx11 because of culling

The GS culls primitives too, so the GS primitives pipeline statistic is
incorrect on gfx11. This can be exposed by forcing monolithic shader use,
which makes culling shaders immediately available for tests to use.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26917>
This commit is contained in:
Marek Olšák
2024-01-01 19:24:34 -05:00
committed by Marge Bot
parent 1d3f937142
commit b9b00a0e7a
7 changed files with 32 additions and 10 deletions

View File

@@ -898,7 +898,8 @@ ac_nir_accum_ior(nir_builder *b, nir_def *accum_result, nir_def *new_term)
bool
ac_nir_gs_shader_query(nir_builder *b,
bool has_gen_prim_query,
bool has_pipeline_stats_query,
bool has_gs_invocations_query,
bool has_gs_primitives_query,
unsigned num_vertices_per_primitive,
unsigned wave_size,
nir_def *vertex_count[4],
@@ -913,7 +914,7 @@ ac_nir_gs_shader_query(nir_builder *b,
any_query_enabled = ac_nir_accum_ior(b, any_query_enabled, prim_gen_query_enabled);
}
if (has_pipeline_stats_query) {
if (has_gs_invocations_query || has_gs_primitives_query) {
pipeline_query_enabled = nir_load_pipeline_stat_query_enabled_amd(b);
any_query_enabled = ac_nir_accum_ior(b, any_query_enabled, pipeline_query_enabled);
}
@@ -959,7 +960,7 @@ ac_nir_gs_shader_query(nir_builder *b,
/* Store the query result to query result using an atomic add. */
nir_if *if_first_lane = nir_push_if(b, nir_elect(b, 1));
{
if (has_pipeline_stats_query) {
if (has_gs_invocations_query || has_gs_primitives_query) {
nir_if *if_pipeline_query = nir_push_if(b, pipeline_query_enabled);
{
nir_def *count = NULL;
@@ -974,10 +975,11 @@ ac_nir_gs_shader_query(nir_builder *b,
}
}
if (count)
if (has_gs_primitives_query && count)
nir_atomic_add_gs_emit_prim_count_amd(b, count);
nir_atomic_add_shader_invocation_count_amd(b, num_active_threads);
if (has_gs_invocations_query)
nir_atomic_add_shader_invocation_count_amd(b, num_active_threads);
}
nir_pop_if(b, if_pipeline_query);
}
@@ -1237,6 +1239,7 @@ ac_nir_lower_legacy_gs(nir_shader *nir,
bool progress = ac_nir_gs_shader_query(b,
has_gen_prim_query,
has_pipeline_stats_query,
has_pipeline_stats_query,
num_vertices_per_primitive,
64,
s.vertex_count,

View File

@@ -166,6 +166,8 @@ typedef struct {
bool disable_streamout;
bool has_gen_prim_query;
bool has_xfb_prim_query;
bool has_gs_invocations_query;
bool has_gs_primitives_query;
bool kill_pointsize;
bool kill_layer;
bool force_vrs;
@@ -268,7 +270,8 @@ ac_nir_lower_legacy_vs(nir_shader *nir,
bool
ac_nir_gs_shader_query(nir_builder *b,
bool has_gen_prim_query,
bool has_pipeline_stats_query,
bool has_gs_invocations_query,
bool has_gs_primitives_query,
unsigned num_vertices_per_primitive,
unsigned wave_size,
nir_def *vertex_count[4],

View File

@@ -3573,7 +3573,8 @@ ac_nir_lower_ngg_gs(nir_shader *shader, const ac_nir_lower_ngg_options *options)
b->cursor = nir_after_cf_list(&if_gs_thread->then_list);
ac_nir_gs_shader_query(b,
state.options->has_gen_prim_query,
state.options->gfx_level < GFX11,
state.options->has_gs_invocations_query,
state.options->has_gs_primitives_query,
state.num_vertices_per_primitive,
state.options->wave_size,
state.vertex_count,

View File

@@ -856,6 +856,8 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
options.disable_streamout = !device->physical_device->use_ngg_streamout;
options.has_gen_prim_query = info->has_prim_query;
options.has_xfb_prim_query = info->has_xfb_query;
options.has_gs_invocations_query = device->physical_device->rad_info.gfx_level < GFX11;
options.has_gs_primitives_query = device->physical_device->rad_info.gfx_level < GFX11;
options.force_vrs = info->force_vrs_per_vertex;
if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) {

View File

@@ -470,12 +470,17 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
}
case nir_intrinsic_atomic_add_gs_emit_prim_count_amd:
case nir_intrinsic_atomic_add_shader_invocation_count_amd: {
nir_def *buf =
si_nir_load_internal_binding(b, args, SI_GS_QUERY_EMULATED_COUNTERS_BUF, 4);
enum pipe_statistics_query_index index =
intrin->intrinsic == nir_intrinsic_atomic_add_gs_emit_prim_count_amd ?
PIPE_STAT_QUERY_GS_PRIMITIVES : PIPE_STAT_QUERY_GS_INVOCATIONS;
/* GFX11 only needs to emulate PIPE_STAT_QUERY_GS_PRIMITIVES because GS culls,
* which makes the pipeline statistic incorrect.
*/
assert(sel->screen->info.gfx_level < GFX11 || index == PIPE_STAT_QUERY_GS_PRIMITIVES);
nir_def *buf =
si_nir_load_internal_binding(b, args, SI_GS_QUERY_EMULATED_COUNTERS_BUF, 4);
unsigned offset = si_query_pipestat_end_dw_offset(sel->screen, index) * 4;
nir_def *count = intrin->src[0].ssa;

View File

@@ -693,6 +693,12 @@ static struct pipe_query *si_query_hw_create(struct si_screen *sscreen, unsigned
if ((index == PIPE_STAT_QUERY_GS_PRIMITIVES || index == PIPE_STAT_QUERY_GS_INVOCATIONS) &&
sscreen->use_ngg && (sscreen->info.gfx_level >= GFX10 && sscreen->info.gfx_level <= GFX10_3))
query->flags |= SI_QUERY_EMULATE_GS_COUNTERS;
/* GFX11 only emulates PIPE_STAT_QUERY_GS_PRIMITIVES because the shader culls,
* which makes the statistic incorrect.
*/
if (sscreen->info.gfx_level >= GFX11 && index == PIPE_STAT_QUERY_GS_PRIMITIVES)
query->flags |= SI_QUERY_EMULATE_GS_COUNTERS;
break;
default:
assert(0);

View File

@@ -1976,6 +1976,8 @@ static void si_lower_ngg(struct si_shader *shader, nir_shader *nir)
options.gs_out_vtx_bytes = sel->info.gsvs_vertex_size;
options.has_gen_prim_query = options.has_xfb_prim_query =
sel->screen->info.gfx_level >= GFX11;
options.has_gs_invocations_query = sel->screen->info.gfx_level < GFX11;
options.has_gs_primitives_query = true;
/* For monolithic ES/GS to add vscnt wait when GS export pos0. */
if (key->ge.part.gs.es)