ac/nir/ngg: export positions after streamout to improve performance

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32686>
This commit is contained in:
Marek Olšák
2024-12-17 09:54:48 -05:00
committed by Marge Bot
parent fc73749d6c
commit 58a88bbdb9

View File

@@ -2580,10 +2580,6 @@ nogs_export_vertex_params(nir_builder *b, nir_function_impl *impl,
if (s->options->gfx_level >= GFX11) {
/* Export varyings for GFX11+ */
b->cursor = nir_after_cf_node(&if_es_thread->cf_node);
create_output_phis(b, b->shader->info.outputs_written, b->shader->info.outputs_written_16bit, &s->out);
b->cursor = nir_after_impl(impl);
if (!num_es_threads)
num_es_threads = nir_load_merged_wave_info_amd(b);
@@ -2812,7 +2808,27 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
if (wait_attr_ring)
export_outputs &= ~VARYING_BIT_POS;
bool phis_created = false;
/* Add position exports.
*
* If streamout is enabled, export positions after streamout. This increases streamout performance
* for up to 4 vec4 xfb outputs on GFX12 because the streamout code doesn't have to go through
* the export allocation bottleneck. Adding more xfb outputs starts to be limited by the memory
* bandwidth.
*/
nir_if *if_pos_exports = NULL;
if (state.streamout_enabled) {
b->cursor = nir_after_cf_node(&if_es_thread->cf_node);
create_output_phis(b, b->shader->info.outputs_written, b->shader->info.outputs_written_16bit,
&state.out);
phis_created = true;
b->cursor = nir_after_impl(impl);
if_pos_exports = nir_push_if(b, es_thread);
} else {
b->cursor = nir_after_cf_list(&if_es_thread->then_list);
}
ac_nir_export_position(b, options->gfx_level,
options->clip_cull_dist_mask,
@@ -2820,6 +2836,16 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option
options->force_vrs, !wait_attr_ring,
export_outputs, &state.out, NULL);
if (if_pos_exports)
nir_pop_if(b, if_pos_exports);
if (options->has_param_exports && options->gfx_level >= GFX11 && !phis_created) {
b->cursor = nir_after_cf_node(&if_es_thread->cf_node);
create_output_phis(b, b->shader->info.outputs_written, b->shader->info.outputs_written_16bit,
&state.out);
}
b->cursor = nir_after_cf_list(&if_es_thread->then_list);
nogs_export_vertex_params(b, impl, if_es_thread, num_es_threads, &state);
if (wait_attr_ring)