diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c
index a82457b5bc0..b20df25785a 100644
--- a/src/amd/vulkan/radv_pipeline_graphics.c
+++ b/src/amd/vulkan/radv_pipeline_graphics.c
@@ -12,6 +12,7 @@
 #include "nir/nir.h"
 #include "nir/nir_builder.h"
 #include "nir/nir_serialize.h"
+#include "nir/nir_xfb_info.h"
 #include "nir/radv_nir.h"
 #include "spirv/nir_spirv.h"
 #include "util/disk_cache.h"
@@ -22,6 +23,7 @@
 #include "radv_debug.h"
 #include "radv_entrypoints.h"
 #include "radv_formats.h"
+#include "radv_physical_device.h"
 #include "radv_pipeline_cache.h"
 #include "radv_rmv.h"
 #include "radv_shader.h"
@@ -1519,6 +1521,141 @@ radv_graphics_shaders_link(const struct radv_device *device, const struct radv_g
    }
 }
 
+/**
+ * First pass of varying optimization.
+ * This function is called for each shader pair from first to last.
+ *
+ * 1. Run some NIR passes in preparation.
+ * 2. Optimize varyings.
+ * 3. If either shader changed, run algebraic optimizations.
+ */
+static void
+radv_graphics_shaders_link_varyings_first(struct radv_shader_stage *producer_stage,
+                                          struct radv_shader_stage *consumer_stage)
+{
+   nir_shader *producer = producer_stage->nir;
+   nir_shader *consumer = consumer_stage->nir;
+
+   /* nir_opt_varyings expects that no undefined stores are present in the shader. */
+   NIR_PASS(_, producer, nir_opt_undef);
+
+   /* Update load/store alignments because inter-stage code motion may move
+    * the instructions used to deduce this info.
+    */
+   NIR_PASS(_, consumer, nir_opt_load_store_update_alignments);
+
+   /* Scalarize all I/O, because nir_opt_varyings and nir_opt_vectorize_io
+    * expect all I/O to be scalarized.
+    */
+   NIR_PASS(_, producer, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
+   NIR_PASS(_, consumer, nir_lower_io_to_scalar, nir_var_shader_in, NULL, NULL);
+
+   /* Eliminate useless vec->mov copies resulting from scalarization. */
+   NIR_PASS(_, producer, nir_copy_prop);
+
+   const nir_opt_varyings_progress p = nir_opt_varyings(producer, consumer, true, 0, 0);
+
+   /* Run algebraic optimizations on shaders that changed. */
+   if (p & nir_progress_producer) {
+      radv_optimize_nir_algebraic(producer, false, false);
+   }
+   if (p & nir_progress_consumer) {
+      radv_optimize_nir_algebraic(consumer, false, false);
+   }
+}
+
+/**
+ * Second pass of varying optimization.
+ * This function is called for each shader pair from last to first,
+ * after the first pass has already been called for each pair.
+ * This is done because the first pass may have enabled additional
+ * opportunities for optimization.
+ *
+ * 1. Optimize varyings again.
+ * 2. If either shader changed, run algebraic optimizations.
+ * 3. Run some NIR passes to clean up the shaders.
+ */
+static void
+radv_graphics_shaders_link_varyings_second(struct radv_shader_stage *producer_stage,
+                                           struct radv_shader_stage *consumer_stage)
+{
+   nir_shader *producer = producer_stage->nir;
+   nir_shader *consumer = consumer_stage->nir;
+
+   const nir_opt_varyings_progress p = nir_opt_varyings(producer, consumer, true, 0, 0);
+
+   /* Run algebraic optimizations on shaders that changed. */
+   if (p & nir_progress_producer) {
+      radv_optimize_nir_algebraic(producer, true, false);
+   }
+   if (p & nir_progress_consumer) {
+      radv_optimize_nir_algebraic(consumer, true, false);
+   }
+
+   /* Re-vectorize I/O for stages that output to memory (LDS or VRAM).
+    * Don't vectorize FS inputs, doing so just regresses shader stats without any benefit.
+    * There is also no benefit from re-vectorizing the outputs of the last pre-rasterization
+    * stage here, because ac_nir_lower_ngg/legacy already takes care of that.
+    */
+   if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
+      NIR_PASS(_, producer, nir_opt_vectorize_io, nir_var_shader_out);
+      NIR_PASS(_, consumer, nir_opt_vectorize_io, nir_var_shader_in);
+   }
+
+   /* Recompute driver locations of PS inputs
+    * because the backend compiler relies on them.
+    */
+   if (consumer->info.stage == MESA_SHADER_FRAGMENT)
+      nir_recompute_io_bases(consumer, nir_var_shader_in);
+
+   /* Gather shader info; at least the I/O info likely changed,
+    * and changes to only the I/O info are not reflected in nir_opt_varyings_progress.
+    */
+   nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
+   nir_shader_gather_info(consumer, nir_shader_get_entrypoint(consumer));
+
+   /* Recreate XFB info from intrinsics (nir_opt_varyings may have changed it). */
+   if (producer->xfb_info) {
+      nir_gather_xfb_info_from_intrinsics(producer);
+   }
+}
+
+/**
+ * Varying optimizations performed on lowered shader I/O.
+ *
+ * We do this after lowering shader I/O because it is more effective
+ * than running the same optimizations on I/O derefs.
+ */
+static void
+radv_graphics_shaders_link_varyings(struct radv_shader_stage *stages)
+{
+   /* Optimize varyings from first to last stage. */
+   gl_shader_stage prev = MESA_SHADER_NONE;
+   for (int i = 0; i < ARRAY_SIZE(graphics_shader_order); ++i) {
+      gl_shader_stage s = graphics_shader_order[i];
+      if (!stages[s].nir)
+         continue;
+
+      if (prev != MESA_SHADER_NONE) {
+         if (!stages[prev].key.optimisations_disabled && !stages[s].key.optimisations_disabled)
+            radv_graphics_shaders_link_varyings_first(&stages[prev], &stages[s]);
+      }
+
+      prev = s;
+   }
+
+   /* Optimize varyings from last to first stage. */
+   gl_shader_stage next = MESA_SHADER_NONE;
+   for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; --i) {
+      gl_shader_stage s = graphics_shader_order[i];
+      if (!stages[s].nir)
+         continue;
+
+      if (next != MESA_SHADER_NONE) {
+         if (!stages[s].key.optimisations_disabled && !stages[next].key.optimisations_disabled)
+            radv_graphics_shaders_link_varyings_second(&stages[s], &stages[next]);
+      }
+
+      next = s;
+   }
+}
+
 struct radv_ps_epilog_key
 radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ps_epilog_state *state)
 {
@@ -2518,6 +2655,9 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
       radv_nir_remap_color_attachment(stages[MESA_SHADER_FRAGMENT].nir, gfx_state);
    }
 
+   /* Optimize varyings on lowered shader I/O (more efficient than optimizing I/O derefs). */
+   radv_graphics_shaders_link_varyings(stages);
+
    radv_fill_shader_info(device, RADV_PIPELINE_GRAPHICS, gfx_state, stages, active_nir_stages);
 
    radv_declare_pipeline_args(device, stages, gfx_state, active_nir_stages);
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index da26148b4b4..845fcc5013a 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -64,6 +64,8 @@ get_nir_options_for_stage(struct radv_physical_device *pdev, gl_shader_stage sta
    options->max_unroll_iterations_aggressive = 128;
    options->lower_doubles_options = nir_lower_drcp | nir_lower_dsqrt | nir_lower_drsq | nir_lower_ddiv;
    options->io_options |= nir_io_mediump_is_32bit;
+   options->varying_estimate_instr_cost = ac_nir_varying_estimate_instr_cost;
+   options->varying_expression_max_cost = ac_nir_varying_expression_max_cost;
 }
 
 void
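
Reviewer note, not part of the patch: the two callbacks installed in the radv_shader.c hunk drive the inter-stage code motion inside nir_opt_varyings. Roughly, the pass sums varying_estimate_instr_cost() over the instructions that feed a varying and only moves the expression across the stage boundary when the total stays within varying_expression_max_cost(). Below is a minimal sketch of such callbacks; the signatures follow the corresponding nir_shader_compiler_options fields, but the cost weights and the example_* names are illustrative assumptions, not the actual ac_nir implementation.

#include "nir.h"

/* Hypothetical per-instruction cost estimate: ordinary scalar ALU ops are
 * cheap, transcendentals are weighted higher, and intrinsics (e.g. loads
 * that would have to be re-materialized in the other stage) are the most
 * expensive. All weights here are made up for illustration.
 */
static unsigned
example_varying_estimate_instr_cost(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      switch (nir_instr_as_alu(instr)->op) {
      case nir_op_fsin:
      case nir_op_fcos:
      case nir_op_frcp:
      case nir_op_frsq:
      case nir_op_fsqrt:
         return 4; /* assumed cost of transcendental ops */
      default:
         return 1; /* assumed cost of plain scalar ALU ops */
      }
   case nir_instr_type_intrinsic:
      return 8; /* assumed cost of replaying a load */
   default:
      return 1;
   }
}

/* Hypothetical budget: expressions whose summed cost exceeds this limit are
 * left in place instead of being moved between producer and consumer.
 */
static unsigned
example_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer)
{
   (void)producer;
   (void)consumer;
   return 4;
}

A driver would wire these up exactly like the radv_shader.c hunk above, just with the example_* functions in place of the ac_nir_varying_* helpers.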