radv: Add new linking step and use nir_opt_varyings.
The nir_opt_varyings pass is the new NIR solution for shader linking, with new features including better I/O compaction, packing 16-bit I/O, inter-stage code motion and more. Fossil DB stats on Rembrandt: Totals from 34585 (43.56% of 79395) affected shaders: MaxWaves: 873362 -> 873260 (-0.01%); split: +0.11%, -0.12% Instrs: 21543639 -> 21526956 (-0.08%); split: -0.27%, +0.19% CodeSize: 115077568 -> 115015536 (-0.05%); split: -0.25%, +0.20% VGPRs: 1465152 -> 1464192 (-0.07%); split: -0.29%, +0.22% Inputs: 161776 -> 158711 (-1.89%); split: -1.90%, +0.00% Outputs: 46532551993 -> 46532548680 (-0.00%); split: -0.00%, +0.00% LDS: 70597120 -> 70794752 (+0.28%); split: -0.04%, +0.32% Latency: 162963576 -> 162785055 (-0.11%); split: -0.25%, +0.14% InvThroughput: 37356298 -> 37261700 (-0.25%); split: -0.37%, +0.12% VClause: 427827 -> 427105 (-0.17%); split: -0.35%, +0.18% SClause: 669989 -> 668623 (-0.20%); split: -0.36%, +0.15% Copies: 1582166 -> 1582592 (+0.03%); split: -0.36%, +0.39% Branches: 523203 -> 523789 (+0.11%); split: -0.04%, +0.15% PreSGPRs: 1272992 -> 1273228 (+0.02%); split: -0.05%, +0.07% PreVGPRs: 1164295 -> 1161623 (-0.23%); split: -0.43%, +0.20% VALU: 13733432 -> 13714109 (-0.14%); split: -0.35%, +0.21% SALU: 2828974 -> 2831841 (+0.10%); split: -0.12%, +0.22% VMEM: 748396 -> 748500 (+0.01%); split: -0.16%, +0.18% SMEM: 1263487 -> 1263329 (-0.01%); split: -0.03%, +0.02% Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28676>
This commit is contained in:
@@ -12,6 +12,7 @@
|
||||
#include "nir/nir.h"
|
||||
#include "nir/nir_builder.h"
|
||||
#include "nir/nir_serialize.h"
|
||||
#include "nir/nir_xfb_info.h"
|
||||
#include "nir/radv_nir.h"
|
||||
#include "spirv/nir_spirv.h"
|
||||
#include "util/disk_cache.h"
|
||||
@@ -22,6 +23,7 @@
|
||||
#include "radv_debug.h"
|
||||
#include "radv_entrypoints.h"
|
||||
#include "radv_formats.h"
|
||||
#include "radv_physical_device.h"
|
||||
#include "radv_pipeline_cache.h"
|
||||
#include "radv_rmv.h"
|
||||
#include "radv_shader.h"
|
||||
@@ -1519,6 +1521,141 @@ radv_graphics_shaders_link(const struct radv_device *device, const struct radv_g
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* First pass of varying optimization.
|
||||
* This function is called for each shader pair from first to last.
|
||||
*
|
||||
* 1. Run some NIR passes in preparation.
|
||||
* 2. Optimize varyings.
|
||||
* 3. If either shader changed, run algebraic optimizations.
|
||||
*/
|
||||
static void
|
||||
radv_graphics_shaders_link_varyings_first(struct radv_shader_stage *producer_stage,
|
||||
struct radv_shader_stage *consumer_stage)
|
||||
{
|
||||
nir_shader *producer = producer_stage->nir;
|
||||
nir_shader *consumer = consumer_stage->nir;
|
||||
|
||||
/* It is expected by nir_opt_varyings that no undefined stores are present in the shader. */
|
||||
NIR_PASS(_, producer, nir_opt_undef);
|
||||
|
||||
/* Update load/store alignments because inter-stage code motion may move instructions used to deduce this info. */
|
||||
NIR_PASS(_, consumer, nir_opt_load_store_update_alignments);
|
||||
|
||||
/* Scalarize all I/O, because nir_opt_varyings and nir_opt_vectorize_io expect all I/O to be scalarized. */
|
||||
NIR_PASS(_, producer, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
|
||||
NIR_PASS(_, consumer, nir_lower_io_to_scalar, nir_var_shader_in, NULL, NULL);
|
||||
|
||||
/* Eliminate useless vec->mov copies resulting from scalarization. */
|
||||
NIR_PASS(_, producer, nir_copy_prop);
|
||||
|
||||
const nir_opt_varyings_progress p = nir_opt_varyings(producer, consumer, true, 0, 0);
|
||||
|
||||
/* Run algebraic optimizations on shaders that changed. */
|
||||
if (p & nir_progress_producer) {
|
||||
radv_optimize_nir_algebraic(producer, false, false);
|
||||
}
|
||||
if (p & nir_progress_consumer) {
|
||||
radv_optimize_nir_algebraic(consumer, false, false);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Second pass of varying optimization.
|
||||
* This function is called for each shader pair from last to first,
|
||||
* after the first pass had already been called for each pair.
|
||||
* Done because the previous pass might have enabled additional
|
||||
* opportunities for optimization.
|
||||
*
|
||||
* 1. Optimize varyings again.
|
||||
* 2. If either shader changed, run algebraic optimizations.
|
||||
* 3. Run some NIR passes to clean up the shaders.
|
||||
*/
|
||||
static void
|
||||
radv_graphics_shaders_link_varyings_second(struct radv_shader_stage *producer_stage,
|
||||
struct radv_shader_stage *consumer_stage)
|
||||
{
|
||||
nir_shader *producer = producer_stage->nir;
|
||||
nir_shader *consumer = consumer_stage->nir;
|
||||
|
||||
const nir_opt_varyings_progress p = nir_opt_varyings(producer, consumer, true, 0, 0);
|
||||
|
||||
/* Run algebraic optimizations on shaders that changed. */
|
||||
if (p & nir_progress_producer) {
|
||||
radv_optimize_nir_algebraic(producer, true, false);
|
||||
}
|
||||
if (p & nir_progress_consumer) {
|
||||
radv_optimize_nir_algebraic(consumer, true, false);
|
||||
}
|
||||
|
||||
/* Re-vectorize I/O for stages that output to memory (LDS or VRAM).
|
||||
* Don't vectorize FS inputs, doing so just regresses shader stats without any benefit.
|
||||
* There is also no benefit from re-vectorizing the outputs of the last pre-rasterization
|
||||
* stage here, because ac_nir_lower_ngg/legacy already takes care of that.
|
||||
*/
|
||||
if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
|
||||
NIR_PASS(_, producer, nir_opt_vectorize_io, nir_var_shader_out);
|
||||
NIR_PASS(_, consumer, nir_opt_vectorize_io, nir_var_shader_in);
|
||||
}
|
||||
|
||||
/* Recompute driver locations of PS inputs
|
||||
* because the backend compiler relies on their driver locations.
|
||||
*/
|
||||
if (consumer->info.stage == MESA_SHADER_FRAGMENT)
|
||||
nir_recompute_io_bases(consumer, nir_var_shader_in);
|
||||
|
||||
/* Gather shader info; at least the I/O info likely changed
|
||||
* and changes to only the I/O info are not reflected in nir_opt_varyings_progress.
|
||||
*/
|
||||
nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
|
||||
nir_shader_gather_info(consumer, nir_shader_get_entrypoint(consumer));
|
||||
|
||||
/* Recreate XFB info from intrinsics (nir_opt_varyings may have changed it). */
|
||||
if (producer->xfb_info) {
|
||||
nir_gather_xfb_info_from_intrinsics(producer);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Varying optimizations performed on lowered shader I/O.
|
||||
*
|
||||
* We do this after lowering shader I/O because this is more effective
|
||||
* than running the same optimizations on I/O derefs.
|
||||
*/
|
||||
static void
|
||||
radv_graphics_shaders_link_varyings(struct radv_shader_stage *stages)
|
||||
{
|
||||
/* Optimize varyings from first to last stage. */
|
||||
gl_shader_stage prev = MESA_SHADER_NONE;
|
||||
for (int i = 0; i < ARRAY_SIZE(graphics_shader_order); ++i) {
|
||||
gl_shader_stage s = graphics_shader_order[i];
|
||||
if (!stages[s].nir)
|
||||
continue;
|
||||
|
||||
if (prev != MESA_SHADER_NONE) {
|
||||
if (!stages[prev].key.optimisations_disabled && !stages[s].key.optimisations_disabled)
|
||||
radv_graphics_shaders_link_varyings_first(&stages[prev], &stages[s]);
|
||||
}
|
||||
|
||||
prev = s;
|
||||
}
|
||||
|
||||
/* Optimize varyings from last to first stage. */
|
||||
gl_shader_stage next = MESA_SHADER_NONE;
|
||||
for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; --i) {
|
||||
gl_shader_stage s = graphics_shader_order[i];
|
||||
if (!stages[s].nir)
|
||||
continue;
|
||||
|
||||
if (next != MESA_SHADER_NONE) {
|
||||
if (!stages[s].key.optimisations_disabled && !stages[next].key.optimisations_disabled)
|
||||
radv_graphics_shaders_link_varyings_second(&stages[s], &stages[next]);
|
||||
}
|
||||
|
||||
next = s;
|
||||
}
|
||||
}
|
||||
|
||||
struct radv_ps_epilog_key
|
||||
radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ps_epilog_state *state)
|
||||
{
|
||||
@@ -2518,6 +2655,9 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
|
||||
radv_nir_remap_color_attachment(stages[MESA_SHADER_FRAGMENT].nir, gfx_state);
|
||||
}
|
||||
|
||||
/* Optimize varyings on lowered shader I/O (more efficient than optimizing I/O derefs). */
|
||||
radv_graphics_shaders_link_varyings(stages);
|
||||
|
||||
radv_fill_shader_info(device, RADV_PIPELINE_GRAPHICS, gfx_state, stages, active_nir_stages);
|
||||
|
||||
radv_declare_pipeline_args(device, stages, gfx_state, active_nir_stages);
|
||||
|
@@ -64,6 +64,8 @@ get_nir_options_for_stage(struct radv_physical_device *pdev, gl_shader_stage sta
|
||||
options->max_unroll_iterations_aggressive = 128;
|
||||
options->lower_doubles_options = nir_lower_drcp | nir_lower_dsqrt | nir_lower_drsq | nir_lower_ddiv;
|
||||
options->io_options |= nir_io_mediump_is_32bit;
|
||||
options->varying_estimate_instr_cost = ac_nir_varying_estimate_instr_cost;
|
||||
options->varying_expression_max_cost = ac_nir_varying_expression_max_cost;
|
||||
}
|
||||
|
||||
void
|
||||
|
Reference in New Issue
Block a user