nir: add nir_opt_varyings, new pass optimizing and compacting varyings
Highlights: - all shader stages and all input/output types are handled, including inputs and outputs with multiple vertices - the optimizations performed are: unused input/output removal, constant and uniform propagation, output deduplication, inter-shader code motion, and compaction - constant and uniform propagation and output deduplication work even if a shader contains multiple stores of the same output, e.g. in GS - the same optimizations are also performed between output stores and output loads (for TCS) - FS inputs are packed aggressively. Only flat, interp FP32, and interp FP16 can't be in the same vec4. Also, if an output value is non-divergent within a primitive, the corresponding FS input is opportunistically promoted to flat. The big comment at the beginning of nir_opt_varyings.c has a detailed explanation, which is the same as: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8841 dEQP and GLCTS have incorrect tests that fail with this, see: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10361 Acked-by: Timothy Arceri <tarceri@itsqueeze.com> Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26819>
This commit is contained in:
@@ -273,6 +273,7 @@ files_libnir = files(
|
||||
'nir_opt_undef.c',
|
||||
'nir_opt_uniform_atomics.c',
|
||||
'nir_opt_uniform_subgroup.c',
|
||||
'nir_opt_varyings.c',
|
||||
'nir_opt_vectorize.c',
|
||||
'nir_passthrough_gs.c',
|
||||
'nir_passthrough_tcs.c',
|
||||
|
@@ -3577,6 +3577,16 @@ typedef enum {
|
||||
*/
|
||||
nir_io_has_flexible_input_interpolation_except_flat = BITFIELD_BIT(0),
|
||||
|
||||
/**
|
||||
* nir_opt_varyings compacts (relocates) components of varyings by
|
||||
* rewriting their locations completely, effectively moving components of
|
||||
* varyings between slots. This option forces nir_opt_varyings to make
|
||||
* VARYING_SLOT_POS unused by moving its contents to VARn if the consumer
|
||||
* is not FS. If this option is not set and POS is unused, it moves
|
||||
* components of VARn to POS until it's fully used.
|
||||
*/
|
||||
nir_io_dont_use_pos_for_non_fs_varyings = BITFIELD_BIT(1),
|
||||
|
||||
/* Options affecting the GLSL compiler are below. */
|
||||
|
||||
/**
|
||||
@@ -3584,6 +3594,17 @@ typedef enum {
|
||||
* This only affects GLSL compilation.
|
||||
*/
|
||||
nir_io_glsl_lower_derefs = BITFIELD_BIT(16),
|
||||
|
||||
/**
|
||||
* Run nir_opt_varyings in the GLSL linker. If false, optimize varyings
|
||||
* the old way and lower IO later.
|
||||
*
|
||||
* nir_io_lower_to_intrinsics must be set for this to take effect.
|
||||
*
|
||||
* TODO: remove this and default to enabled once we are sure that this
|
||||
* codepath is solid.
|
||||
*/
|
||||
nir_io_glsl_opt_varyings = BITFIELD_BIT(17),
|
||||
} nir_io_options;
|
||||
|
||||
/** An instruction filtering callback
|
||||
@@ -4083,6 +4104,31 @@ typedef struct nir_shader_compiler_options {
|
||||
* Used by nir_lower_io_passes.
|
||||
*/
|
||||
void (*lower_mediump_io)(struct nir_shader *nir);
|
||||
|
||||
/**
|
||||
* Return the maximum cost of an expression that's written to a shader
|
||||
* output that can be moved into the next shader to remove that output.
|
||||
*
|
||||
* Currently only uniform expressions are moved. A uniform expression is
|
||||
* any ALU expression sourcing only constants, uniforms, and UBO loads.
|
||||
*
|
||||
* Set to NULL or return 0 if you only want to propagate constants from
|
||||
* outputs to inputs.
|
||||
*
|
||||
* Drivers can set the maximum cost based on the types of consecutive
|
||||
* shaders or shader SHA1s.
|
||||
*
|
||||
* Drivers should also set "varying_estimate_instr_cost".
|
||||
*/
|
||||
unsigned (*varying_expression_max_cost)(struct nir_shader *consumer,
|
||||
struct nir_shader *producer);
|
||||
|
||||
/**
|
||||
* Return the cost of an instruction that could be moved into the next
|
||||
* shader. If the cost of all instructions in an expression is <=
|
||||
* varying_expression_max_cost(), the instruction is moved.
|
||||
*/
|
||||
unsigned (*varying_estimate_instr_cost)(struct nir_instr *instr);
|
||||
} nir_shader_compiler_options;
|
||||
|
||||
typedef struct nir_shader {
|
||||
@@ -5204,6 +5250,22 @@ nir_deref_instr *nir_clone_deref_instr(struct nir_builder *b,
|
||||
nir_variable *var,
|
||||
nir_deref_instr *deref);
|
||||
|
||||
|
||||
/* Return status from nir_opt_varyings. */
|
||||
typedef enum {
|
||||
/* Whether the IR changed such that NIR optimizations should be run, such
|
||||
* as due to removal of loads and stores. IO semantic changes such as
|
||||
* compaction don't count as IR changes because they don't affect NIR
|
||||
* optimizations.
|
||||
*/
|
||||
nir_progress_producer = BITFIELD_BIT(0),
|
||||
nir_progress_consumer = BITFIELD_BIT(1),
|
||||
} nir_opt_varyings_progress;
|
||||
|
||||
nir_opt_varyings_progress
|
||||
nir_opt_varyings(nir_shader *producer, nir_shader *consumer, bool spirv,
|
||||
unsigned max_uniform_components, unsigned max_ubos_per_stage);
|
||||
|
||||
bool nir_slot_is_sysval_output(gl_varying_slot slot,
|
||||
gl_shader_stage next_shader);
|
||||
bool nir_slot_is_varying(gl_varying_slot slot);
|
||||
|
4091
src/compiler/nir/nir_opt_varyings.c
Normal file
4091
src/compiler/nir/nir_opt_varyings.c
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user