glsl/linker,st/mesa: enable nir_opt_varyings and lower IO in the linker

The varying linker isn't changed. The passes are executed after linking
varyings and before linking uniforms if nir->options->lower_io_variables
is true. nir_opt_varyings can move uniforms between shaders and cause them
to be DCE'd.

It requires moving IO deref lowering from st/mesa into the GLSL linker
and nir_opt_varyings should be added at the same time because IO deref
lowering alone would disable IO optimizations in st/mesa such as
compaction.

Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26819>
This commit is contained in:
Marek Olšák
2023-09-26 01:30:15 -04:00
committed by Marge Bot
parent 70fd817278
commit 9b819adbd8
11 changed files with 179 additions and 26 deletions

View File

@@ -4509,6 +4509,9 @@ gl_nir_link_varyings(const struct gl_constants *consts,
break; break;
} }
} }
/* Lower IO and thoroughly optimize and compact varyings. */
gl_nir_lower_optimize_varyings(consts, prog, false);
} }
ralloc_free(mem_ctx); ralloc_free(mem_ctx);

View File

@@ -23,6 +23,7 @@
#include "nir.h" #include "nir.h"
#include "nir_builder.h" #include "nir_builder.h"
#include "nir_xfb_info.h"
#include "gl_nir.h" #include "gl_nir.h"
#include "gl_nir_linker.h" #include "gl_nir_linker.h"
#include "gl_nir_link_varyings.h" #include "gl_nir_link_varyings.h"
@@ -1278,6 +1279,131 @@ prelink_lowering(const struct gl_constants *consts,
return true; return true;
} }
/**
* Lower load_deref and store_deref on input/output variables to load_input
* and store_output intrinsics, and perform varying optimizations and
* compaction.
*/
void
gl_nir_lower_optimize_varyings(const struct gl_constants *consts,
struct gl_shader_program *prog, bool spirv)
{
nir_shader *shaders[MESA_SHADER_STAGES];
unsigned num_shaders = 0;
unsigned max_ubos = UINT_MAX;
unsigned max_uniform_comps = UINT_MAX;
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_linked_shader *shader = prog->_LinkedShaders[i];
if (!shader)
continue;
nir_shader *nir = shader->Program->nir;
if (nir->info.stage == MESA_SHADER_COMPUTE)
return;
if (!(nir->options->io_options & nir_io_glsl_lower_derefs) ||
!(nir->options->io_options & nir_io_glsl_opt_varyings))
return;
shaders[num_shaders] = nir;
max_uniform_comps = MIN2(max_uniform_comps,
consts->Program[i].MaxUniformComponents);
max_ubos = MIN2(max_ubos, consts->Program[i].MaxUniformBlocks);
num_shaders++;
}
/* Lower IO derefs to load and store intrinsics. */
for (unsigned i = 0; i < num_shaders; i++) {
nir_shader *nir = shaders[i];
nir_lower_io_passes(nir, true);
}
/* There is nothing to optimize for only 1 shader. */
if (num_shaders == 1)
return;
for (unsigned i = 0; i < num_shaders; i++) {
nir_shader *nir = shaders[i];
/* nir_opt_varyings requires scalar IO. */
NIR_PASS_V(nir, nir_lower_io_to_scalar,
(i != 0 ? nir_var_shader_in : 0) |
(i != num_shaders - 1 ? nir_var_shader_out : 0), NULL, NULL);
/* nir_opt_varyings requires shaders to be optimized. */
gl_nir_opts(nir);
}
/* Optimize varyings from the first shader to the last shader first, and
* then in the opposite order from the last changed producer.
*
* For example, VS->GS->FS is optimized in this order first:
* (VS,GS), (GS,FS)
*
* That ensures that constants and undefs (dead inputs) are propagated
* forward.
*
* If GS was changed while optimizing (GS,FS), (VS,GS) is optimized again
* because removing outputs in GS can cause a chain reaction in making
* GS inputs, VS outputs, and VS inputs dead.
*/
unsigned highest_changed_producer = 0;
for (unsigned i = 0; i < num_shaders - 1; i++) {
nir_shader *producer = shaders[i];
nir_shader *consumer = shaders[i + 1];
nir_opt_varyings_progress progress =
nir_opt_varyings(producer, consumer, spirv, max_uniform_comps,
max_ubos);
if (progress & nir_progress_producer) {
gl_nir_opts(producer);
highest_changed_producer = i;
}
if (progress & nir_progress_consumer)
gl_nir_opts(consumer);
}
/* Optimize varyings from the highest changed producer to the first
* shader.
*/
for (unsigned i = highest_changed_producer; i > 0; i--) {
nir_shader *producer = shaders[i - 1];
nir_shader *consumer = shaders[i];
nir_opt_varyings_progress progress =
nir_opt_varyings(producer, consumer, spirv, max_uniform_comps,
max_ubos);
if (progress & nir_progress_producer)
gl_nir_opts(producer);
if (progress & nir_progress_consumer)
gl_nir_opts(consumer);
}
/* Final cleanups. */
for (unsigned i = 0; i < num_shaders; i++) {
nir_shader *nir = shaders[i];
/* Recompute intrinsic bases, which are totally random after
* optimizations and compaction. Do that for all inputs and outputs,
* including VS inputs because those could have been removed too.
*/
NIR_PASS_V(nir, nir_recompute_io_bases,
nir_var_shader_in | nir_var_shader_out);
/* Regenerate transform feedback info because compaction in
* nir_opt_varyings always moves them to other slots.
*/
if (nir->xfb_info)
nir_gather_xfb_info_from_intrinsics(nir);
}
}
bool bool
gl_nir_link_spirv(const struct gl_constants *consts, gl_nir_link_spirv(const struct gl_constants *consts,
const struct gl_extensions *exts, const struct gl_extensions *exts,
@@ -1300,14 +1426,18 @@ gl_nir_link_spirv(const struct gl_constants *consts,
if (!prelink_lowering(consts, exts, prog, linked_shader, num_shaders)) if (!prelink_lowering(consts, exts, prog, linked_shader, num_shaders))
return false; return false;
/* Linking the stages in the opposite order (from fragment to vertex) gl_nir_lower_optimize_varyings(consts, prog, true);
* ensures that inter-shader outputs written to in an earlier stage
* are eliminated if they are (transitively) not used in a later if (!linked_shader[0]->Program->nir->info.io_lowered) {
* stage. /* Linking the stages in the opposite order (from fragment to vertex)
*/ * ensures that inter-shader outputs written to in an earlier stage
for (int i = num_shaders - 2; i >= 0; i--) { * are eliminated if they are (transitively) not used in a later
gl_nir_link_opts(linked_shader[i]->Program->nir, * stage.
linked_shader[i + 1]->Program->nir); */
for (int i = num_shaders - 2; i >= 0; i--) {
gl_nir_link_opts(linked_shader[i]->Program->nir,
linked_shader[i + 1]->Program->nir);
}
} }
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
@@ -1663,14 +1793,16 @@ gl_nir_link_glsl(const struct gl_constants *consts,
if (prog->data->LinkStatus == LINKING_FAILURE) if (prog->data->LinkStatus == LINKING_FAILURE)
return false; return false;
/* Linking the stages in the opposite order (from fragment to vertex) if (!linked_shader[0]->Program->nir->info.io_lowered) {
* ensures that inter-shader outputs written to in an earlier stage /* Linking the stages in the opposite order (from fragment to vertex)
* are eliminated if they are (transitively) not used in a later * ensures that inter-shader outputs written to in an earlier stage
* stage. * are eliminated if they are (transitively) not used in a later
*/ * stage.
for (int i = num_shaders - 2; i >= 0; i--) { */
gl_nir_link_opts(linked_shader[i]->Program->nir, for (int i = num_shaders - 2; i >= 0; i--) {
linked_shader[i + 1]->Program->nir); gl_nir_link_opts(linked_shader[i]->Program->nir,
linked_shader[i + 1]->Program->nir);
}
} }
/* Tidy up any left overs from the linking process for single shaders. /* Tidy up any left overs from the linking process for single shaders.

View File

@@ -131,6 +131,10 @@ bool lower_packed_varying_needs_lowering(nir_shader *shader, nir_variable *var,
bool disable_xfb_packing, bool disable_xfb_packing,
bool disable_varying_packing); bool disable_varying_packing);
void
gl_nir_lower_optimize_varyings(const struct gl_constants *consts,
struct gl_shader_program *prog, bool spirv);
#ifdef __cplusplus #ifdef __cplusplus
} /* extern "C" */ } /* extern "C" */
#endif #endif

View File

@@ -3227,8 +3227,14 @@ nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs)
NIR_PASS_V(nir, nir_lower_global_vars_to_local); NIR_PASS_V(nir, nir_lower_global_vars_to_local);
} }
/* The correct lower_64bit_to_32 flag is required by st/mesa depending
* on whether the GLSL linker lowers IO or not. Setting the wrong flag
* would break 64-bit vertex attribs for GLSL.
*/
NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out | nir_var_shader_in, NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out | nir_var_shader_in,
type_size_vec4, nir_lower_io_lower_64bit_to_32); type_size_vec4,
renumber_vs_inputs ? nir_lower_io_lower_64bit_to_32_new :
nir_lower_io_lower_64bit_to_32);
/* nir_io_add_const_offset_to_base needs actual constants. */ /* nir_io_add_const_offset_to_base needs actual constants. */
NIR_PASS_V(nir, nir_opt_constant_folding); NIR_PASS_V(nir, nir_opt_constant_folding);

View File

@@ -133,4 +133,3 @@ KHR-GL46.sparse_texture2_tests.UncommittedRegionsAccess,Fail
KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupColor,Fail KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupColor,Fail
KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupResidency,Fail KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupResidency,Fail
KHR-GL46.shader_image_load_store.basic-allTargets-atomic,Fail KHR-GL46.shader_image_load_store.basic-allTargets-atomic,Fail
KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_component,Fail
1 # LLVM 15.0.1
133 KHR-GL46.shader_image_load_store.basic-allTargets-atomic,Fail
134
135

View File

@@ -22,7 +22,6 @@ spec@!opengl 1.0@rasterpos,Fail
spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail
spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail
spec@!opengl 1.1@line-smooth-stipple,Fail spec@!opengl 1.1@line-smooth-stipple,Fail
spec@!opengl es 3.0@gles-3.0-transform-feedback-uniform-buffer-object,Fail
spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash
spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2-mat2,Fail spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2-mat2,Fail
spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2x3-mat2x3,Fail spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2x3-mat2x3,Fail
@@ -136,7 +135,6 @@ KHR-GL46.sparse_texture2_tests.UncommittedRegionsAccess,Fail
KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupColor,Fail KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupColor,Fail
KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupResidency,Fail KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupResidency,Fail
KHR-GL46.shader_image_load_store.basic-allTargets-atomic,Fail KHR-GL46.shader_image_load_store.basic-allTargets-atomic,Fail
KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_component,Fail
# escts failures # escts failures
KHR-GLES31.core.shader_image_load_store.basic-allTargets-loadStoreCS,Fail KHR-GLES31.core.shader_image_load_store.basic-allTargets-loadStoreCS,Fail
1 # LLVM 18.0.0git
22 spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail
23 spec@!opengl 1.1@line-smooth-stipple,Fail
24 spec@!opengl es 3.0@gles-3.0-transform-feedback-uniform-buffer-object,Fail spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash
spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash
25 spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2-mat2,Fail
26 spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2x3-mat2x3,Fail
27 spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2x4-mat2x4,Fail
135 KHR-GL46.shader_image_load_store.basic-allTargets-atomic,Fail
136 KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_component,Fail # escts failures
137 # escts failures KHR-GLES31.core.shader_image_load_store.basic-allTargets-loadStoreCS,Fail
KHR-GLES31.core.shader_image_load_store.basic-allTargets-loadStoreCS,Fail
138 KHR-GLES31.core.shader_image_load_store.basic-allTargets-loadStoreVS,Fail
139 # These are dEQP bugs. See: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10361
140 dEQP-GLES31.functional.geometry_shading.query.referenced_by_geometry_shader,Fail

View File

@@ -135,7 +135,6 @@ KHR-GL46.sparse_texture2_tests.StandardPageSizesTestCase,Fail
KHR-GL46.sparse_texture2_tests.UncommittedRegionsAccess,Fail KHR-GL46.sparse_texture2_tests.UncommittedRegionsAccess,Fail
KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupColor,Fail KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupColor,Fail
KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupResidency,Fail KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupResidency,Fail
KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_component,Fail
# escts failures # escts failures
KHR-GLES31.core.shader_image_load_store.basic-allTargets-loadStoreCS,Fail KHR-GLES31.core.shader_image_load_store.basic-allTargets-loadStoreCS,Fail
1 # LLVM 18.0.0git
135 KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupResidency,Fail
136 KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_component,Fail # escts failures
137 # escts failures KHR-GLES31.core.shader_image_load_store.basic-allTargets-loadStoreCS,Fail
KHR-GLES31.core.shader_image_load_store.basic-allTargets-loadStoreCS,Fail
138 KHR-GLES31.core.shader_image_load_store.basic-allTargets-loadStoreVS,Fail
139 # gfx11-only failures
140 spec@!opengl 1.1@line-smooth-stipple,Fail

View File

@@ -741,7 +741,6 @@ KHR-GL46.transform_feedback_overflow_query_ARB.basic-single-stream-interleaved-a
KHR-GL46.transform_feedback_overflow_query_ARB.basic-single-stream-separate-attribs,Fail KHR-GL46.transform_feedback_overflow_query_ARB.basic-single-stream-separate-attribs,Fail
KHR-GL46.transform_feedback_overflow_query_ARB.multiple-streams-multiple-buffers-per-stream,Fail KHR-GL46.transform_feedback_overflow_query_ARB.multiple-streams-multiple-buffers-per-stream,Fail
KHR-GL46.transform_feedback_overflow_query_ARB.multiple-streams-one-buffer-per-stream,Fail KHR-GL46.transform_feedback_overflow_query_ARB.multiple-streams-one-buffer-per-stream,Fail
KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_component,Fail
dEQP-GLES2.functional.texture.format.la88_2d_npot,Fail dEQP-GLES2.functional.texture.format.la88_2d_npot,Fail
dEQP-GLES2.functional.texture.format.la88_2d_pot,Fail dEQP-GLES2.functional.texture.format.la88_2d_pot,Fail
1 glx@glx-multi-window-single-context Fail
741 KHR-GL46.transform_feedback_overflow_query_ARB.multiple-streams-multiple-buffers-per-stream Fail
742 KHR-GL46.transform_feedback_overflow_query_ARB.multiple-streams-one-buffer-per-stream Fail
743 KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_component dEQP-GLES2.functional.texture.format.la88_2d_npot Fail
dEQP-GLES2.functional.texture.format.la88_2d_npot Fail
744 dEQP-GLES2.functional.texture.format.la88_2d_pot Fail
745 dEQP-GLES2.functional.texture.format.la88_cube_pot Fail
746 dEQP-GLES2.functional.texture.mipmap.2d.generate.la88_fastest Fail

View File

@@ -240,4 +240,3 @@ KHR-GL46.shader_ballot_tests.ShaderBallotFunctionRead,Fail
KHR-GL46.shader_image_load_store.advanced-sso-subroutine,Fail KHR-GL46.shader_image_load_store.advanced-sso-subroutine,Fail
KHR-GL46.shader_image_load_store.basic-allTargets-atomic,Fail KHR-GL46.shader_image_load_store.basic-allTargets-atomic,Fail
KHR-GL46.draw_indirect.basic-drawArrays-bufferOffset,Fail KHR-GL46.draw_indirect.basic-drawArrays-bufferOffset,Fail
KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_component,Fail
1 # piglit failures
240 KHR-GL46.shader_image_load_store.basic-allTargets-atomic,Fail
241 KHR-GL46.draw_indirect.basic-drawArrays-bufferOffset,Fail
242

View File

@@ -331,7 +331,9 @@ _mesa_spirv_to_nir(struct gl_context *ctx,
NIR_PASS(_, nir, nir_split_var_copies); NIR_PASS(_, nir, nir_split_var_copies);
NIR_PASS(_, nir, nir_split_per_member_structs); NIR_PASS(_, nir, nir_split_per_member_structs);
if (nir->info.stage == MESA_SHADER_VERTEX) if (nir->info.stage == MESA_SHADER_VERTEX &&
(!(nir->options->io_options & nir_io_glsl_lower_derefs) ||
!(nir->options->io_options & nir_io_glsl_opt_varyings)))
nir_remap_dual_slot_attributes(nir, &linked_shader->Program->DualSlotInputs); nir_remap_dual_slot_attributes(nir, &linked_shader->Program->DualSlotInputs);
NIR_PASS(_, nir, nir_lower_frexp); NIR_PASS(_, nir, nir_lower_frexp);

View File

@@ -593,6 +593,14 @@ st_link_glsl_to_nir(struct gl_context *ctx,
const struct gl_shader_compiler_options *options = const struct gl_shader_compiler_options *options =
&ctx->Const.ShaderCompilerOptions[stage]; &ctx->Const.ShaderCompilerOptions[stage];
if (nir->info.io_lowered) {
/* Since IO is lowered, we won't need the IO variables from now on.
* nir_build_program_resource_list was the last pass that needed them.
*/
NIR_PASS_V(nir, nir_remove_dead_variables,
nir_var_shader_in | nir_var_shader_out, NULL);
}
/* If there are forms of indirect addressing that the driver /* If there are forms of indirect addressing that the driver
* cannot handle, perform the lowering pass. * cannot handle, perform the lowering pass.
*/ */
@@ -713,7 +721,8 @@ st_link_glsl_to_nir(struct gl_context *ctx,
prog->info.num_abos = old_info.num_abos; prog->info.num_abos = old_info.num_abos;
if (prog->info.stage == MESA_SHADER_VERTEX) { if (prog->info.stage == MESA_SHADER_VERTEX) {
if (prog->nir->info.io_lowered) { if (prog->nir->info.io_lowered &&
prog->nir->options->io_options & nir_io_glsl_opt_varyings) {
prog->info.inputs_read = prog->nir->info.inputs_read; prog->info.inputs_read = prog->nir->info.inputs_read;
prog->DualSlotInputs = prog->nir->info.dual_slot_inputs; prog->DualSlotInputs = prog->nir->info.dual_slot_inputs;
} else { } else {
@@ -886,8 +895,11 @@ st_finalize_nir(struct st_context *st, struct gl_program *prog,
/* Lower load_deref/store_deref of inputs and outputs. /* Lower load_deref/store_deref of inputs and outputs.
* This depends on st_nir_assign_varying_locations. * This depends on st_nir_assign_varying_locations.
*
* TODO: remove this once nir_io_glsl_opt_varyings is enabled by default.
*/ */
if (nir->options->io_options & nir_io_glsl_lower_derefs) { if (nir->options->io_options & nir_io_glsl_lower_derefs &&
!(nir->options->io_options & nir_io_glsl_opt_varyings)) {
nir_lower_io_passes(nir, false); nir_lower_io_passes(nir, false);
NIR_PASS(_, nir, nir_remove_dead_variables, NIR_PASS(_, nir, nir_remove_dead_variables,
nir_var_shader_in | nir_var_shader_out, NULL); nir_var_shader_in | nir_var_shader_out, NULL);