From 9b819adbd867dc250dba3b73a0d0a0407aef94fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 26 Sep 2023 01:30:15 -0400 Subject: [PATCH] glsl/linker,st/mesa: enable nir_opt_varyings and lower IO in the linker The varying linker isn't changed. The passes are executed after linking varyings and before linking uniforms if nir->options->lower_io_variables is true. nir_opt_varyings can move uniforms between shaders and cause them to be DCE'd. It requires moving IO deref lowering from st/mesa into the GLSL linker and nir_opt_varyings should be added at the same time because IO deref lowering alone would disable IO optimizations in st/mesa such as compaction. Reviewed-by: Timothy Arceri Part-of: --- src/compiler/glsl/gl_nir_link_varyings.c | 3 + src/compiler/glsl/gl_nir_linker.c | 164 ++++++++++++++++-- src/compiler/glsl/gl_nir_linker.h | 4 + src/compiler/nir/nir_lower_io.c | 8 +- .../drivers/radeonsi/ci/gfx10-navi10-fail.csv | 1 - .../radeonsi/ci/gfx10_3-navi21-fail.csv | 2 - .../radeonsi/ci/gfx11-gfx1100-fail.csv | 1 - .../drivers/radeonsi/ci/gfx6-tahiti-fail.csv | 1 - .../radeonsi/ci/gfx8-polaris11-fail.csv | 1 - src/mesa/main/glspirv.c | 4 +- src/mesa/state_tracker/st_glsl_to_nir.cpp | 16 +- 11 files changed, 179 insertions(+), 26 deletions(-) diff --git a/src/compiler/glsl/gl_nir_link_varyings.c b/src/compiler/glsl/gl_nir_link_varyings.c index 4f576f2ee71..543cbc074aa 100644 --- a/src/compiler/glsl/gl_nir_link_varyings.c +++ b/src/compiler/glsl/gl_nir_link_varyings.c @@ -4509,6 +4509,9 @@ gl_nir_link_varyings(const struct gl_constants *consts, break; } } + + /* Lower IO and thoroughly optimize and compact varyings. */ + gl_nir_lower_optimize_varyings(consts, prog, false); } ralloc_free(mem_ctx); diff --git a/src/compiler/glsl/gl_nir_linker.c b/src/compiler/glsl/gl_nir_linker.c index e64aaaec3cc..1cd96a647dd 100644 --- a/src/compiler/glsl/gl_nir_linker.c +++ b/src/compiler/glsl/gl_nir_linker.c @@ -23,6 +23,7 @@ #include "nir.h" #include "nir_builder.h" +#include "nir_xfb_info.h" #include "gl_nir.h" #include "gl_nir_linker.h" #include "gl_nir_link_varyings.h" @@ -1278,6 +1279,131 @@ prelink_lowering(const struct gl_constants *consts, return true; } +/** + * Lower load_deref and store_deref on input/output variables to load_input + * and store_output intrinsics, and perform varying optimizations and + * compaction. + */ +void +gl_nir_lower_optimize_varyings(const struct gl_constants *consts, + struct gl_shader_program *prog, bool spirv) +{ + nir_shader *shaders[MESA_SHADER_STAGES]; + unsigned num_shaders = 0; + unsigned max_ubos = UINT_MAX; + unsigned max_uniform_comps = UINT_MAX; + + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_linked_shader *shader = prog->_LinkedShaders[i]; + + if (!shader) + continue; + + nir_shader *nir = shader->Program->nir; + + if (nir->info.stage == MESA_SHADER_COMPUTE) + return; + + if (!(nir->options->io_options & nir_io_glsl_lower_derefs) || + !(nir->options->io_options & nir_io_glsl_opt_varyings)) + return; + + shaders[num_shaders] = nir; + max_uniform_comps = MIN2(max_uniform_comps, + consts->Program[i].MaxUniformComponents); + max_ubos = MIN2(max_ubos, consts->Program[i].MaxUniformBlocks); + num_shaders++; + } + + /* Lower IO derefs to load and store intrinsics. */ + for (unsigned i = 0; i < num_shaders; i++) { + nir_shader *nir = shaders[i]; + + nir_lower_io_passes(nir, true); + } + + /* There is nothing to optimize for only 1 shader. */ + if (num_shaders == 1) + return; + + for (unsigned i = 0; i < num_shaders; i++) { + nir_shader *nir = shaders[i]; + + /* nir_opt_varyings requires scalar IO. */ + NIR_PASS_V(nir, nir_lower_io_to_scalar, + (i != 0 ? nir_var_shader_in : 0) | + (i != num_shaders - 1 ? nir_var_shader_out : 0), NULL, NULL); + + /* nir_opt_varyings requires shaders to be optimized. */ + gl_nir_opts(nir); + } + + /* Optimize varyings from the first shader to the last shader first, and + * then in the opposite order from the last changed producer. + * + * For example, VS->GS->FS is optimized in this order first: + * (VS,GS), (GS,FS) + * + * That ensures that constants and undefs (dead inputs) are propagated + * forward. + * + * If GS was changed while optimizing (GS,FS), (VS,GS) is optimized again + * because removing outputs in GS can cause a chain reaction in making + * GS inputs, VS outputs, and VS inputs dead. + */ + unsigned highest_changed_producer = 0; + for (unsigned i = 0; i < num_shaders - 1; i++) { + nir_shader *producer = shaders[i]; + nir_shader *consumer = shaders[i + 1]; + + nir_opt_varyings_progress progress = + nir_opt_varyings(producer, consumer, spirv, max_uniform_comps, + max_ubos); + + if (progress & nir_progress_producer) { + gl_nir_opts(producer); + highest_changed_producer = i; + } + if (progress & nir_progress_consumer) + gl_nir_opts(consumer); + } + + /* Optimize varyings from the highest changed producer to the first + * shader. + */ + for (unsigned i = highest_changed_producer; i > 0; i--) { + nir_shader *producer = shaders[i - 1]; + nir_shader *consumer = shaders[i]; + + nir_opt_varyings_progress progress = + nir_opt_varyings(producer, consumer, spirv, max_uniform_comps, + max_ubos); + + if (progress & nir_progress_producer) + gl_nir_opts(producer); + if (progress & nir_progress_consumer) + gl_nir_opts(consumer); + } + + /* Final cleanups. */ + for (unsigned i = 0; i < num_shaders; i++) { + nir_shader *nir = shaders[i]; + + /* Recompute intrinsic bases, which are totally random after + * optimizations and compaction. Do that for all inputs and outputs, + * including VS inputs because those could have been removed too. + */ + NIR_PASS_V(nir, nir_recompute_io_bases, + nir_var_shader_in | nir_var_shader_out); + + /* Regenerate transform feedback info because compaction in + * nir_opt_varyings always moves them to other slots. + */ + if (nir->xfb_info) + nir_gather_xfb_info_from_intrinsics(nir); + } +} + bool gl_nir_link_spirv(const struct gl_constants *consts, const struct gl_extensions *exts, @@ -1300,14 +1426,18 @@ gl_nir_link_spirv(const struct gl_constants *consts, if (!prelink_lowering(consts, exts, prog, linked_shader, num_shaders)) return false; - /* Linking the stages in the opposite order (from fragment to vertex) - * ensures that inter-shader outputs written to in an earlier stage - * are eliminated if they are (transitively) not used in a later - * stage. - */ - for (int i = num_shaders - 2; i >= 0; i--) { - gl_nir_link_opts(linked_shader[i]->Program->nir, - linked_shader[i + 1]->Program->nir); + gl_nir_lower_optimize_varyings(consts, prog, true); + + if (!linked_shader[0]->Program->nir->info.io_lowered) { + /* Linking the stages in the opposite order (from fragment to vertex) + * ensures that inter-shader outputs written to in an earlier stage + * are eliminated if they are (transitively) not used in a later + * stage. + */ + for (int i = num_shaders - 2; i >= 0; i--) { + gl_nir_link_opts(linked_shader[i]->Program->nir, + linked_shader[i + 1]->Program->nir); + } } for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { @@ -1663,14 +1793,16 @@ gl_nir_link_glsl(const struct gl_constants *consts, if (prog->data->LinkStatus == LINKING_FAILURE) return false; - /* Linking the stages in the opposite order (from fragment to vertex) - * ensures that inter-shader outputs written to in an earlier stage - * are eliminated if they are (transitively) not used in a later - * stage. - */ - for (int i = num_shaders - 2; i >= 0; i--) { - gl_nir_link_opts(linked_shader[i]->Program->nir, - linked_shader[i + 1]->Program->nir); + if (!linked_shader[0]->Program->nir->info.io_lowered) { + /* Linking the stages in the opposite order (from fragment to vertex) + * ensures that inter-shader outputs written to in an earlier stage + * are eliminated if they are (transitively) not used in a later + * stage. + */ + for (int i = num_shaders - 2; i >= 0; i--) { + gl_nir_link_opts(linked_shader[i]->Program->nir, + linked_shader[i + 1]->Program->nir); + } } /* Tidy up any left overs from the linking process for single shaders. diff --git a/src/compiler/glsl/gl_nir_linker.h b/src/compiler/glsl/gl_nir_linker.h index f31ef25097b..3531ac413f9 100644 --- a/src/compiler/glsl/gl_nir_linker.h +++ b/src/compiler/glsl/gl_nir_linker.h @@ -131,6 +131,10 @@ bool lower_packed_varying_needs_lowering(nir_shader *shader, nir_variable *var, bool disable_xfb_packing, bool disable_varying_packing); +void +gl_nir_lower_optimize_varyings(const struct gl_constants *consts, + struct gl_shader_program *prog, bool spirv); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index d7ec7d0113e..4cabad15bf7 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -3227,8 +3227,14 @@ nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs) NIR_PASS_V(nir, nir_lower_global_vars_to_local); } + /* The correct lower_64bit_to_32 flag is required by st/mesa depending + * on whether the GLSL linker lowers IO or not. Setting the wrong flag + * would break 64-bit vertex attribs for GLSL. + */ NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out | nir_var_shader_in, - type_size_vec4, nir_lower_io_lower_64bit_to_32); + type_size_vec4, + renumber_vs_inputs ? nir_lower_io_lower_64bit_to_32_new : + nir_lower_io_lower_64bit_to_32); /* nir_io_add_const_offset_to_base needs actual constants. */ NIR_PASS_V(nir, nir_opt_constant_folding); diff --git a/src/gallium/drivers/radeonsi/ci/gfx10-navi10-fail.csv b/src/gallium/drivers/radeonsi/ci/gfx10-navi10-fail.csv index 64ff7b63abc..0afa1822393 100644 --- a/src/gallium/drivers/radeonsi/ci/gfx10-navi10-fail.csv +++ b/src/gallium/drivers/radeonsi/ci/gfx10-navi10-fail.csv @@ -133,4 +133,3 @@ KHR-GL46.sparse_texture2_tests.UncommittedRegionsAccess,Fail KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupColor,Fail KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupResidency,Fail KHR-GL46.shader_image_load_store.basic-allTargets-atomic,Fail -KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_component,Fail diff --git a/src/gallium/drivers/radeonsi/ci/gfx10_3-navi21-fail.csv b/src/gallium/drivers/radeonsi/ci/gfx10_3-navi21-fail.csv index 0f86e813a23..7ba32084d9d 100644 --- a/src/gallium/drivers/radeonsi/ci/gfx10_3-navi21-fail.csv +++ b/src/gallium/drivers/radeonsi/ci/gfx10_3-navi21-fail.csv @@ -22,7 +22,6 @@ spec@!opengl 1.0@rasterpos,Fail spec@!opengl 1.0@rasterpos@glsl_vs_gs_linked,Fail spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail spec@!opengl 1.1@line-smooth-stipple,Fail -spec@!opengl es 3.0@gles-3.0-transform-feedback-uniform-buffer-object,Fail spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2-mat2,Fail spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2x3-mat2x3,Fail @@ -136,7 +135,6 @@ KHR-GL46.sparse_texture2_tests.UncommittedRegionsAccess,Fail KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupColor,Fail KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupResidency,Fail KHR-GL46.shader_image_load_store.basic-allTargets-atomic,Fail -KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_component,Fail # escts failures KHR-GLES31.core.shader_image_load_store.basic-allTargets-loadStoreCS,Fail diff --git a/src/gallium/drivers/radeonsi/ci/gfx11-gfx1100-fail.csv b/src/gallium/drivers/radeonsi/ci/gfx11-gfx1100-fail.csv index ee936e565f3..7082abb6814 100644 --- a/src/gallium/drivers/radeonsi/ci/gfx11-gfx1100-fail.csv +++ b/src/gallium/drivers/radeonsi/ci/gfx11-gfx1100-fail.csv @@ -135,7 +135,6 @@ KHR-GL46.sparse_texture2_tests.StandardPageSizesTestCase,Fail KHR-GL46.sparse_texture2_tests.UncommittedRegionsAccess,Fail KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupColor,Fail KHR-GL46.sparse_texture_clamp_tests.SparseTextureClampLookupResidency,Fail -KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_component,Fail # escts failures KHR-GLES31.core.shader_image_load_store.basic-allTargets-loadStoreCS,Fail diff --git a/src/gallium/drivers/radeonsi/ci/gfx6-tahiti-fail.csv b/src/gallium/drivers/radeonsi/ci/gfx6-tahiti-fail.csv index a350bd2b36b..e8891d6fce2 100644 --- a/src/gallium/drivers/radeonsi/ci/gfx6-tahiti-fail.csv +++ b/src/gallium/drivers/radeonsi/ci/gfx6-tahiti-fail.csv @@ -741,7 +741,6 @@ KHR-GL46.transform_feedback_overflow_query_ARB.basic-single-stream-interleaved-a KHR-GL46.transform_feedback_overflow_query_ARB.basic-single-stream-separate-attribs,Fail KHR-GL46.transform_feedback_overflow_query_ARB.multiple-streams-multiple-buffers-per-stream,Fail KHR-GL46.transform_feedback_overflow_query_ARB.multiple-streams-one-buffer-per-stream,Fail -KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_component,Fail dEQP-GLES2.functional.texture.format.la88_2d_npot,Fail dEQP-GLES2.functional.texture.format.la88_2d_pot,Fail diff --git a/src/gallium/drivers/radeonsi/ci/gfx8-polaris11-fail.csv b/src/gallium/drivers/radeonsi/ci/gfx8-polaris11-fail.csv index ce189a52300..3d4171bfefe 100644 --- a/src/gallium/drivers/radeonsi/ci/gfx8-polaris11-fail.csv +++ b/src/gallium/drivers/radeonsi/ci/gfx8-polaris11-fail.csv @@ -240,4 +240,3 @@ KHR-GL46.shader_ballot_tests.ShaderBallotFunctionRead,Fail KHR-GL46.shader_image_load_store.advanced-sso-subroutine,Fail KHR-GL46.shader_image_load_store.basic-allTargets-atomic,Fail KHR-GL46.draw_indirect.basic-drawArrays-bufferOffset,Fail -KHR-Single-GL46.enhanced_layouts.xfb_capture_inactive_output_component,Fail diff --git a/src/mesa/main/glspirv.c b/src/mesa/main/glspirv.c index 58095da844f..cf5c030a795 100644 --- a/src/mesa/main/glspirv.c +++ b/src/mesa/main/glspirv.c @@ -331,7 +331,9 @@ _mesa_spirv_to_nir(struct gl_context *ctx, NIR_PASS(_, nir, nir_split_var_copies); NIR_PASS(_, nir, nir_split_per_member_structs); - if (nir->info.stage == MESA_SHADER_VERTEX) + if (nir->info.stage == MESA_SHADER_VERTEX && + (!(nir->options->io_options & nir_io_glsl_lower_derefs) || + !(nir->options->io_options & nir_io_glsl_opt_varyings))) nir_remap_dual_slot_attributes(nir, &linked_shader->Program->DualSlotInputs); NIR_PASS(_, nir, nir_lower_frexp); diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index 40e73eb4538..2c974092ef0 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -593,6 +593,14 @@ st_link_glsl_to_nir(struct gl_context *ctx, const struct gl_shader_compiler_options *options = &ctx->Const.ShaderCompilerOptions[stage]; + if (nir->info.io_lowered) { + /* Since IO is lowered, we won't need the IO variables from now on. + * nir_build_program_resource_list was the last pass that needed them. + */ + NIR_PASS_V(nir, nir_remove_dead_variables, + nir_var_shader_in | nir_var_shader_out, NULL); + } + /* If there are forms of indirect addressing that the driver * cannot handle, perform the lowering pass. */ @@ -713,7 +721,8 @@ st_link_glsl_to_nir(struct gl_context *ctx, prog->info.num_abos = old_info.num_abos; if (prog->info.stage == MESA_SHADER_VERTEX) { - if (prog->nir->info.io_lowered) { + if (prog->nir->info.io_lowered && + prog->nir->options->io_options & nir_io_glsl_opt_varyings) { prog->info.inputs_read = prog->nir->info.inputs_read; prog->DualSlotInputs = prog->nir->info.dual_slot_inputs; } else { @@ -886,8 +895,11 @@ st_finalize_nir(struct st_context *st, struct gl_program *prog, /* Lower load_deref/store_deref of inputs and outputs. * This depends on st_nir_assign_varying_locations. + * + * TODO: remove this once nir_io_glsl_opt_varyings is enabled by default. */ - if (nir->options->io_options & nir_io_glsl_lower_derefs) { + if (nir->options->io_options & nir_io_glsl_lower_derefs && + !(nir->options->io_options & nir_io_glsl_opt_varyings)) { nir_lower_io_passes(nir, false); NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_in | nir_var_shader_out, NULL);