From 643f2cb8a33578e38f47b54b9c312cf44a75b7c0 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 22 Oct 2020 14:58:01 +0200 Subject: [PATCH] ir3, tu: Cleanup indirect i/o lowering Do all the necessary lowering in one place, during finalization, and stop uselessly calling nir_lower_indirect_derefs in turnip. Splitting i/o to elements should no longer be necessary since we use the i/o semantics instead of variables now. This has the side effect that we no longer generate enormous if-ladders for tess/GS shaders with turnip. Part-of: --- src/freedreno/ir3/ir3_nir.c | 42 +++++++++++++++++-- src/freedreno/ir3/ir3_nir.h | 1 + src/freedreno/vulkan/tu_shader.c | 19 ++++----- .../drivers/freedreno/ir3/ir3_cmdline.c | 1 + .../drivers/freedreno/ir3/ir3_gallium.c | 1 + 5 files changed, 48 insertions(+), 16 deletions(-) diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 284ec295025..eb449bbf6f3 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -53,7 +53,6 @@ static const nir_shader_compiler_options options = { .vertex_id_zero_based = true, .lower_extract_byte = true, .lower_extract_word = true, - .lower_all_io_to_elements = true, .lower_helper_invocation = true, .lower_bitfield_insert_to_shifts = true, .lower_bitfield_extract_to_shifts = true, @@ -107,7 +106,6 @@ static const nir_shader_compiler_options options_a6xx = { .vertex_id_zero_based = false, .lower_extract_byte = true, .lower_extract_word = true, - .lower_all_io_to_elements = true, .lower_helper_invocation = true, .lower_bitfield_insert_to_shifts = true, .lower_bitfield_extract_to_shifts = true, @@ -278,6 +276,44 @@ should_split_wrmask(const nir_instr *instr, const void *data) } } +void +ir3_nir_lower_io_to_temporaries(nir_shader *s) +{ + /* Outputs consumed by the VPC, VS inputs, and FS outputs are all handled + * by the hardware pre-loading registers at the beginning and then reading + * them at the end, so we can't access them indirectly except through + * normal register-indirect accesses, and therefore ir3 doesn't support + * indirect accesses on those. Other i/o is lowered in ir3_nir_lower_tess, + * and indirects work just fine for those. GS outputs may be consumed by + * VPC, but have their own lowering in ir3_nir_lower_gs() which does + * something similar to nir_lower_io_to_temporaries so we shouldn't need + * to lower them. + * + * Note: this might be a little inefficient for VS or TES outputs which are + * when the next stage isn't an FS, but it probably don't make sense to + * depend on the next stage before variant creation. + * + * TODO: for gallium, mesa/st also does some redundant lowering, including + * running this pass for GS inputs/outputs which we don't want but not + * including TES outputs or FS inputs which we do need. We should probably + * stop doing that once we're sure all drivers are doing their own + * indirect i/o lowering. + */ + bool lower_input = s->info.stage == MESA_SHADER_VERTEX || s->info.stage == MESA_SHADER_FRAGMENT; + bool lower_output = s->info.stage != MESA_SHADER_TESS_CTRL && s->info.stage != MESA_SHADER_GEOMETRY; + if (lower_input || lower_output) { + NIR_PASS_V(s, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(s), + lower_output, lower_input); + + /* nir_lower_io_to_temporaries() creates global variables and copy + * instructions which need to be cleaned up. + */ + NIR_PASS_V(s, nir_split_var_copies); + NIR_PASS_V(s, nir_lower_var_copies); + NIR_PASS_V(s, nir_lower_global_vars_to_local); + } +} + void ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s) { @@ -303,8 +339,6 @@ ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s) if (s->info.stage == MESA_SHADER_GEOMETRY) NIR_PASS_V(s, ir3_nir_lower_gs); - NIR_PASS_V(s, nir_lower_io_arrays_to_elements_no_indirects, false); - NIR_PASS_V(s, nir_lower_amul, ir3_glsl_type_size); OPT_V(s, nir_lower_regs_to_ssa); diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index 76eef3b2646..03e847ef42f 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -53,6 +53,7 @@ void ir3_nir_lower_gs(nir_shader *shader); const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler); void ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s); +void ir3_nir_lower_io_to_temporaries(nir_shader *s); void ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s); void ir3_nir_post_finalize(struct ir3_compiler *compiler, nir_shader *s); void ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s); diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c index ef9def776ee..80414c038f5 100644 --- a/src/freedreno/vulkan/tu_shader.c +++ b/src/freedreno/vulkan/tu_shader.c @@ -168,8 +168,6 @@ tu_spirv_to_nir(struct tu_device *dev, NIR_PASS_V(nir, nir_propagate_invariant); - NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true); - NIR_PASS_V(nir, nir_lower_global_vars_to_local); NIR_PASS_V(nir, nir_split_var_copies); NIR_PASS_V(nir, nir_lower_var_copies); @@ -177,16 +175,6 @@ tu_spirv_to_nir(struct tu_device *dev, NIR_PASS_V(nir, nir_opt_copy_prop_vars); NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all); - /* ir3 doesn't support indirect input/output */ - /* TODO: We shouldn't perform this lowering pass on gl_TessLevelInner - * and gl_TessLevelOuter. Since the tess levels are actually stored in - * a global BO, they can be directly accessed via stg and ldg. - * nir_lower_indirect_derefs will instead generate a big if-ladder which - * isn't *incorrect* but is much less efficient. */ - NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out, UINT32_MAX); - - NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); - NIR_PASS_V(nir, nir_lower_is_helper_invocation); NIR_PASS_V(nir, nir_lower_system_values); @@ -825,6 +813,13 @@ tu_shader_create(struct tu_device *dev, }); } + /* This needs to happen before multiview lowering which rewrites store + * instructions of the position variable, so that we can just rewrite one + * store at the end instead of having to rewrite every store specified by + * the user. + */ + ir3_nir_lower_io_to_temporaries(nir); + if (nir->info.stage == MESA_SHADER_VERTEX && multiview_mask) { tu_nir_lower_multiview(nir, multiview_mask, &shader->multi_pos_output, dev); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c index 7b7a5ed254c..f713e2cb431 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c @@ -403,6 +403,7 @@ main(int argc, char **argv) return -1; } + ir3_nir_lower_io_to_temporaries(nir); ir3_finalize_nir(compiler, nir); ir3_nir_post_finalize(compiler, nir); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c index 65e13efac87..ef984a078a1 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c @@ -466,6 +466,7 @@ ir3_screen_finalize_nir(struct pipe_screen *pscreen, void *nir, bool optimize) { struct fd_screen *screen = fd_screen(pscreen); + ir3_nir_lower_io_to_temporaries(nir); ir3_finalize_nir(screen->compiler, nir); }