ir3, tu: Cleanup indirect i/o lowering
Do all the necessary lowering in one place, during finalization, and stop uselessly calling nir_lower_indirect_derefs in turnip. Splitting i/o to elements should no longer be necessary since we use the i/o semantics instead of variables now. This has the side effect that we no longer generate enormous if-ladders for tess/GS shaders with turnip. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7274>
This commit is contained in:
@@ -53,7 +53,6 @@ static const nir_shader_compiler_options options = {
|
||||
.vertex_id_zero_based = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_all_io_to_elements = true,
|
||||
.lower_helper_invocation = true,
|
||||
.lower_bitfield_insert_to_shifts = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
@@ -107,7 +106,6 @@ static const nir_shader_compiler_options options_a6xx = {
|
||||
.vertex_id_zero_based = false,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
.lower_all_io_to_elements = true,
|
||||
.lower_helper_invocation = true,
|
||||
.lower_bitfield_insert_to_shifts = true,
|
||||
.lower_bitfield_extract_to_shifts = true,
|
||||
@@ -278,6 +276,44 @@ should_split_wrmask(const nir_instr *instr, const void *data)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ir3_nir_lower_io_to_temporaries(nir_shader *s)
|
||||
{
|
||||
/* Outputs consumed by the VPC, VS inputs, and FS outputs are all handled
|
||||
* by the hardware pre-loading registers at the beginning and then reading
|
||||
* them at the end, so we can't access them indirectly except through
|
||||
* normal register-indirect accesses, and therefore ir3 doesn't support
|
||||
* indirect accesses on those. Other i/o is lowered in ir3_nir_lower_tess,
|
||||
* and indirects work just fine for those. GS outputs may be consumed by
|
||||
* VPC, but have their own lowering in ir3_nir_lower_gs() which does
|
||||
* something similar to nir_lower_io_to_temporaries so we shouldn't need
|
||||
* to lower them.
|
||||
*
|
||||
* Note: this might be a little inefficient for VS or TES outputs which are
|
||||
* when the next stage isn't an FS, but it probably don't make sense to
|
||||
* depend on the next stage before variant creation.
|
||||
*
|
||||
* TODO: for gallium, mesa/st also does some redundant lowering, including
|
||||
* running this pass for GS inputs/outputs which we don't want but not
|
||||
* including TES outputs or FS inputs which we do need. We should probably
|
||||
* stop doing that once we're sure all drivers are doing their own
|
||||
* indirect i/o lowering.
|
||||
*/
|
||||
bool lower_input = s->info.stage == MESA_SHADER_VERTEX || s->info.stage == MESA_SHADER_FRAGMENT;
|
||||
bool lower_output = s->info.stage != MESA_SHADER_TESS_CTRL && s->info.stage != MESA_SHADER_GEOMETRY;
|
||||
if (lower_input || lower_output) {
|
||||
NIR_PASS_V(s, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(s),
|
||||
lower_output, lower_input);
|
||||
|
||||
/* nir_lower_io_to_temporaries() creates global variables and copy
|
||||
* instructions which need to be cleaned up.
|
||||
*/
|
||||
NIR_PASS_V(s, nir_split_var_copies);
|
||||
NIR_PASS_V(s, nir_lower_var_copies);
|
||||
NIR_PASS_V(s, nir_lower_global_vars_to_local);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s)
|
||||
{
|
||||
@@ -303,8 +339,6 @@ ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s)
|
||||
if (s->info.stage == MESA_SHADER_GEOMETRY)
|
||||
NIR_PASS_V(s, ir3_nir_lower_gs);
|
||||
|
||||
NIR_PASS_V(s, nir_lower_io_arrays_to_elements_no_indirects, false);
|
||||
|
||||
NIR_PASS_V(s, nir_lower_amul, ir3_glsl_type_size);
|
||||
|
||||
OPT_V(s, nir_lower_regs_to_ssa);
|
||||
|
@@ -53,6 +53,7 @@ void ir3_nir_lower_gs(nir_shader *shader);
|
||||
|
||||
const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler);
|
||||
void ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s);
|
||||
void ir3_nir_lower_io_to_temporaries(nir_shader *s);
|
||||
void ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s);
|
||||
void ir3_nir_post_finalize(struct ir3_compiler *compiler, nir_shader *s);
|
||||
void ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s);
|
||||
|
@@ -168,8 +168,6 @@ tu_spirv_to_nir(struct tu_device *dev,
|
||||
|
||||
NIR_PASS_V(nir, nir_propagate_invariant);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_global_vars_to_local);
|
||||
NIR_PASS_V(nir, nir_split_var_copies);
|
||||
NIR_PASS_V(nir, nir_lower_var_copies);
|
||||
@@ -177,16 +175,6 @@ tu_spirv_to_nir(struct tu_device *dev,
|
||||
NIR_PASS_V(nir, nir_opt_copy_prop_vars);
|
||||
NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);
|
||||
|
||||
/* ir3 doesn't support indirect input/output */
|
||||
/* TODO: We shouldn't perform this lowering pass on gl_TessLevelInner
|
||||
* and gl_TessLevelOuter. Since the tess levels are actually stored in
|
||||
* a global BO, they can be directly accessed via stg and ldg.
|
||||
* nir_lower_indirect_derefs will instead generate a big if-ladder which
|
||||
* isn't *incorrect* but is much less efficient. */
|
||||
NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out, UINT32_MAX);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_is_helper_invocation);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_system_values);
|
||||
@@ -825,6 +813,13 @@ tu_shader_create(struct tu_device *dev,
|
||||
});
|
||||
}
|
||||
|
||||
/* This needs to happen before multiview lowering which rewrites store
|
||||
* instructions of the position variable, so that we can just rewrite one
|
||||
* store at the end instead of having to rewrite every store specified by
|
||||
* the user.
|
||||
*/
|
||||
ir3_nir_lower_io_to_temporaries(nir);
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX && multiview_mask) {
|
||||
tu_nir_lower_multiview(nir, multiview_mask,
|
||||
&shader->multi_pos_output, dev);
|
||||
|
@@ -403,6 +403,7 @@ main(int argc, char **argv)
|
||||
return -1;
|
||||
}
|
||||
|
||||
ir3_nir_lower_io_to_temporaries(nir);
|
||||
ir3_finalize_nir(compiler, nir);
|
||||
ir3_nir_post_finalize(compiler, nir);
|
||||
|
||||
|
@@ -466,6 +466,7 @@ ir3_screen_finalize_nir(struct pipe_screen *pscreen, void *nir, bool optimize)
|
||||
{
|
||||
struct fd_screen *screen = fd_screen(pscreen);
|
||||
|
||||
ir3_nir_lower_io_to_temporaries(nir);
|
||||
ir3_finalize_nir(screen->compiler, nir);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user