nir: add indirect loop unrolling to compiler options

This is where it belongs, rather than having to pass it into the
optimisation pass on every call.

It also allows us to call the loop analysis pass, which we will do
later in this series, without having to duplicate these options.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12064>
Timothy Arceri authored on 2021-07-29 19:34:26 +10:00; committed by Marge Bot
parent a654e39f15
commit a9ed4538ab
24 changed files with 90 additions and 94 deletions
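
The shape of the change, as a minimal sketch (the options struct and the
call sites below are illustrative, not copied from any single driver in
this diff):

   /* Before: every caller passed the indirect-unroll mask explicitly. */
   NIR_PASS(progress, s, nir_opt_loop_unroll,
            nir_var_shader_in | nir_var_function_temp);

   /* After: the driver declares the mask once in its compiler options... */
   static const nir_shader_compiler_options example_options = {
      .max_unroll_iterations = 32,
      .force_indirect_unrolling = nir_var_shader_in | nir_var_function_temp,
   };

   /* ...and the pass reads it from shader->options internally. */
   NIR_PASS(progress, s, nir_opt_loop_unroll);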

@@ -181,7 +181,7 @@ radv_optimize_nir(const struct radv_device *device, struct nir_shader *shader,
NIR_PASS(progress, shader, nir_opt_shrink_vectors,
!device->instance->disable_shrink_image_store);
if (shader->options->max_unroll_iterations) {
-NIR_PASS(progress, shader, nir_opt_loop_unroll, 0);
+NIR_PASS(progress, shader, nir_opt_loop_unroll);
}
} while (progress && !optimize_conservatively);

@@ -1235,10 +1235,7 @@ agx_optimize_nir(nir_shader *nir)
NIR_PASS(progress, nir, nir_opt_undef);
NIR_PASS(progress, nir, nir_lower_undef_to_zero);
-NIR_PASS(progress, nir, nir_opt_loop_unroll,
-nir_var_shader_in |
-nir_var_shader_out |
-nir_var_function_temp);
+NIR_PASS(progress, nir, nir_opt_loop_unroll);
} while (progress);
NIR_PASS_V(nir, nir_opt_algebraic_late);

@@ -213,6 +213,8 @@ static const nir_shader_compiler_options agx_nir_options = {
.lower_doubles_options = nir_lower_dmod,
.lower_int64_options = ~(nir_lower_iadd64 | nir_lower_imul_2x32_64),
+.force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
.has_fsub = true,
.has_isub = true,
.has_cs_global_id = true,

@@ -1877,9 +1877,7 @@ v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s)
if (c && !c->disable_loop_unrolling &&
s->options->max_unroll_iterations > 0) {
bool local_progress = false;
-NIR_PASS(local_progress, s, nir_opt_loop_unroll,
-nir_var_shader_in |
-nir_var_function_temp);
+NIR_PASS(local_progress, s, nir_opt_loop_unroll);
c->unrolled_any_loops |= local_progress;
progress |= local_progress;
}

@@ -248,6 +248,7 @@ const nir_shader_compiler_options v3dv_nir_options = {
* needs to be supported */
.lower_interpolate_at = true,
.max_unroll_iterations = 16,
+.force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp),
.divergence_analysis_options =
nir_divergence_multiple_workgroup_per_compute_subgroup
};

@@ -3690,6 +3690,12 @@ typedef struct nir_shader_compiler_options {
* vectorized IO can pack more varyings when linking. */
bool linker_ignore_precision;
+/**
+ * Specifies which type of indirectly accessed variables should force
+ * loop unrolling.
+ */
+nir_variable_mode force_indirect_unrolling;
nir_lower_int64_options lower_int64_options;
nir_lower_doubles_options lower_doubles_options;
nir_divergence_options divergence_analysis_options;
@@ -5436,7 +5442,7 @@ bool nir_opt_large_constants(nir_shader *shader,
glsl_type_size_align_func size_align,
unsigned threshold);
-bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask);
+bool nir_opt_loop_unroll(nir_shader *shader);
typedef enum {
nir_move_const_undef = (1 << 0),

@@ -1058,10 +1058,11 @@ nir_opt_loop_unroll_impl(nir_function_impl *impl,
* should force loop unrolling.
*/
bool
-nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask)
+nir_opt_loop_unroll(nir_shader *shader)
{
bool progress = false;
+nir_variable_mode indirect_mask = shader->options->force_indirect_unrolling;
nir_foreach_function(function, shader) {
if (function->impl) {
progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask);
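
Since the mask now lives on shader->options, any pass can consult the same
setting. A hedged sketch of the consuming side (condensed; the helper name
is illustrative, not the exact upstream code):

   static bool
   must_unroll_for_indirect(const nir_shader *shader,
                            nir_variable_mode indirect_mode)
   {
      /* Force unrolling when a loop indirectly indexes a variable whose
       * mode the driver listed in force_indirect_unrolling. */
      return (shader->options->force_indirect_unrolling & indirect_mode) != 0;
   }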

@@ -132,6 +132,7 @@ static const nir_shader_compiler_options options_a6xx = {
.has_fsub = true,
.has_isub = true,
.max_unroll_iterations = 32,
+.force_indirect_unrolling = nir_var_all,
.lower_wpos_pntc = true,
.lower_cs_local_index_from_id = true,
@@ -269,7 +270,7 @@ ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s)
OPT(s, nir_opt_dce);
}
progress |= OPT(s, nir_opt_if, false);
-progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
+progress |= OPT(s, nir_opt_loop_unroll);
progress |= OPT(s, nir_opt_remove_phis);
progress |= OPT(s, nir_opt_undef);
} while (progress);

@@ -2458,7 +2458,6 @@ static void
ntt_optimize_nir(struct nir_shader *s, struct pipe_screen *screen)
{
bool progress;
-nir_variable_mode no_indirects_mask = ntt_no_indirects_mask(s, screen);
unsigned pipe_stage = pipe_shader_type_from_mesa(s->info.stage);
unsigned control_flow_depth =
screen->get_shader_param(screen, pipe_stage,
@@ -2492,7 +2491,7 @@ ntt_optimize_nir(struct nir_shader *s, struct pipe_screen *screen)
NIR_PASS(progress, s, nir_opt_trivial_continues);
NIR_PASS(progress, s, nir_opt_vectorize, ntt_should_vectorize_instr, NULL);
NIR_PASS(progress, s, nir_opt_undef);
-NIR_PASS(progress, s, nir_opt_loop_unroll, no_indirects_mask);
+NIR_PASS(progress, s, nir_opt_loop_unroll);
} while (progress);
}
@@ -2802,6 +2801,8 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
!screen->get_shader_param(screen, pipe_shader_type_from_mesa(s->info.stage),
PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
+nir_variable_mode no_indirects_mask = ntt_no_indirects_mask(s, screen);
if (!options->lower_extract_byte ||
!options->lower_extract_word ||
!options->lower_insert_byte ||
@@ -2812,7 +2813,8 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
!options->lower_rotate ||
!options->lower_uniforms_to_ubo ||
!options->lower_vector_cmp ||
-options->lower_fsqrt != lower_fsqrt) {
+options->lower_fsqrt != lower_fsqrt ||
+options->force_indirect_unrolling != no_indirects_mask) {
nir_shader_compiler_options *new_options = ralloc(s, nir_shader_compiler_options);
*new_options = *s->options;
@@ -2827,6 +2829,7 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
new_options->lower_uniforms_to_ubo = true,
new_options->lower_vector_cmp = true;
new_options->lower_fsqrt = lower_fsqrt;
+new_options->force_indirect_unrolling = no_indirects_mask;
s->options = new_options;
}

@@ -2444,7 +2444,7 @@ ttn_optimize_nir(nir_shader *nir)
NIR_PASS(progress, nir, nir_opt_conditional_discard);
if (nir->options->max_unroll_iterations) {
-NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0);
+NIR_PASS(progress, nir, nir_opt_loop_unroll);
}
} while (progress);

@@ -164,7 +164,7 @@ etna_optimize_loop(nir_shader *s)
OPT(s, nir_copy_prop);
OPT(s, nir_opt_dce);
}
-progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
+progress |= OPT(s, nir_opt_loop_unroll);
progress |= OPT(s, nir_opt_if, false);
progress |= OPT(s, nir_opt_remove_phis);
progress |= OPT(s, nir_opt_undef);

@@ -1076,6 +1076,7 @@ etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu,
.lower_fsqrt = !screen->specs.has_sin_cos_sqrt,
.lower_sincos = !screen->specs.has_sin_cos_sqrt,
.lower_uniforms_to_ubo = screen->specs.halti >= 2,
+.force_indirect_unrolling = nir_var_all,
};
/* apply debug options that disable individual features */

@@ -49,6 +49,7 @@ static const nir_shader_compiler_options options = {
.has_isub = true,
.lower_insert_byte = true,
.lower_insert_word = true,
+.force_indirect_unrolling = nir_var_all,
};
const nir_shader_compiler_options *
@@ -92,7 +93,7 @@ ir2_optimize_loop(nir_shader *s)
OPT(s, nir_copy_prop);
OPT(s, nir_opt_dce);
}
-progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
+progress |= OPT(s, nir_opt_loop_unroll);
progress |= OPT(s, nir_opt_if, false);
progress |= OPT(s, nir_opt_remove_phis);
progress |= OPT(s, nir_opt_undef);

@@ -61,6 +61,7 @@ static const nir_shader_compiler_options vs_nir_options = {
.lower_fceil = true,
.lower_insert_byte = true,
.lower_insert_word = true,
+.force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
};
static const nir_shader_compiler_options fs_nir_options = {
@@ -80,6 +81,7 @@ static const nir_shader_compiler_options fs_nir_options = {
.lower_insert_word = true,
.lower_bitops = true,
.lower_vector_cmp = true,
+.force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
};
const void *
@@ -131,10 +133,7 @@ lima_program_optimize_vs_nir(struct nir_shader *s)
NIR_PASS(progress, s, lima_nir_lower_ftrunc);
NIR_PASS(progress, s, nir_opt_constant_folding);
NIR_PASS(progress, s, nir_opt_undef);
-NIR_PASS(progress, s, nir_opt_loop_unroll,
-nir_var_shader_in |
-nir_var_shader_out |
-nir_var_function_temp);
+NIR_PASS(progress, s, nir_opt_loop_unroll);
} while (progress);
NIR_PASS_V(s, nir_lower_int_to_float);
@@ -236,10 +235,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, nir_opt_constant_folding);
NIR_PASS(progress, s, nir_opt_undef);
-NIR_PASS(progress, s, nir_opt_loop_unroll,
-nir_var_shader_in |
-nir_var_shader_out |
-nir_var_function_temp);
+NIR_PASS(progress, s, nir_opt_loop_unroll);
NIR_PASS(progress, s, lima_nir_split_load_input);
} while (progress);

@@ -596,7 +596,7 @@ void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool first)
NIR_PASS(progress, nir, nir_opt_undef);
NIR_PASS(progress, nir, nir_opt_conditional_discard);
if (nir->options->max_unroll_iterations) {
-NIR_PASS(progress, nir, nir_opt_loop_unroll, 0);
+NIR_PASS(progress, nir, nir_opt_loop_unroll);
}
if (nir->info.stage == MESA_SHADER_FRAGMENT)

@@ -1548,10 +1548,7 @@ vc4_optimize_nir(struct nir_shader *s)
}
NIR_PASS(progress, s, nir_opt_undef);
-NIR_PASS(progress, s, nir_opt_loop_unroll,
-nir_var_shader_in |
-nir_var_shader_out |
-nir_var_function_temp);
+NIR_PASS(progress, s, nir_opt_loop_unroll);
} while (progress);
}
@@ -2195,6 +2192,7 @@ static const nir_shader_compiler_options nir_options = {
.has_fsub = true,
.has_isub = true,
.max_unroll_iterations = 32,
+.force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
};
const void *

@@ -68,7 +68,8 @@
.lower_usub_sat64 = true, \
.lower_hadd64 = true, \
.lower_bfe_with_two_constants = true, \
-.max_unroll_iterations = 32
+.max_unroll_iterations = 32, \
+.force_indirect_unrolling = nir_var_function_temp
static const struct nir_shader_compiler_options scalar_nir_options = {
COMMON_OPTIONS,
@@ -196,6 +197,9 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
nir_options->unify_interfaces = i < MESA_SHADER_FRAGMENT;
+nir_options->force_indirect_unrolling |=
+   brw_nir_no_indirect_mask(compiler, i);
compiler->glsl_compiler_options[i].NirOptions = nir_options;
compiler->glsl_compiler_options[i].ClampBlockIndicesToArrayBounds = true;

@@ -520,63 +520,10 @@ brw_nir_lower_fs_outputs(nir_shader *nir)
this_progress; \
})
-static nir_variable_mode
-brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
-gl_shader_stage stage)
-{
-const struct intel_device_info *devinfo = compiler->devinfo;
-const bool is_scalar = compiler->scalar_stage[stage];
-nir_variable_mode indirect_mask = 0;
-switch (stage) {
-case MESA_SHADER_VERTEX:
-case MESA_SHADER_FRAGMENT:
-indirect_mask |= nir_var_shader_in;
-break;
-case MESA_SHADER_GEOMETRY:
-if (!is_scalar)
-indirect_mask |= nir_var_shader_in;
-break;
-default:
-/* Everything else can handle indirect inputs */
-break;
-}
-if (is_scalar && stage != MESA_SHADER_TESS_CTRL)
-indirect_mask |= nir_var_shader_out;
-/* On HSW+, we allow indirects in scalar shaders. They get implemented
- * using nir_lower_vars_to_explicit_types and nir_lower_explicit_io in
- * brw_postprocess_nir.
- *
- * We haven't plumbed through the indirect scratch messages on gfx6 or
- * earlier so doing indirects via scratch doesn't work there. On gfx7 and
- * earlier the scratch space size is limited to 12kB. If we allowed
- * indirects as scratch all the time, we may easily exceed this limit
- * without having any fallback.
- */
-if (is_scalar && devinfo->verx10 <= 70)
-indirect_mask |= nir_var_function_temp;
-return indirect_mask;
-}
void
brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
bool is_scalar, bool allow_copies)
{
-nir_variable_mode loop_indirect_mask =
-brw_nir_no_indirect_mask(compiler, nir->info.stage);
-/* We can handle indirects via scratch messages. However, they are
- * expensive so we'd rather not if we can avoid it. Have loop unrolling
- * try to get rid of them.
- */
-if (is_scalar)
-loop_indirect_mask |= nir_var_function_temp;
bool progress;
unsigned lower_flrp =
(nir->options->lower_flrp16 ? 16 : 0) |
@@ -671,7 +618,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
OPT(nir_opt_if, false);
OPT(nir_opt_conditional_discard);
if (nir->options->max_unroll_iterations != 0) {
-OPT(nir_opt_loop_unroll, loop_indirect_mask);
+OPT(nir_opt_loop_unroll);
}
OPT(nir_opt_remove_phis);
OPT(nir_opt_gcm, false);

@@ -127,6 +127,50 @@ brw_get_scratch_size(int size)
return MAX2(1024, util_next_power_of_two(size));
}
+static inline nir_variable_mode
+brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
+gl_shader_stage stage)
+{
+const struct intel_device_info *devinfo = compiler->devinfo;
+const bool is_scalar = compiler->scalar_stage[stage];
+nir_variable_mode indirect_mask = (nir_variable_mode) 0;
+switch (stage) {
+case MESA_SHADER_VERTEX:
+case MESA_SHADER_FRAGMENT:
+indirect_mask |= nir_var_shader_in;
+break;
+case MESA_SHADER_GEOMETRY:
+if (!is_scalar)
+indirect_mask |= nir_var_shader_in;
+break;
+default:
+/* Everything else can handle indirect inputs */
+break;
+}
+if (is_scalar && stage != MESA_SHADER_TESS_CTRL)
+indirect_mask |= nir_var_shader_out;
+/* On HSW+, we allow indirects in scalar shaders. They get implemented
+ * using nir_lower_vars_to_explicit_types and nir_lower_explicit_io in
+ * brw_postprocess_nir.
+ *
+ * We haven't plumbed through the indirect scratch messages on gfx6 or
+ * earlier so doing indirects via scratch doesn't work there. On gfx7 and
+ * earlier the scratch space size is limited to 12kB. If we allowed
+ * indirects as scratch all the time, we may easily exceed this limit
+ * without having any fallback.
+ */
+if (is_scalar && devinfo->verx10 <= 70)
+indirect_mask |= nir_var_function_temp;
+return indirect_mask;
+}
bool brw_texture_offset(const nir_tex_instr *tex, unsigned src,
uint32_t *offset_bits);

@@ -324,7 +324,7 @@ st_nir_opts(nir_shader *nir)
NIR_PASS(progress, nir, nir_opt_undef);
NIR_PASS(progress, nir, nir_opt_conditional_discard);
if (nir->options->max_unroll_iterations) {
-NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0);
+NIR_PASS(progress, nir, nir_opt_loop_unroll);
}
} while (progress);
}

@@ -3306,10 +3306,7 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend)
NIR_PASS(progress, nir, nir_opt_undef);
NIR_PASS(progress, nir, nir_lower_undef_to_zero);
-NIR_PASS(progress, nir, nir_opt_loop_unroll,
-nir_var_shader_in |
-nir_var_shader_out |
-nir_var_function_temp);
+NIR_PASS(progress, nir, nir_opt_loop_unroll);
} while (progress);
/* TODO: Why is 64-bit getting rematerialized?

@@ -93,6 +93,7 @@ static const nir_shader_compiler_options bifrost_nir_options = {
.vertex_id_zero_based = true,
.lower_cs_local_index_from_id = true,
.max_unroll_iterations = 32,
+.force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
};
#endif

@@ -369,10 +369,7 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
NIR_PASS(progress, nir, nir_opt_undef);
NIR_PASS(progress, nir, nir_lower_undef_to_zero);
-NIR_PASS(progress, nir, nir_opt_loop_unroll,
-nir_var_shader_in |
-nir_var_shader_out |
-nir_var_function_temp);
+NIR_PASS(progress, nir, nir_opt_loop_unroll);
NIR_PASS(progress, nir, nir_opt_vectorize,
midgard_vectorize_filter, NULL);

@@ -97,6 +97,7 @@ static const nir_shader_compiler_options midgard_nir_options = {
.has_cs_global_id = true,
.lower_cs_local_index_from_id = true,
.max_unroll_iterations = 32,
+.force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
};
#endif