radeonsi: vectorize loads/stores and shrink stores

This is based on what RADV does.

Reviewed-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29282>
This commit is contained in:
Marek Olšák
2024-05-18 19:24:05 -04:00
committed by Marge Bot
parent 8cb254e0b8
commit eef5e4221f

View File

@@ -2385,8 +2385,6 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader,
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
NIR_PASS(progress, nir, nir_opt_large_constants, glsl_get_natural_size_align_bytes, 16);
NIR_PASS(progress, nir, ac_nir_lower_global_access);
/* Loop unrolling caused by uniform inlining can help eliminate indirect indexing, so
* this should be done after that.
*/
@@ -2497,6 +2495,20 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader,
NIR_PASS_V(nir, nir_clear_shared_memory, shared_size, chunk_size);
}
NIR_PASS(progress, nir, nir_opt_load_store_vectorize,
&(nir_load_store_vectorize_options){
.modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_shared | nir_var_mem_global |
nir_var_shader_temp,
.callback = ac_nir_mem_vectorize_callback,
.cb_data = &sel->screen->info.gfx_level,
/* On GFX6, read2/write2 accesses are out-of-bounds if the offset register is negative,
* even if the final offset is not, so shared2 is only enabled on GFX7 and later.
*/
.has_shared2_amd = sel->screen->info.gfx_level >= GFX7,
});
NIR_PASS(progress, nir, nir_opt_shrink_stores, false);
NIR_PASS(progress, nir, ac_nir_lower_global_access);
NIR_PASS(progress, nir, ac_nir_lower_intrinsics_to_args, sel->screen->info.gfx_level,
si_select_hw_stage(nir->info.stage, key, sel->screen->info.gfx_level),
&args->ac);