From eef5e4221f03c0844ca117cc9590397a70d37126 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 18 May 2024 19:24:05 -0400 Subject: [PATCH] radeonsi: vectorize load/stores and shrink stores based on RADV Reviewed-by: Qiang Yu Part-of: --- src/gallium/drivers/radeonsi/si_shader.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 982884e217f..a19644c81bf 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2385,8 +2385,6 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL); NIR_PASS(progress, nir, nir_opt_large_constants, glsl_get_natural_size_align_bytes, 16); - NIR_PASS(progress, nir, ac_nir_lower_global_access); - /* Loop unrolling caused by uniform inlining can help eliminate indirect indexing, so * this should be done after that. */ @@ -2497,6 +2495,20 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, NIR_PASS_V(nir, nir_clear_shared_memory, shared_size, chunk_size); } + NIR_PASS(progress, nir, nir_opt_load_store_vectorize, + &(nir_load_store_vectorize_options){ + .modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_shared | nir_var_mem_global | + nir_var_shader_temp, + .callback = ac_nir_mem_vectorize_callback, + .cb_data = &sel->screen->info.gfx_level, + /* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if + * the final offset is not. + */ + .has_shared2_amd = sel->screen->info.gfx_level >= GFX7, + }); + NIR_PASS(progress, nir, nir_opt_shrink_stores, false); + NIR_PASS(progress, nir, ac_nir_lower_global_access); + NIR_PASS(progress, nir, ac_nir_lower_intrinsics_to_args, sel->screen->info.gfx_level, si_select_hw_stage(nir->info.stage, key, sel->screen->info.gfx_level), &args->ac);