radv: vectorize RT stack access

fossil-db (gfx1100):
Totals from 10 (0.01% of 133461) affected shaders:
MaxWaves: 176 -> 174 (-1.14%)
Instrs: 39260 -> 38710 (-1.40%)
CodeSize: 202272 -> 197288 (-2.46%)
VGPRs: 888 -> 900 (+1.35%)
Latency: 82306 -> 81762 (-0.66%); split: -0.68%, +0.02%
InvThroughput: 11182 -> 11158 (-0.21%); split: -0.52%, +0.30%
VClause: 721 -> 700 (-2.91%)
SClause: 1147 -> 1148 (+0.09%); split: -0.17%, +0.26%
Copies: 3625 -> 3891 (+7.34%)
PreVGPRs: 819 -> 845 (+3.17%); split: -0.37%, +3.54%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24350>
This commit is contained in:
Rhys Perry
2023-07-26 16:15:35 +01:00
committed by Marge Bot
parent 6f315e6049
commit 81641b0155
2 changed files with 18 additions and 3 deletions

View File

@@ -327,8 +327,19 @@ radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned
if (num_components > 4)
return false;
/* >128 bit loads are split except with SMEM */
if (bit_size * num_components > 128)
bool is_scratch = false;
switch (low->intrinsic) {
case nir_intrinsic_load_stack:
case nir_intrinsic_store_stack:
is_scratch = true;
break;
default:
break;
}
/* >128 bit loads are split except with SMEM. On GFX6-8, >32 bit scratch loads are split. */
enum amd_gfx_level gfx_level = *(enum amd_gfx_level *)data;
if (bit_size * num_components > (is_scratch && gfx_level <= GFX8 ? 32 : 128))
return false;
uint32_t align;
@@ -343,7 +354,9 @@ radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned
case nir_intrinsic_store_ssbo:
case nir_intrinsic_load_ssbo:
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_push_constant: {
case nir_intrinsic_load_push_constant:
case nir_intrinsic_load_stack:
case nir_intrinsic_store_stack: {
unsigned max_components;
if (align % 4 == 0)
max_components = NIR_MAX_VEC_COMPONENTS;
@@ -554,6 +567,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_key
nir_load_store_vectorize_options vectorize_opts = {
.modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_push_const | nir_var_mem_shared | nir_var_mem_global,
.callback = radv_mem_vectorize_callback,
.cb_data = &gfx_level,
.robust_modes = 0,
/* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if
* the final offset is not.

View File

@@ -376,6 +376,7 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
.stack_alignment = 16,
.localized_loads = true,
.vectorizer_callback = radv_mem_vectorize_callback,
.vectorizer_data = &device->physical_device->rad_info.gfx_level,
};
uint32_t num_resume_shaders = 0;
nir_shader **resume_shaders = NULL;