radv: vectorize RT stack access

fossil-db (gfx1100):
Totals from 10 (0.01% of 133461) affected shaders:
MaxWaves: 176 -> 174 (-1.14%)
Instrs: 39260 -> 38710 (-1.40%)
CodeSize: 202272 -> 197288 (-2.46%)
VGPRs: 888 -> 900 (+1.35%)
Latency: 82306 -> 81762 (-0.66%); split: -0.68%, +0.02%
InvThroughput: 11182 -> 11158 (-0.21%); split: -0.52%, +0.30%
VClause: 721 -> 700 (-2.91%)
SClause: 1147 -> 1148 (+0.09%); split: -0.17%, +0.26%
Copies: 3625 -> 3891 (+7.34%)
PreVGPRs: 819 -> 845 (+3.17%); split: -0.37%, +3.54%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24350>
This commit is contained in:
@@ -327,8 +327,19 @@ radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned
|
||||
if (num_components > 4)
|
||||
return false;
|
||||
|
||||
/* >128 bit loads are split except with SMEM */
|
||||
if (bit_size * num_components > 128)
|
||||
bool is_scratch = false;
|
||||
switch (low->intrinsic) {
|
||||
case nir_intrinsic_load_stack:
|
||||
case nir_intrinsic_store_stack:
|
||||
is_scratch = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* >128 bit loads are split except with SMEM. On GFX6-8, >32 bit scratch loads are split. */
|
||||
enum amd_gfx_level gfx_level = *(enum amd_gfx_level *)data;
|
||||
if (bit_size * num_components > (is_scratch && gfx_level <= GFX8 ? 32 : 128))
|
||||
return false;
|
||||
|
||||
uint32_t align;
|
||||
@@ -343,7 +354,9 @@ radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned
|
||||
case nir_intrinsic_store_ssbo:
|
||||
case nir_intrinsic_load_ssbo:
|
||||
case nir_intrinsic_load_ubo:
|
||||
case nir_intrinsic_load_push_constant: {
|
||||
case nir_intrinsic_load_push_constant:
|
||||
case nir_intrinsic_load_stack:
|
||||
case nir_intrinsic_store_stack: {
|
||||
unsigned max_components;
|
||||
if (align % 4 == 0)
|
||||
max_components = NIR_MAX_VEC_COMPONENTS;
|
||||
@@ -554,6 +567,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_key
|
||||
nir_load_store_vectorize_options vectorize_opts = {
|
||||
.modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_push_const | nir_var_mem_shared | nir_var_mem_global,
|
||||
.callback = radv_mem_vectorize_callback,
|
||||
.cb_data = &gfx_level,
|
||||
.robust_modes = 0,
|
||||
/* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if
|
||||
* the final offset is not.
|
||||
|
@@ -376,6 +376,7 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
|
||||
.stack_alignment = 16,
|
||||
.localized_loads = true,
|
||||
.vectorizer_callback = radv_mem_vectorize_callback,
|
||||
.vectorizer_data = &device->physical_device->rad_info.gfx_level,
|
||||
};
|
||||
uint32_t num_resume_shaders = 0;
|
||||
nir_shader **resume_shaders = NULL;
|
||||
|
Reference in New Issue
Block a user