radv: Add option to clear LDS at the end of a shader.

Only shader stages which can explicitly use shared memory (compute, task
and mesh) are covered for now. The pass runs very late so that optimizations
cannot remove the clearing stores, and so that the clear is added after
mesh shader (MS) outputs have been loaded from LDS.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26679>
This commit is contained in:
Bas Nieuwenhuizen
2023-11-01 15:34:13 +01:00
committed by Marge Bot
parent da6a5e1f63
commit eaf61adea5
6 changed files with 28 additions and 1 deletion

View File

@@ -229,6 +229,7 @@ radv_generate_pipeline_key(const struct radv_device *device, const VkPipelineSha
/* Bit flags that radv_get_hash_flags() ORs together into its return value.
 * Each flag occupies a distinct bit; do not reuse a bit for a new flag.
 */
#define RADV_HASH_SHADER_PS_WAVE32 (1 << 2)
#define RADV_HASH_SHADER_GE_WAVE32 (1 << 3)
#define RADV_HASH_SHADER_LLVM (1 << 4)
/* Set when device->instance->clear_lds is enabled. */
#define RADV_HASH_SHADER_CLEAR_LDS (1 << 5)
#define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8)
#define RADV_HASH_SHADER_USE_NGG_CULLING (1 << 13)
#define RADV_HASH_SHADER_EMULATE_RT (1 << 16)
@@ -267,6 +268,8 @@ radv_get_hash_flags(const struct radv_device *device, bool stats)
hash_flags |= RADV_HASH_SHADER_NO_RT;
if (device->instance->dual_color_blend_by_location)
hash_flags |= RADV_HASH_SHADER_DUAL_BLEND_MRT1;
if (device->instance->clear_lds)
hash_flags |= RADV_HASH_SHADER_CLEAR_LDS;
return hash_flags;
}
@@ -701,6 +704,12 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_key
NIR_PASS_V(stage->nir, ac_nir_lower_ps, &options);
}
if (radv_shader_should_clear_lds(device, stage->nir)) {
const unsigned chunk_size = 16; /* max single store size */
const unsigned shared_size = ALIGN(stage->nir->info.shared_size, chunk_size);
NIR_PASS(_, stage->nir, nir_clear_shared_memory, shared_size, chunk_size);
}
NIR_PASS(_, stage->nir, nir_lower_int64);
NIR_PASS(_, stage->nir, nir_opt_idiv_const, 8);
@@ -782,6 +791,14 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_key
}
}
/**
 * Decide whether an LDS (shared memory) clear should be appended to a shader.
 *
 * The clear is requested only when all of the following hold:
 *  - the shader stage is compute, mesh or task,
 *  - the shader declares a non-zero amount of shared memory,
 *  - the instance has the clear_lds option enabled.
 *
 * @param device  Device whose instance carries the clear_lds option.
 * @param shader  NIR shader whose stage and shared_size are inspected.
 * @return true if the shader should clear LDS, false otherwise.
 */
bool
radv_shader_should_clear_lds(const struct radv_device *device, const nir_shader *shader)
{
   /* Only stages that may explicitly use shared memory are eligible. */
   switch (shader->info.stage) {
   case MESA_SHADER_COMPUTE:
   case MESA_SHADER_MESH:
   case MESA_SHADER_TASK:
      break;
   default:
      return false;
   }

   /* Nothing to clear when the shader uses no shared memory. */
   if (!(shader->info.shared_size > 0))
      return false;

   /* The device is consulted last, matching the original evaluation order. */
   if (device->instance->clear_lds)
      return true;

   return false;
}
static uint32_t
radv_get_executable_count(struct radv_pipeline *pipeline)
{